use lua_types::error::LuaError;
use lua_types::value::LuaValue;
use lua_types::closure::LuaClosure;
use lua_types::{LuaType, LuaStatus};
use crate::state_stub::{LuaState, LuaStateStubExt as _, lua_CFunction, upvalue_index, CompareOp, LuaDebug};
/// Integer type used to hold decoded code values.
type UtfInt = u32;
/// Largest code point accepted by strict decoding (U+10FFFF).
const MAX_UNICODE: UtfInt = char::MAX as u32;
/// Largest value accepted by lax decoding and by the encoder (2^31 - 1).
const MAX_UTF: UtfInt = (1 << 31) - 1;
/// Byte pattern stored in the library table under the `charpattern` key.
const UTF8_PATT: &[u8] = b"[\x00-\x7F\xC2-\xFD][\x80-\xBF]*";
/// Resolves a possibly-negative position against a string of `len` bytes.
///
/// * Non-negative positions are returned unchanged.
/// * Negative positions count back from the end (`-1` maps to `len`).
/// * Negative positions whose magnitude exceeds `len` yield `0`.
fn pos_relat(pos: i64, len: usize) -> i64 {
    match pos {
        p if p >= 0 => p,
        // `unsigned_abs` keeps `i64::MIN` from overflowing on negation.
        p if p.unsigned_abs() > len as u64 => 0,
        p => len as i64 + p + 1,
    }
}
/// Returns `true` when `c` has the bit pattern `10xx_xxxx`, i.e. it is a
/// UTF-8 continuation byte.
#[inline]
fn is_cont(c: u8) -> bool {
    c >> 6 == 0b10
}
/// Reports whether the byte at index `pos` of `s` is a continuation byte.
///
/// Negative indices and indices past the end of `s` yield `false` rather
/// than panicking.
#[inline]
fn is_cont_at(s: &[u8], pos: i64) -> bool {
    if pos >= 0 {
        match s.get(pos as usize) {
            Some(&byte) => is_cont(byte),
            None => false,
        }
    } else {
        false
    }
}
/// Decodes one UTF-8 sequence from the front of `s`.
///
/// Returns the bytes that follow the decoded sequence together with the
/// decoded value, or `None` when `s` is empty or does not start with a
/// well-formed sequence (truncated input, a non-continuation byte where one
/// is required, an overlong encoding, or a value above `MAX_UTF`).
///
/// Sequences with up to five continuation bytes are accepted. When `strict`
/// is set, values above `MAX_UNICODE` and surrogates (`0xD800..=0xDFFF`) are
/// rejected as well.
fn utf8_decode(s: &[u8], strict: bool) -> Option<(&[u8], UtfInt)> {
    // LIMITS[k] is the smallest value that legitimately needs `k`
    // continuation bytes; anything smaller is an overlong encoding. Entry 0
    // is unreachable by any value, so a stray continuation byte in lead
    // position is always rejected.
    const LIMITS: [UtfInt; 6] = [u32::MAX, 0x80, 0x800, 0x1_0000, 0x20_0000, 0x400_0000];
    const MAX_CONTINUATION: usize = 5;

    let mut lead = u32::from(*s.first()?);
    let (value, consumed) = if lead < 0x80 {
        (lead, 1)
    } else {
        let mut count: usize = 0;
        let mut acc: UtfInt = 0;
        // Every set bit below the top one in the lead byte announces one
        // more continuation byte.
        while lead & 0x40 != 0 {
            count += 1;
            // Lead bytes 0xFE/0xFF announce six or seven continuation bytes.
            // Reject them here: letting `count` reach 7 would make the shift
            // below exceed the width of `u32` and panic in checked builds.
            if count > MAX_CONTINUATION {
                return None;
            }
            let cc = u32::from(*s.get(count)?);
            if cc & 0xC0 != 0x80 {
                return None;
            }
            acc = (acc << 6) | (cc & 0x3F);
            lead <<= 1;
        }
        // Merge the payload bits that remain from the lead byte.
        acc |= (lead & 0x7F) << (count as u32 * 5);
        if acc > MAX_UTF || acc < LIMITS[count] {
            return None;
        }
        (acc, count + 1)
    };
    if strict && (value > MAX_UNICODE || (0xD800..=0xDFFF).contains(&value)) {
        return None;
    }
    Some((&s[consumed..], value))
}
/// Encodes `code` as a UTF-8 style byte sequence (one to six bytes for
/// values up to `MAX_UTF`).
///
/// Values above `MAX_UTF` are outside the contract and trip the debug
/// assertion.
fn encode_utf8_codepoint(code: u32) -> Vec<u8> {
    debug_assert!(code <= MAX_UTF);
    if code < 0x80 {
        return vec![code as u8];
    }
    // Bytes are produced last-to-first and flipped once at the end.
    let mut encoded: Vec<u8> = Vec::with_capacity(6);
    let mut remaining = code;
    // Largest payload that still fits in the lead byte; shrinks by one bit
    // for every continuation byte emitted.
    let mut lead_capacity: u32 = 0x3F;
    loop {
        encoded.push(0x80 | (remaining & 0x3F) as u8);
        remaining >>= 6;
        lead_capacity >>= 1;
        if remaining <= lead_capacity {
            break;
        }
    }
    // Length-marker bits followed by whatever payload is left.
    encoded.push(((!lead_capacity << 1) | remaining) as u8);
    encoded.reverse();
    encoded
}
/// Counts the UTF-8 sequences of the string in argument 1 that start between
/// two byte positions.
///
/// Argument 2 (default `1`) and argument 3 (default `-1`) are positions
/// resolved with `pos_relat`; a truthy argument 4 selects lax decoding.
///
/// Pushes the count and returns `Ok(1)`. If a sequence fails to decode,
/// pushes `Nil` plus the 1-based position of the offending byte and returns
/// `Ok(2)`.
///
/// # Errors
/// An argument error when the initial or final position is out of bounds,
/// plus any error raised while reading the arguments.
fn utf_len(state: &mut LuaState) -> Result<usize, LuaError> {
    let bytes: Vec<u8> = state.check_arg_string(1)?.to_vec();
    let total = bytes.len();
    let limit = total as i64;
    let first = pos_relat(state.opt_arg_integer(2, 1)?, total);
    let last = pos_relat(state.opt_arg_integer(3, -1)?, total);
    let strict = !state.to_boolean(4);

    // Switch from 1-based positions to 0-based byte offsets while validating.
    if first < 1 || first - 1 > limit {
        return Err(LuaError::arg_error(2, "initial position out of bounds"));
    }
    let mut cursor = first - 1;
    let final_offset = last - 1;
    if final_offset >= limit {
        return Err(LuaError::arg_error(3, "final position out of bounds"));
    }

    let mut sequences: i64 = 0;
    while cursor <= final_offset {
        if let Some((rest, _)) = utf8_decode(&bytes[cursor as usize..], strict) {
            // `rest` is a suffix of `bytes`, so its length gives the absolute offset.
            cursor = (total - rest.len()) as i64;
            sequences += 1;
        } else {
            state.push(LuaValue::Nil);
            state.push(LuaValue::Int(cursor + 1));
            return Ok(2);
        }
    }
    state.push(LuaValue::Int(sequences));
    Ok(1)
}
/// Pushes the code value of every UTF-8 sequence that starts within a byte
/// range of the string in argument 1.
///
/// Argument 2 (default `1`) is the first position and argument 3 (default:
/// the resolved first position) the last; both go through `pos_relat`. A
/// truthy argument 4 selects lax decoding.
///
/// Returns the number of values pushed (`0` for an empty range).
///
/// # Errors
/// Argument errors for out-of-bounds positions, and runtime errors for an
/// over-long slice or an undecodable sequence.
fn codepoint(state: &mut LuaState) -> Result<usize, LuaError> {
    let bytes: Vec<u8> = state.check_arg_string(1)?.to_vec();
    let total = bytes.len();
    let first = pos_relat(state.opt_arg_integer(2, 1)?, total);
    let last = pos_relat(state.opt_arg_integer(3, first)?, total);
    let strict = !state.to_boolean(4);

    if first < 1 {
        return Err(LuaError::arg_error(2, "out of bounds"));
    }
    if last > total as i64 {
        return Err(LuaError::arg_error(3, "out of bounds"));
    }
    if first > last {
        // Empty interval: no values.
        return Ok(0);
    }
    let span = last - first;
    if span >= i32::MAX as i64 {
        return Err(LuaError::runtime(format_args!("string slice too long")));
    }
    // One stack slot per byte is an upper bound on the values pushed.
    state.ensure_stack((span + 1) as i32, "string slice too long")?;

    let stop = last as usize;
    let mut cursor = (first - 1) as usize;
    let mut pushed: usize = 0;
    while cursor < stop {
        let (rest, code) = match utf8_decode(&bytes[cursor..], strict) {
            Some(decoded) => decoded,
            None => return Err(LuaError::runtime(format_args!("invalid UTF-8 code"))),
        };
        state.push(LuaValue::Int(code as i64));
        pushed += 1;
        cursor = total - rest.len();
    }
    Ok(pushed)
}
/// Reads argument `arg` as an integer and returns its UTF-8 encoding.
///
/// The integer is reinterpreted as unsigned, so negative values fall outside
/// the accepted range.
///
/// # Errors
/// An argument error ("value out of range") when the value exceeds
/// `MAX_UTF`, plus any error raised while reading the argument.
fn get_utf_char_bytes(state: &mut LuaState, arg: i32) -> Result<Vec<u8>, LuaError> {
    let raw = state.check_arg_integer(arg)? as u64;
    if raw <= u64::from(MAX_UTF) {
        Ok(encode_utf8_codepoint(raw as u32))
    } else {
        Err(LuaError::arg_error(arg, "value out of range"))
    }
}
/// Builds a string from the integer arguments on the stack, encoding each
/// one with `get_utf_char_bytes` and concatenating the results in argument
/// order.
///
/// Pushes the resulting string and returns `Ok(1)`.
///
/// # Errors
/// Propagates the error of the first argument that cannot be encoded, and
/// any error from interning the result.
fn utf_char(state: &mut LuaState) -> Result<usize, LuaError> {
    let argc: i32 = state.stack_top() as i32;
    // A single argument is just the one-element case of the concatenation.
    let mut encoded: Vec<u8> = Vec::new();
    for arg in 1..=argc {
        let piece = get_utf_char_bytes(state, arg)?;
        encoded.extend_from_slice(&piece);
    }
    let interned = state.intern_str(&encoded)?;
    state.push(LuaValue::Str(interned));
    Ok(1)
}
/// Finds the byte position where the `n`-th sequence starts, counting from a
/// given position of the string in argument 1.
///
/// Argument 2 is `n`. Argument 3 is the starting position; it defaults to `1`
/// when `n >= 0` and to one past the end otherwise, and is resolved with
/// `pos_relat`.
///
/// * `n == 0`: walks back to the start of the sequence containing the position.
/// * `n < 0`: steps backwards over `-n` sequence starts.
/// * `n > 0`: steps forwards over `n - 1` sequence starts.
///
/// Pushes the 1-based position found, or `Nil` when the string ran out
/// before the count did, and returns `Ok(1)`.
///
/// # Errors
/// An argument error for an out-of-bounds position, and a runtime error when
/// `n != 0` and the starting position is a continuation byte.
fn byte_offset(state: &mut LuaState) -> Result<usize, LuaError> {
    let bytes: Vec<u8> = state.check_arg_string(1)?.to_vec();
    let total = bytes.len();
    let limit = total as i64;
    let n: i64 = state.check_arg_integer(2)?;
    let fallback: i64 = if n >= 0 { 1 } else { limit + 1 };
    let start = pos_relat(state.opt_arg_integer(3, fallback)?, total);
    if start < 1 || start - 1 > limit {
        return Err(LuaError::arg_error(3, "position out of bounds"));
    }

    let mut at = start - 1;
    let mut pending = n;
    if n == 0 {
        // Back up to the first byte of the current sequence.
        while at > 0 && is_cont_at(&bytes, at) {
            at -= 1;
        }
    } else if is_cont_at(&bytes, at) {
        return Err(LuaError::runtime(format_args!(
            "initial position is a continuation byte"
        )));
    } else if n < 0 {
        while pending < 0 && at > 0 {
            // Step to the start of the previous sequence.
            at -= 1;
            while at > 0 && is_cont_at(&bytes, at) {
                at -= 1;
            }
            pending += 1;
        }
    } else {
        // The first sequence is the one already under the cursor.
        pending -= 1;
        while pending > 0 && at < limit {
            // Step to the start of the next sequence; `is_cont_at` is false
            // past the end, which stops the scan.
            at += 1;
            while is_cont_at(&bytes, at) {
                at += 1;
            }
            pending -= 1;
        }
    }

    let found = if pending == 0 {
        LuaValue::Int(at + 1)
    } else {
        LuaValue::Nil
    };
    state.push(found);
    Ok(1)
}
/// One step of the iteration set up by `iter_codes`.
///
/// Argument 1 is the string and argument 2 the control value (a byte
/// position; anything that does not convert to an integer counts as `0`).
/// Continuation bytes at the control position are skipped and the sequence
/// that follows is decoded.
///
/// Pushes the 1-based position of that sequence and its code value and
/// returns `Ok(2)`, or returns `Ok(0)` once the string is exhausted.
///
/// # Errors
/// A runtime error ("invalid UTF-8 code") when the sequence does not decode
/// or is followed directly by a continuation byte.
fn iter_aux(state: &mut LuaState, strict: bool) -> Result<usize, LuaError> {
    let s: Vec<u8> = state.check_arg_string(1)?.to_vec();
    let len = s.len();
    // Reinterpreting as unsigned turns negative control values into huge
    // ones, which the bound check below treats as "past the end".
    let control: u64 = state.to_integer(2).unwrap_or(0) as u64;
    // Compare in u64 space *before* narrowing: casting first would truncate
    // on targets where `usize` is narrower than 64 bits and could alias a
    // negative or huge control value to a small in-range index.
    if control >= len as u64 {
        return Ok(0);
    }
    let mut idx = control as usize; // lossless: control < len <= usize::MAX
    while idx < len && is_cont(s[idx]) {
        idx += 1;
    }
    if idx >= len {
        return Ok(0);
    }
    match utf8_decode(&s[idx..], strict) {
        None => Err(LuaError::runtime(format_args!("invalid UTF-8 code"))),
        Some((remaining, code)) => {
            // A continuation byte right after a complete sequence means the
            // input is malformed.
            let next_pos = len - remaining.len();
            if next_pos < len && is_cont(s[next_pos]) {
                return Err(LuaError::runtime(format_args!("invalid UTF-8 code")));
            }
            state.push(LuaValue::Int((idx + 1) as i64));
            state.push(LuaValue::Int(code as i64));
            Ok(2)
        }
    }
}
/// Iteration step that decodes in strict mode; forwards to `iter_aux`.
fn iter_aux_strict(state: &mut LuaState) -> Result<usize, LuaError> {
    let strict = true;
    iter_aux(state, strict)
}
/// Iteration step that decodes in lax (non-strict) mode; forwards to `iter_aux`.
fn iter_aux_lax(state: &mut LuaState) -> Result<usize, LuaError> {
    let strict = false;
    iter_aux(state, strict)
}
/// Sets up iteration over the sequences of the string in argument 1.
///
/// A truthy argument 2 selects the lax step function, otherwise the strict
/// one is used. Pushes three values — the step function, the string itself
/// and the initial control value `0` — and returns `Ok(3)`.
///
/// # Errors
/// An argument error ("invalid UTF-8 code") when the string begins with a
/// continuation byte, plus any error raised while reading or pushing values.
fn iter_codes(state: &mut LuaState) -> Result<usize, LuaError> {
    let lax: bool = state.to_boolean(2);
    let subject: Vec<u8> = state.check_arg_string(1)?.to_vec();
    if let Some(&lead) = subject.first() {
        if is_cont(lead) {
            return Err(LuaError::arg_error(1, "invalid UTF-8 code"));
        }
    }
    let step: fn(&mut LuaState) -> Result<usize, LuaError> = match lax {
        true => iter_aux_lax,
        false => iter_aux_strict,
    };
    state.push_c_function(step)?;
    state.push_value_at(1)?;
    state.push(LuaValue::Int(0));
    Ok(3)
}
/// Table of the library's functions: each entry pairs the byte-string name a
/// function is exposed under with the Rust function that implements it.
/// `open_utf8` hands this table to `new_lib`.
pub const FUNCS: &[(&[u8], fn(&mut LuaState) -> Result<usize, LuaError>)] = &[
(b"offset", byte_offset),
(b"codepoint", codepoint),
(b"char", utf_char),
(b"len", utf_len),
(b"codes", iter_codes),
];
/// Opens the library: creates its table from `FUNCS`, stores `UTF8_PATT` in
/// the `charpattern` field of that table, and returns `Ok(1)`.
///
/// # Errors
/// Propagates any error from creating the table, interning the pattern or
/// setting the field.
pub fn open_utf8(state: &mut LuaState) -> Result<usize, LuaError> {
    state.new_lib(FUNCS)?;
    let pattern = LuaValue::Str(state.intern_str(UTF8_PATT)?);
    state.push(pattern);
    // Index -2 is the slot just below the value pushed above.
    state.set_field(-2, b"charpattern")?;
    Ok(1)
}