use crate::{
Deserializer, ErrorType, Result, SillyWrapper,
safer_unchecked::GetSaferUnchecked,
stringparse::{ESCAPE_MAP, get_unicode_codepoint},
};
/// Parses the JSON string that starts right after `data[idx]` and returns it with all
/// escape sequences resolved.
///
/// Scanning is done on `data`; the result lives in the buffer behind `input`:
///
/// * If the string contains no backslash, the bytes between the quotes are returned
///   straight from `input` without any copying.
/// * Otherwise everything from the first backslash on is rewritten into `input`
///   (plain bytes are copied over from `data`, escapes are replaced by the bytes they
///   stand for) and the compacted region is returned.
///
/// `_buffer` is not used by this implementation.
///
/// # Errors
///
/// * `InvalidEscape` if the byte following a backslash has a `0` entry in `ESCAPE_MAP`.
/// * `InvalidUnicodeCodepoint` if `get_unicode_codepoint` fails for a `\u` escape, or if
///   the value it yields is not a Unicode scalar value (a surrogate in
///   `0xD800..=0xDFFF`, or anything above `0x10FFFF`).
///
/// The position attached to either error is the offset of the offending backslash
/// within `data`.
///
/// # Safety
///
/// The visible code relies on the following, none of which it verifies itself:
///
/// * `data[idx]` is skipped without being inspected (presumably it is the opening
///   quote — confirm against the caller), and `data` contains an unescaped `"`
///   somewhere at or after `idx + 1`; the scan does not stop on anything else.
/// * `input.input` points to memory that is valid for reads and writes from offset
///   `idx + 1` up to the end of the string, and stays valid for `'de`.
/// * The bytes behind `input` match `data` for the run preceding the first backslash,
///   because that run is returned from `input` as-is.
/// * The raw string bytes are valid UTF-8: the result is built with
///   `from_utf8_unchecked`.
pub(crate) unsafe fn parse_str<'invoke, 'de>(
    input: SillyWrapper<'de>,
    data: &'invoke [u8],
    _buffer: &'invoke mut [u8],
    idx: usize,
) -> Result<&'de str> {
    use ErrorType::{InvalidEscape, InvalidUnicodeCodepoint};

    let input = input.input;
    // Both views start one byte past `idx`, i.e. at the first byte of the string content.
    let src: &[u8] = unsafe { data.get_kinda_unchecked(idx + 1..) };
    let input = unsafe { input.add(idx + 1) };

    let mut src_i = 0;
    let mut b = unsafe { *src.get_kinda_unchecked(src_i) };

    // Fast path: run over everything that needs no rewriting.
    while b != b'"' && b != b'\\' {
        src_i += 1;
        b = unsafe { *src.get_kinda_unchecked(src_i) };
    }
    if b == b'"' {
        // No escapes at all: hand out the bytes already sitting in `input`.
        let v = unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(input, src_i)) };
        return Ok(v);
    }

    // Slow path: an escape was found. From here on output is written to `input`,
    // starting at the position of that first backslash.
    let mut dst_i = src_i;
    while b != b'"' {
        if b == b'\\' {
            // Peek at the byte after the backslash; `src_i` still points at the backslash.
            let escape_char = unsafe { *src.get_kinda_unchecked(src_i + 1) };
            if escape_char == b'u' {
                // The helper receives the slice starting at the backslash itself.
                let (cp, src_offset) =
                    unsafe { get_unicode_codepoint(src.get_kinda_unchecked(src_i..)) }.map_err(
                        |_| Deserializer::error_c(idx + 1 + src_i, 'u', InvalidUnicodeCodepoint),
                    )?;

                // `char::from_u32` accepts exactly the Unicode scalar values. Besides the
                // `> 0x10FFFF` case this also rejects surrogates (0xD800..=0xDFFF), whose
                // three-byte encoding is not valid UTF-8 and must never end up inside the
                // `&str` built with `from_utf8_unchecked` below.
                let ch = char::from_u32(cp).ok_or_else(|| {
                    Deserializer::error_c(idx + 1 + src_i, 'u', InvalidUnicodeCodepoint)
                })?;

                let mut utf8 = [0_u8; 4];
                let len = ch.encode_utf8(&mut utf8).len();
                // SAFETY: `utf8` is a local array and `len <= 4`, so source and destination
                // cannot overlap. The destination is covered by the caller's guarantee that
                // `input` is writable across the string. NOTE(review): this assumes the
                // escape consumed at least `len` source bytes so that `dst_i` never
                // overtakes `src_i` — confirm against `get_unicode_codepoint`.
                unsafe { std::ptr::copy_nonoverlapping(utf8.as_ptr(), input.add(dst_i), len) };
                dst_i += len;

                // `src_offset` source bytes were consumed; one of them is accounted for by
                // the unconditional `src_i += 1` at the bottom of the loop.
                src_i += src_offset - 1;
            } else {
                // Single-character escape: look up its replacement byte (0 marks "invalid").
                let escape_result: u8 =
                    unsafe { *ESCAPE_MAP.get_kinda_unchecked(escape_char as usize) };
                if escape_result == 0 {
                    return Err(Deserializer::error_c(
                        idx + 1 + src_i,
                        escape_char as char,
                        InvalidEscape,
                    ));
                }
                unsafe { input.add(dst_i).write(escape_result) };
                dst_i += 1;
                // Skip the escape character; the backslash is skipped below.
                src_i += 1;
            }
        } else {
            // Ordinary byte: move it down to its compacted position.
            unsafe { input.add(dst_i).write(b) };
            dst_i += 1;
        }
        src_i += 1;
        b = unsafe { *src.get_kinda_unchecked(src_i) };
    }

    // `dst_i` is the length of the unescaped string now stored at the start of `input`.
    unsafe {
        Ok(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
            input, dst_i,
        )))
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::SIMDJSON_PADDING;

    /// Runs `parse_str` on `input` (starting at index 0) and returns the result as an
    /// owned `String`.
    ///
    /// Two copies of the input are used: an unpadded, writable one that backs the
    /// returned string, and one followed by `SIMDJSON_PADDING * 2` zero bytes that the
    /// parser scans.
    fn deser_str(input: &[u8]) -> Result<String> {
        let padded_len = input.len() + SIMDJSON_PADDING * 2;

        let mut writable = input.to_vec();
        let mut padded = Vec::with_capacity(padded_len);
        padded.extend_from_slice(input);
        padded.resize(padded_len, 0);

        let mut scratch = vec![0_u8; 1024];
        let parsed =
            unsafe { super::parse_str(writable.as_mut_ptr().into(), &padded, &mut scratch, 0) }?;
        Ok(parsed.to_owned())
    }

    /// A string without any escape sequence.
    #[test]
    fn easy_string() -> Result<()> {
        assert_eq!("snot", deser_str(br#""snot""#)?);
        Ok(())
    }

    /// A string containing `\n` and `\"` escapes.
    #[test]
    fn string_with_quote() -> Result<()> {
        assert_eq!(
            "snot says:\n \"badger\"",
            deser_str(br#""snot says:\n \"badger\"""#)?
        );
        Ok(())
    }

    /// A string consisting of a single `\u` escape.
    #[test]
    fn string_with_utf8() -> Result<()> {
        assert_eq!("\u{e}", deser_str(br#""\u000e""#)?);
        Ok(())
    }
}