use crate::{
safer_unchecked::GetSaferUnchecked,
stringparse::{get_unicode_codepoint, ESCAPE_MAP},
Deserializer, ErrorType, Result, SillyWrapper,
};
/// Parses the JSON string located at `idx`, unescaping it in place.
///
/// Bytes are read from `data` starting one past `idx`; the byte at `idx` itself is
/// skipped (presumably the opening `"` -- confirm against callers). The result is
/// produced through the raw pointer carried by `input`, at the same offset:
///
/// * If a closing `"` is reached before any `\`, nothing is written and the returned
///   `&str` simply covers the untouched bytes behind `input`.
/// * Otherwise the bytes before the first `\` are left as they are, and from that
///   position on every decoded byte is written through `input`. Simple escapes are
///   translated via `ESCAPE_MAP`; `\u` escapes are decoded by
///   `get_unicode_codepoint` and re-encoded as UTF-8.
///
/// `_buffer` is not used by this implementation.
///
/// # Errors
///
/// * `InvalidUnicodeCodepoint` if `get_unicode_codepoint` fails or yields a value
///   above `0x10FFFF`.
/// * `InvalidEscape` if `ESCAPE_MAP` holds `0` for the byte following a `\`.
///
/// Both are reported at position `idx + 1 +` the offset of the offending `\`.
///
/// # Safety
///
/// The scan has no end-of-input check of its own, output goes through raw pointer
/// writes, and the result is built with `from_raw_parts` / `from_utf8_unchecked`.
/// The caller must therefore guarantee that:
///
/// * `data` contains a terminating, unescaped `"` after `idx`;
/// * the pointer in `input` is valid for reads and writes over the whole string
///   (from `idx + 1` up to the closing quote) for the lifetime `'de`;
/// * the bytes behind `input` match `data` (NOTE(review): assumed, since the fast
///   path returns the bytes behind `input` after scanning `data` -- confirm);
/// * the string content is valid UTF-8, as pass-through bytes are not validated.
// The `as u8` casts only see values that were range-checked and masked or shifted
// so that they fit into a single byte.
#[allow(clippy::cast_possible_truncation)]
pub(crate) unsafe fn parse_str<'invoke, 'de>(
    input: SillyWrapper<'de>,
    data: &'invoke [u8],
    _buffer: &'invoke mut [u8],
    idx: usize,
) -> Result<&'de str> {
    use ErrorType::{InvalidEscape, InvalidUnicodeCodepoint};

    // Offset of the first content byte; also the base for reported error positions.
    let start = idx + 1;
    let bytes: &[u8] = data.get_kinda_unchecked(start..);
    let out = input.input.add(start);

    // Fast path: run ahead until the first byte that needs attention.
    let mut read = 0;
    let mut cur = *bytes.get_kinda_unchecked(read);
    while !(cur == b'"' || cur == b'\\') {
        read += 1;
        cur = *bytes.get_kinda_unchecked(read);
    }
    if cur == b'"' {
        // No escapes at all: hand back the bytes as they are.
        return Ok(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
            out, read,
        )));
    }

    // Slow path: `read` walks the source while `written` trails behind it as the
    // output cursor. Everything before the first backslash is already in place.
    let mut written = read;
    loop {
        match cur {
            b'"' => break,
            b'\\' => {
                // Peek at the escape selector without consuming it yet.
                let escaped = *bytes.get_kinda_unchecked(read + 1);
                if escaped == b'u' {
                    // The decoder is handed the slice starting at the backslash.
                    let (cp, consumed) =
                        match get_unicode_codepoint(bytes.get_kinda_unchecked(read..)) {
                            Ok(decoded) => decoded,
                            Err(_) => {
                                return Err(Deserializer::error_c(
                                    start + read,
                                    'u',
                                    InvalidUnicodeCodepoint,
                                ));
                            }
                        };
                    if cp > 0x0010_FFFF {
                        return Err(Deserializer::error_c(
                            start + read,
                            'u',
                            InvalidUnicodeCodepoint,
                        ));
                    }

                    // Emit the code point as a 1-4 byte UTF-8 sequence.
                    let dst = out.add(written);
                    let width = if cp <= 0x7F {
                        dst.write(cp as u8);
                        1
                    } else if cp <= 0x7FF {
                        dst.write((0xC0 | (cp >> 6)) as u8);
                        dst.add(1).write((0x80 | (cp & 0x3F)) as u8);
                        2
                    } else if cp <= 0xFFFF {
                        dst.write((0xE0 | (cp >> 12)) as u8);
                        dst.add(1).write((0x80 | ((cp >> 6) & 0x3F)) as u8);
                        dst.add(2).write((0x80 | (cp & 0x3F)) as u8);
                        3
                    } else {
                        dst.write((0xF0 | (cp >> 18)) as u8);
                        dst.add(1).write((0x80 | ((cp >> 12) & 0x3F)) as u8);
                        dst.add(2).write((0x80 | ((cp >> 6) & 0x3F)) as u8);
                        dst.add(3).write((0x80 | (cp & 0x3F)) as u8);
                        4
                    };
                    written += width;

                    // The shared `read += 1` below accounts for the last source byte.
                    read += consumed - 1;
                } else {
                    // A zero entry in the table marks an unsupported escape.
                    let replacement: u8 = *ESCAPE_MAP.get_kinda_unchecked(escaped as usize);
                    if replacement == 0 {
                        return Err(Deserializer::error_c(
                            start + read,
                            escaped as char,
                            InvalidEscape,
                        ));
                    }
                    out.add(written).write(replacement);
                    written += 1;
                    // Skip the selector; the backslash is skipped by the shared bump.
                    read += 1;
                }
            }
            plain => {
                // Ordinary byte: move it down to the output cursor.
                out.add(written).write(plain);
                written += 1;
            }
        }
        read += 1;
        cur = *bytes.get_kinda_unchecked(read);
    }

    Ok(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
        out, written,
    )))
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::SIMDJSON_PADDING;

    /// Runs `parse_str` at index 0 over `input` and returns an owned copy of the result.
    ///
    /// `parse_str` receives two separate buffers: a pointer to a writable, unpadded
    /// copy of `input`, and a second copy extended with `SIMDJSON_PADDING * 2` zero
    /// bytes as its `data` slice. A 1024-byte scratch buffer is passed as well.
    fn deser_str(input: &[u8]) -> Result<String> {
        let mut writable = input.to_vec();

        let padded_len = input.len() + SIMDJSON_PADDING * 2;
        let mut padded = writable.clone();
        padded.resize(padded_len, 0);

        let mut scratch = vec![0; 1024];
        // The parsed `&str` points into `writable`, so it is copied into an owned
        // `String` before the buffers go out of scope.
        let parsed = unsafe {
            super::parse_str(
                writable.as_mut_ptr().into(),
                &padded,
                scratch.as_mut_slice(),
                0,
            )
        }?;
        Ok(String::from(parsed))
    }

    /// A string without any escape sequences comes back unchanged.
    #[test]
    fn easy_string() -> Result<()> {
        assert_eq!("snot", deser_str(&br#""snot""#[..])?);
        Ok(())
    }

    /// `\n` and `\"` escapes are translated to the characters they stand for.
    #[test]
    fn string_with_quote() -> Result<()> {
        assert_eq!(
            "snot says:\n \"badger\"",
            deser_str(&br#""snot says:\n \"badger\"""#[..])?
        );
        Ok(())
    }

    /// A `\u000e` escape decodes to the single character U+000E.
    #[test]
    fn string_with_utf8() -> Result<()> {
        assert_eq!("\u{e}", deser_str(&br#""\u000e""#[..])?);
        Ok(())
    }
}