use core::str;
use facet_reflect::Span;
/// A lexical token produced by [`Scanner::next_token`] or [`Scanner::finalize_at_eof`].
///
/// String and number tokens carry byte offsets into the scanned buffer rather than
/// decoded values.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
/// `{`
ObjectStart,
/// `}`
ObjectEnd,
/// `[`
ArrayStart,
/// `]`
ArrayEnd,
/// `:`
Colon,
/// `,`
Comma,
/// The literal `null`.
Null,
/// The literal `true`.
True,
/// The literal `false`.
False,
/// A string; the offsets delimit the content between the quotes.
String {
/// Offset of the first content byte (just after the opening quote).
start: usize,
/// Offset of the closing quote, i.e. the exclusive end of the content.
end: usize,
/// `true` if the scanner saw at least one backslash inside the content.
has_escapes: bool,
},
/// A number; the offsets delimit its text.
Number {
/// Offset of the first byte of the number (the `-` sign, if present).
start: usize,
/// Offset one past the last byte of the number.
end: usize,
/// Coarse classification chosen while scanning.
hint: NumberHint,
},
/// No bytes remain at the current position and no token is in progress.
Eof,
/// The buffer ended in the middle of a token; the scanner has saved its state so the
/// token can be resumed.
NeedMore {
/// Offset at which the unfinished token starts (every producer in this file
/// passes the token's `start` here).
consumed: usize,
},
}
/// Coarse classification of a number token, decided by the scanner from its syntax.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum NumberHint {
/// No leading `-`, no fraction and no exponent.
Unsigned,
/// Leading `-`, no fraction and no exponent.
Signed,
/// A `.` or an `e`/`E` exponent marker was seen.
Float,
}
/// A [`Token`] together with the source span it was scanned from.
#[derive(Debug, Clone, PartialEq)]
pub struct SpannedToken {
/// The token itself.
pub token: Token,
/// Location of the token in the buffer.
// NOTE(review): `Span::new` is called as `(offset, length)` throughout this file —
// confirm against `facet_reflect::Span`.
pub span: Span,
}
/// An error raised while scanning or decoding, with the span it refers to.
#[derive(Debug, Clone, PartialEq)]
pub struct ScanError {
/// What went wrong.
pub kind: ScanErrorKind,
/// Where in the buffer the problem was detected.
pub span: Span,
}
/// The category of a [`ScanError`].
#[derive(Debug, Clone, PartialEq)]
pub enum ScanErrorKind {
/// A byte that is not valid at this point; the byte is stored converted with `as char`.
/// The number and escape parsers in this file use `'?'` as a placeholder when there
/// is no single offending character.
UnexpectedChar(char),
/// Input ended inside a construct; the string names the context (e.g. `"in string"`).
UnexpectedEof(&'static str),
/// The bytes are not valid UTF-8, or a `\u` escape does not form a valid scalar value.
InvalidUtf8,
}
/// Result of one scanning step: a [`SpannedToken`] on success, a [`ScanError`] on failure.
pub type ScanResult = Result<SpannedToken, ScanError>;
/// Resumable JSON tokenizer over a caller-supplied byte buffer.
///
/// The scanner does not own the buffer; each call receives it as `buf` and the scanner
/// only tracks an offset into it plus the state of a token interrupted by the end of
/// the buffer.
pub struct Scanner {
// Byte offset of the next unread byte in the buffer.
pos: usize,
// State of a partially scanned token, or `Ready` when none is pending.
state: ScanState,
// When true, `//` and `/* */` comments are skipped along with whitespace.
allow_comments: bool,
}
/// What the scanner was doing when the buffer ran out.
#[derive(Debug, Clone, Default)]
enum ScanState {
/// No token is in progress.
#[default]
Ready,
/// Interrupted inside a string.
InString {
// Offset of the token start (the opening quote).
start: usize,
// Whether a backslash has been seen so far.
has_escapes: bool,
// Whether the last byte consumed was an unescaped backslash.
escape_next: bool,
},
/// Interrupted inside (or right at the end of) a number.
InNumber { start: usize, hint: NumberHint },
/// Interrupted inside `true`, `false` or `null`.
InLiteral {
// Offset of the first byte of the literal.
start: usize,
// The full literal being matched.
expected: &'static [u8],
// How many bytes of `expected` have matched so far.
matched: usize,
},
}
impl Scanner {
pub const fn new() -> Self {
Self {
pos: 0,
state: ScanState::Ready,
allow_comments: false,
}
}
pub const fn new_with_comments() -> Self {
Self {
pos: 0,
state: ScanState::Ready,
allow_comments: true,
}
}
/// Creates a scanner that starts reading at byte offset `pos`, with comment skipping
/// disabled and no token in progress.
#[allow(dead_code)]
pub const fn at_position(pos: usize) -> Self {
    let state = ScanState::Ready;
    let allow_comments = false;
    Self {
        pos,
        state,
        allow_comments,
    }
}
/// Returns the byte offset of the next unread byte.
pub const fn pos(&self) -> usize {
self.pos
}
/// Overwrites the current byte offset. The pending-token state is left untouched.
#[allow(dead_code)]
pub const fn set_pos(&mut self, pos: usize) {
self.pos = pos;
}
/// Resolves whatever token was in progress, for use when no further input will arrive.
///
/// The pending state is consumed, leaving the scanner in `Ready`.
///
/// * `Ready` yields [`Token::Eof`] at the current position.
/// * A pending number is emitted as [`Token::Number`] ending at the current position,
///   unless it is empty or only a `-`.
/// * A pending string is always an error.
/// * A pending literal is emitted only if it was fully matched.
///
/// # Errors
/// Returns [`ScanErrorKind::UnexpectedEof`] naming the unfinished construct.
pub fn finalize_at_eof(&mut self, buf: &[u8]) -> ScanResult {
    // Shared shape of every error this function produces.
    let truncated = |context: &'static str, from: usize, to: usize| ScanError {
        kind: ScanErrorKind::UnexpectedEof(context),
        span: Span::new(from, to - from),
    };

    let pending = core::mem::take(&mut self.state);
    match pending {
        ScanState::Ready => Ok(SpannedToken {
            token: Token::Eof,
            span: Span::new(self.pos, 0),
        }),
        ScanState::InNumber { start, hint } => {
            let end = self.pos;
            let only_sign = end == start + 1 && buf.get(start) == Some(&b'-');
            if end == start || only_sign {
                return Err(truncated("in number", start, end));
            }
            Ok(SpannedToken {
                token: Token::Number { start, end, hint },
                span: Span::new(start, end - start),
            })
        }
        ScanState::InString { start, .. } => Err(truncated("in string", start, self.pos)),
        ScanState::InLiteral {
            start,
            expected,
            matched,
        } => {
            if matched != expected.len() {
                return Err(truncated("in literal", start, self.pos));
            }
            let token = match expected {
                b"true" => Token::True,
                b"false" => Token::False,
                b"null" => Token::Null,
                _ => unreachable!(),
            };
            Ok(SpannedToken {
                token,
                span: Span::new(start, expected.len()),
            })
        }
    }
}
/// Scans the next token from `buf`, starting at the scanner's current offset.
///
/// If an earlier call left a token unfinished, that token is resumed first. Otherwise
/// leading whitespace (and comments, when enabled) is skipped and a new token is
/// started based on its first byte. [`Token::Eof`] is returned when no byte remains.
///
/// # Errors
/// Returns [`ScanErrorKind::UnexpectedChar`] for a byte that cannot start a token, and
/// propagates errors from whitespace skipping and the per-token scanners.
pub fn next_token(&mut self, buf: &[u8]) -> ScanResult {
    // Taking the state leaves `Ready` behind, so the `Ready` arm simply falls through.
    match core::mem::take(&mut self.state) {
        ScanState::Ready => {}
        ScanState::InString {
            start,
            has_escapes,
            escape_next,
        } => return self.resume_string(buf, start, has_escapes, escape_next),
        ScanState::InNumber { start, hint } => return self.resume_number(buf, start, hint),
        ScanState::InLiteral {
            start,
            expected,
            matched,
        } => return self.resume_literal(buf, start, expected, matched),
    }

    self.skip_whitespace(buf)?;
    let start = self.pos;
    let Some(&byte) = buf.get(start) else {
        return Ok(SpannedToken {
            token: Token::Eof,
            span: Span::new(start, 0),
        });
    };

    // Single-byte structural tokens share one code path.
    let structural = match byte {
        b'{' => Some(Token::ObjectStart),
        b'}' => Some(Token::ObjectEnd),
        b'[' => Some(Token::ArrayStart),
        b']' => Some(Token::ArrayEnd),
        b':' => Some(Token::Colon),
        b',' => Some(Token::Comma),
        _ => None,
    };
    if let Some(token) = structural {
        self.pos += 1;
        return Ok(SpannedToken {
            token,
            span: Span::new(start, 1),
        });
    }

    match byte {
        b'"' => self.scan_string(buf, start),
        b'-' | b'0'..=b'9' => self.scan_number(buf, start),
        b't' => self.scan_literal(buf, start, b"true", Token::True),
        b'f' => self.scan_literal(buf, start, b"false", Token::False),
        b'n' => self.scan_literal(buf, start, b"null", Token::Null),
        _ => Err(ScanError {
            kind: ScanErrorKind::UnexpectedChar(byte as char),
            span: Span::new(start, 1),
        }),
    }
}
/// Advances `self.pos` past whitespace and, when `allow_comments` is set, past `//` line
/// comments and `/* ... */` block comments.
///
/// # Errors
/// Returns `UnexpectedEof("in block comment")` if a block comment is still open when
/// `buf` runs out; `self.pos` is then set to the offset where the buffer ended.
fn skip_whitespace(&mut self, buf: &[u8]) -> Result<(), ScanError> {
// Work on a local copy; `self.pos` is only written on exit.
let mut pos = self.pos;
loop {
// Skip a run of space, tab, LF and CR.
while let Some(&b) = buf.get(pos) {
match b {
b' ' | b'\t' | b'\n' | b'\r' => pos += 1,
_ => break,
}
}
// Without comment support a single whitespace pass is all there is to do.
if !self.allow_comments {
break;
}
// Look at the next two bytes to recognise a comment opener.
match (buf.get(pos), buf.get(pos + 1)) {
(Some(&b'/'), Some(&b'/')) => {
pos += 2;
// Consume up to and including the newline, or to the end of the buffer.
while let Some(&b) = buf.get(pos) {
pos += 1;
if b == b'\n' {
break;
}
}
}
(Some(&b'/'), Some(&b'*')) => {
let comment_start = pos;
pos += 2;
loop {
match buf.get(pos) {
// Closing `*/` ends the comment.
Some(&b'*') if buf.get(pos + 1) == Some(&b'/') => {
pos += 2;
break;
}
Some(_) => pos += 1,
// Buffer exhausted before `*/`: reported as an error, not as `NeedMore`.
None => {
self.pos = pos;
return Err(ScanError {
kind: ScanErrorKind::UnexpectedEof("in block comment"),
span: Span::new(comment_start, pos - comment_start),
});
}
}
}
}
// Anything else (including a lone `/`) ends the skipping.
_ => break,
}
// After a comment, loop again: more whitespace or another comment may follow.
}
self.pos = pos;
Ok(())
}
/// Starts scanning a string whose opening quote sits at the current position.
///
/// `start` is the token start used for spans and for resuming.
fn scan_string(&mut self, buf: &[u8], start: usize) -> ScanResult {
    // Step over the opening quote; the content begins at the following byte.
    let first_content_byte = self.pos + 1;
    self.pos = first_content_byte;
    self.scan_string_content(buf, start, first_content_byte, false, false)
}
/// Continues a string interrupted by the end of the buffer.
///
/// Scanning picks up at the current `self.pos`; the content start is taken to be the
/// byte after `start` (the opening quote).
fn resume_string(
    &mut self,
    buf: &[u8],
    start: usize,
    has_escapes: bool,
    escape_next: bool,
) -> ScanResult {
    self.scan_string_content(buf, start, start + 1, has_escapes, escape_next)
}
/// Scans string content from `self.pos` until the closing quote or the end of `buf`.
///
/// * `start` — token start (the opening quote), used for spans and saved state.
/// * `content_start` — offset reported as the `start` of the resulting `Token::String`.
/// * `has_escapes` / `escape_next` — flags carried over when resuming.
///
/// Returns `Token::String` when the closing quote is found. If the buffer runs out first,
/// the state is saved in `self.state` and `Token::NeedMore` is returned. Escape sequences
/// are only skipped here, not validated.
fn scan_string_content(
&mut self,
buf: &[u8],
start: usize,
content_start: usize,
mut has_escapes: bool,
mut escape_next: bool,
) -> ScanResult {
// Fast path: examine 16 bytes at a time as one `u128`.
const STEP_SIZE: usize = 16;
type Window = u128;
type Chunk = [u8; STEP_SIZE];
// The fast path is skipped when the very next byte is the target of a pending backslash.
if !escape_next {
loop {
// Only taken when a full 16-byte chunk is available at `self.pos`.
if let Some(Ok(chunk)) = buf
.get(self.pos..)
.and_then(|s| s.get(..STEP_SIZE))
.map(Chunk::try_from)
{
let window = Window::from_ne_bytes(chunk);
let has_quote = contains_byte(window, b'"');
let has_backslash = contains_byte(window, b'\\');
// No quote and no backslash: the whole chunk is plain content.
if !has_quote && !has_backslash {
self.pos += STEP_SIZE;
continue;
}
}
// Short tail, or a chunk containing `"` / `\`: fall back to the byte loop.
break;
}
}
while let Some(&byte) = buf.get(self.pos) {
if escape_next {
// `byte` is the character right after a backslash; consume it.
escape_next = false;
self.pos += 1;
if byte == b'u' {
// `\u` needs four more bytes; if they are not all buffered yet, ask for more.
if self.pos + 4 > buf.len() {
self.state = ScanState::InString {
start,
has_escapes: true,
escape_next: false,
};
return Ok(SpannedToken {
token: Token::NeedMore { consumed: start },
span: Span::new(start, self.pos - start),
});
}
// Skip the four bytes without checking that they are hex digits.
self.pos += 4;
// If another `\u` follows directly, skip it as well (six bytes in total).
if self.pos + 2 <= buf.len()
&& buf.get(self.pos) == Some(&b'\\')
&& buf.get(self.pos + 1) == Some(&b'u')
{
if self.pos + 6 > buf.len() {
// The second escape is incomplete; `self.pos` stays on its backslash.
self.state = ScanState::InString {
start,
has_escapes: true,
escape_next: false,
};
return Ok(SpannedToken {
token: Token::NeedMore { consumed: start },
span: Span::new(start, self.pos - start),
});
}
self.pos += 6;
}
}
continue;
}
match byte {
b'"' => {
// Closing quote: content ends here, the token ends one byte later.
let content_end = self.pos;
self.pos += 1;
return Ok(SpannedToken {
token: Token::String {
start: content_start,
end: content_end,
has_escapes,
},
span: Span::new(start, self.pos - start),
});
}
b'\\' => {
has_escapes = true;
escape_next = true;
self.pos += 1;
}
_ => {
self.pos += 1;
}
}
}
// The buffer ran out before a closing quote was seen.
if escape_next || self.pos > start {
self.state = ScanState::InString {
start,
has_escapes,
escape_next,
};
Ok(SpannedToken {
token: Token::NeedMore { consumed: start },
span: Span::new(start, self.pos - start),
})
} else {
// Only reached when `self.pos <= start` and no escape is pending.
Err(ScanError {
kind: ScanErrorKind::UnexpectedEof("in string"),
span: Span::new(start, self.pos - start),
})
}
}
/// Starts scanning a number at the current position.
///
/// A leading `-` is consumed here and selects `NumberHint::Signed`; otherwise the hint
/// starts out as `Unsigned`. The rest is handled by `scan_number_content`.
fn scan_number(&mut self, buf: &[u8], start: usize) -> ScanResult {
    let hint = if buf.get(self.pos) == Some(&b'-') {
        self.pos += 1;
        NumberHint::Signed
    } else {
        NumberHint::Unsigned
    };
    self.scan_number_content(buf, start, hint)
}
/// Resumes a number interrupted by the end of the buffer.
///
/// The number is rescanned from `start` (skipping a leading `-`, if any) instead of
/// continuing from the old position; `hint` is the value saved at interruption.
fn resume_number(&mut self, buf: &[u8], start: usize, hint: NumberHint) -> ScanResult {
    let sign_len = usize::from(buf.get(start) == Some(&b'-'));
    self.pos = start + sign_len;
    self.scan_number_content(buf, start, hint)
}
/// Scans the digits, optional fraction and optional exponent of a number from `self.pos`.
///
/// * If scanning reaches exactly the end of `buf`, the state is saved and
///   `Token::NeedMore` is returned, because more digits might follow.
/// * Otherwise a `Token::Number` covering `start..self.pos` is returned; `hint` is
///   upgraded to `Float` when a `.` or `e`/`E` was seen.
///
/// # Errors
/// Returns `UnexpectedChar` (carrying the byte at the stop position, or `'?'`) when the
/// number is empty or consists of only a `-`.
fn scan_number_content(
    &mut self,
    buf: &[u8],
    start: usize,
    mut hint: NumberHint,
) -> ScanResult {
    // Returns the first offset at or after `from` that does not hold an ASCII digit.
    let skip_digits = |from: usize| {
        let mut at = from;
        while buf.get(at).is_some_and(u8::is_ascii_digit) {
            at += 1;
        }
        at
    };

    // Integer part.
    let mut cursor = skip_digits(self.pos);

    // Optional fraction.
    if buf.get(cursor) == Some(&b'.') {
        hint = NumberHint::Float;
        cursor = skip_digits(cursor + 1);
    }

    // Optional exponent with optional sign.
    if matches!(buf.get(cursor), Some(b'e' | b'E')) {
        hint = NumberHint::Float;
        cursor += 1;
        if matches!(buf.get(cursor), Some(b'+' | b'-')) {
            cursor += 1;
        }
        cursor = skip_digits(cursor);
    }

    self.pos = cursor;

    // Stopping exactly at the buffer end means the number may continue in the next chunk.
    if cursor == buf.len() {
        self.state = ScanState::InNumber { start, hint };
        return Ok(SpannedToken {
            token: Token::NeedMore { consumed: start },
            span: Span::new(start, cursor - start),
        });
    }

    let end = cursor;
    let lone_minus = end == start + 1 && buf.get(start) == Some(&b'-');
    if end == start || lone_minus {
        let offending = buf.get(cursor).map_or('?', |&b| b as char);
        return Err(ScanError {
            kind: ScanErrorKind::UnexpectedChar(offending),
            span: Span::new(start, 1),
        });
    }

    Ok(SpannedToken {
        token: Token::Number { start, end, hint },
        span: Span::new(start, end - start),
    })
}
/// Starts matching the literal `expected` at the current position.
///
/// `token` is the token to return on a full match. Matching begins with zero bytes
/// matched and is delegated to `scan_literal_content`.
fn scan_literal(
&mut self,
buf: &[u8],
start: usize,
expected: &'static [u8],
token: Token,
) -> ScanResult {
self.scan_literal_content(buf, start, expected, 0, token)
}
/// Resumes matching a literal interrupted by the end of the buffer.
///
/// The token to produce is rederived from `expected`, which must be one of `true`,
/// `false` or `null`; matching continues with `matched` bytes already accepted.
fn resume_literal(
    &mut self,
    buf: &[u8],
    start: usize,
    expected: &'static [u8],
    matched: usize,
) -> ScanResult {
    let token = if expected == b"true" {
        Token::True
    } else if expected == b"false" {
        Token::False
    } else if expected == b"null" {
        Token::Null
    } else {
        unreachable!()
    };
    self.scan_literal_content(buf, start, expected, matched, token)
}
/// Matches the remaining bytes of `expected` against `buf`, starting at `self.pos`.
///
/// `matched` is the number of leading bytes of `expected` already accepted. On a full
/// match `token` is returned with a span of `expected.len()` bytes from `start`. If the
/// buffer runs out first, progress is saved and `Token::NeedMore` is returned.
///
/// # Errors
/// Returns `UnexpectedChar` for the first byte that differs from the literal.
fn scan_literal_content(
    &mut self,
    buf: &[u8],
    start: usize,
    expected: &'static [u8],
    matched: usize,
    token: Token,
) -> ScanResult {
    for (index, &wanted) in expected.iter().enumerate().skip(matched) {
        let Some(&found) = buf.get(self.pos) else {
            // Out of input: remember how far we got so the next call can continue.
            self.state = ScanState::InLiteral {
                start,
                expected,
                matched: index,
            };
            return Ok(SpannedToken {
                token: Token::NeedMore { consumed: start },
                span: Span::new(start, self.pos - start),
            });
        };
        if found != wanted {
            return Err(ScanError {
                kind: ScanErrorKind::UnexpectedChar(found as char),
                span: Span::new(self.pos, 1),
            });
        }
        self.pos += 1;
    }

    Ok(SpannedToken {
        token,
        span: Span::new(start, expected.len()),
    })
}
}
/// The default scanner is the same as [`Scanner::new`].
impl Default for Scanner {
fn default() -> Self {
Self::new()
}
}
/// Reports whether any of the 16 bytes packed into `window` equals `byte`.
///
/// XOR-ing with `byte` repeated in every lane turns matching lanes into zero; the
/// subtract-and-mask expression below is non-zero exactly when some lane is zero.
#[inline]
const fn contains_byte(window: u128, byte: u8) -> bool {
    const ONES: u128 = u128::from_ne_bytes([0x01; 16]);
    const HIGH_BITS: u128 = u128::from_ne_bytes([0x80; 16]);

    let lanes = window ^ u128::from_ne_bytes([byte; 16]);
    (lanes.wrapping_sub(ONES) & !lanes & HIGH_BITS) != 0
}
/// Decodes the JSON string content in `buf[start..end]` into an owned `String`,
/// resolving backslash escapes and validating UTF-8.
///
/// `start..end` must delimit the content between the quotes (as reported by
/// `Token::String`). Supported escapes are `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`,
/// `\t` and `\uXXXX`, including UTF-16 surrogate pairs. A backslash followed by any
/// other ASCII byte yields that byte unchanged.
///
/// # Errors
/// * `UnexpectedEof` — the content ends right after a backslash or inside `\uXXXX`.
/// * `UnexpectedChar('?')` — the four bytes after `\u` are not all hex digits. A leading
///   `+`, which `u16::from_str_radix` would otherwise accept, is rejected too.
/// * `InvalidUtf8` — the bytes are not valid UTF-8, a surrogate is unpaired or
///   mismatched, or a backslash is followed by a non-ASCII byte.
///
/// # Panics
/// Panics if `start..end` is not a valid range of `buf`.
pub fn decode_string_owned(
    buf: &[u8],
    start: usize,
    end: usize,
) -> Result<alloc::string::String, ScanError> {
    use alloc::string::String;

    let slice = &buf[start..end];

    // Parses `slice[at..at + 4]` as the code unit of a `\uXXXX` escape.
    // The caller guarantees that `at + 4 <= slice.len()`.
    let parse_code_unit = |at: usize| -> Result<u16, ScanError> {
        let digits = &slice[at..at + 4];
        let text = str::from_utf8(digits).map_err(|_| ScanError {
            kind: ScanErrorKind::InvalidUtf8,
            span: Span::new(start + at, 4),
        })?;
        let malformed = || ScanError {
            kind: ScanErrorKind::UnexpectedChar('?'),
            span: Span::new(start + at, 4),
        };
        // `from_str_radix` tolerates a leading `+`; JSON requires four hex digits.
        if !digits.iter().all(u8::is_ascii_hexdigit) {
            return Err(malformed());
        }
        u16::from_str_radix(text, 16).map_err(|_| malformed())
    };

    let mut result = String::with_capacity(end - start);
    let mut i = 0;
    while i < slice.len() {
        if slice[i] != b'\\' {
            // Copy everything up to the next backslash in one step. Validating only
            // this run keeps the whole decode linear in the input length.
            let run_len = slice[i..]
                .iter()
                .position(|&b| b == b'\\')
                .unwrap_or(slice.len() - i);
            let run = &slice[i..i + run_len];
            let text = str::from_utf8(run).map_err(|e| ScanError {
                kind: ScanErrorKind::InvalidUtf8,
                span: Span::new(start + i + e.valid_up_to(), 1),
            })?;
            result.push_str(text);
            i += run_len;
            continue;
        }

        // `slice[i]` is a backslash; look at the escaped byte.
        i += 1;
        let Some(&escaped) = slice.get(i) else {
            return Err(ScanError {
                kind: ScanErrorKind::UnexpectedEof("in escape sequence"),
                span: Span::new(start + i - 1, 1),
            });
        };
        match escaped {
            b'"' => result.push('"'),
            b'\\' => result.push('\\'),
            b'/' => result.push('/'),
            b'b' => result.push('\x08'),
            b'f' => result.push('\x0c'),
            b'n' => result.push('\n'),
            b'r' => result.push('\r'),
            b't' => result.push('\t'),
            b'u' => {
                i += 1;
                if i + 4 > slice.len() {
                    return Err(ScanError {
                        kind: ScanErrorKind::UnexpectedEof("in unicode escape"),
                        span: Span::new(start + i - 2, slice.len() - i + 2),
                    });
                }
                let code_unit = parse_code_unit(i)?;
                i += 4;
                let code_point = if (0xD800..=0xDBFF).contains(&code_unit) {
                    // High surrogate: a `\uXXXX` low surrogate must follow immediately.
                    if i + 6 > slice.len() || slice[i] != b'\\' || slice[i + 1] != b'u' {
                        return Err(ScanError {
                            kind: ScanErrorKind::InvalidUtf8,
                            span: Span::new(start + i - 6, 6),
                        });
                    }
                    i += 2;
                    let low_unit = parse_code_unit(i)?;
                    i += 4;
                    if !(0xDC00..=0xDFFF).contains(&low_unit) {
                        return Err(ScanError {
                            kind: ScanErrorKind::InvalidUtf8,
                            span: Span::new(start + i - 4, 4),
                        });
                    }
                    let high = code_unit as u32;
                    let low = low_unit as u32;
                    0x10000 + ((high & 0x3FF) << 10) + (low & 0x3FF)
                } else if (0xDC00..=0xDFFF).contains(&code_unit) {
                    // A low surrogate without a preceding high surrogate.
                    return Err(ScanError {
                        kind: ScanErrorKind::InvalidUtf8,
                        span: Span::new(start + i - 4, 4),
                    });
                } else {
                    code_unit as u32
                };
                let c = char::from_u32(code_point).ok_or_else(|| ScanError {
                    kind: ScanErrorKind::InvalidUtf8,
                    span: Span::new(start + i - 4, 4),
                })?;
                result.push(c);
                // `i` already points past the escape.
                continue;
            }
            // Unknown ASCII escapes are passed through leniently.
            other if other.is_ascii() => result.push(other as char),
            // A non-ASCII byte here is part of a multi-byte character (or garbage).
            // Casting it to `char` would produce a wrong Latin-1 character.
            _ => {
                return Err(ScanError {
                    kind: ScanErrorKind::InvalidUtf8,
                    span: Span::new(start + i, 1),
                });
            }
        }
        i += 1;
    }
    Ok(result)
}
/// Borrows `buf[start..end]` as a `&str` when no decoding is required.
///
/// Returns `None` if the range contains a backslash (escapes need an owned decode) or
/// is not valid UTF-8.
///
/// # Panics
/// Panics if `start..end` is not a valid range of `buf`.
pub fn decode_string_borrowed(buf: &[u8], start: usize, end: usize) -> Option<&str> {
    let raw = &buf[start..end];
    if raw.iter().any(|&b| b == b'\\') {
        None
    } else {
        str::from_utf8(raw).ok()
    }
}
/// Borrows `buf[start..end]` as a `&str` without validating UTF-8.
///
/// Returns `None` if the range contains a backslash.
///
/// # Safety
/// `buf[start..end]` must be valid UTF-8.
///
/// # Panics
/// Panics if `start..end` is not a valid range of `buf`.
pub unsafe fn decode_string_borrowed_unchecked(
    buf: &[u8],
    start: usize,
    end: usize,
) -> Option<&str> {
    let raw = &buf[start..end];
    if raw.iter().any(|&b| b == b'\\') {
        return None;
    }
    // SAFETY: the caller guarantees that `buf[start..end]` is valid UTF-8.
    let text = unsafe { str::from_utf8_unchecked(raw) };
    Some(text)
}
/// Decodes the JSON string content in `buf[start..end]` into an owned `String`,
/// resolving backslash escapes but skipping UTF-8 validation of the plain text.
///
/// Supported escapes are `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t` and `\uXXXX`,
/// including UTF-16 surrogate pairs. A backslash followed by any other ASCII byte yields
/// that byte unchanged.
///
/// # Safety
/// `buf[start..end]` must be valid UTF-8.
///
/// # Errors
/// * `UnexpectedEof` — the content ends right after a backslash or inside `\uXXXX`.
/// * `UnexpectedChar('?')` — the four bytes after `\u` are not all ASCII hex digits.
/// * `InvalidUtf8` — a surrogate is unpaired or mismatched, or a backslash is followed
///   by a non-ASCII byte.
///
/// # Panics
/// Panics if `start..end` is not a valid range of `buf`.
pub unsafe fn decode_string_owned_unchecked(
    buf: &[u8],
    start: usize,
    end: usize,
) -> Result<alloc::string::String, ScanError> {
    use alloc::string::String;

    let slice = &buf[start..end];

    // Parses `slice[at..at + 4]` as the code unit of a `\uXXXX` escape.
    // The caller guarantees that `at + 4 <= slice.len()`.
    let parse_code_unit = |at: usize| -> Result<u16, ScanError> {
        let digits = &slice[at..at + 4];
        let malformed = || ScanError {
            kind: ScanErrorKind::UnexpectedChar('?'),
            span: Span::new(start + at, 4),
        };
        // This check is required for soundness: a fixed four-byte window can cut
        // through a multi-byte character even when the input is valid UTF-8. It also
        // rejects the leading `+` that `from_str_radix` would accept.
        if !digits.iter().all(u8::is_ascii_hexdigit) {
            return Err(malformed());
        }
        // SAFETY: all four bytes were just verified to be ASCII, hence valid UTF-8.
        let text = unsafe { str::from_utf8_unchecked(digits) };
        u16::from_str_radix(text, 16).map_err(|_| malformed())
    };

    let mut result = String::with_capacity(end - start);
    let mut i = 0;
    while i < slice.len() {
        if slice[i] != b'\\' {
            // Copy everything up to the next backslash in one step.
            let run_len = slice[i..]
                .iter()
                .position(|&b| b == b'\\')
                .unwrap_or(slice.len() - i);
            let run = &slice[i..i + run_len];
            // SAFETY: the caller guarantees `slice` is valid UTF-8. `i` is on a character
            // boundary because only whole runs and ASCII-only escapes have been consumed
            // so far. The run ends at an ASCII backslash or at the end of the slice, both
            // of which are character boundaries.
            let text = unsafe { str::from_utf8_unchecked(run) };
            result.push_str(text);
            i += run_len;
            continue;
        }

        // `slice[i]` is a backslash; look at the escaped byte.
        i += 1;
        let Some(&escaped) = slice.get(i) else {
            return Err(ScanError {
                kind: ScanErrorKind::UnexpectedEof("in escape sequence"),
                span: Span::new(start + i - 1, 1),
            });
        };
        match escaped {
            b'"' => result.push('"'),
            b'\\' => result.push('\\'),
            b'/' => result.push('/'),
            b'b' => result.push('\x08'),
            b'f' => result.push('\x0c'),
            b'n' => result.push('\n'),
            b'r' => result.push('\r'),
            b't' => result.push('\t'),
            b'u' => {
                i += 1;
                if i + 4 > slice.len() {
                    return Err(ScanError {
                        kind: ScanErrorKind::UnexpectedEof("in unicode escape"),
                        span: Span::new(start + i - 2, slice.len() - i + 2),
                    });
                }
                let code_unit = parse_code_unit(i)?;
                i += 4;
                let code_point = if (0xD800..=0xDBFF).contains(&code_unit) {
                    // High surrogate: a `\uXXXX` low surrogate must follow immediately.
                    if i + 6 > slice.len() || slice[i] != b'\\' || slice[i + 1] != b'u' {
                        return Err(ScanError {
                            kind: ScanErrorKind::InvalidUtf8,
                            span: Span::new(start + i - 6, 6),
                        });
                    }
                    i += 2;
                    let low_unit = parse_code_unit(i)?;
                    i += 4;
                    if !(0xDC00..=0xDFFF).contains(&low_unit) {
                        return Err(ScanError {
                            kind: ScanErrorKind::InvalidUtf8,
                            span: Span::new(start + i - 4, 4),
                        });
                    }
                    let high = code_unit as u32;
                    let low = low_unit as u32;
                    0x10000 + ((high & 0x3FF) << 10) + (low & 0x3FF)
                } else if (0xDC00..=0xDFFF).contains(&code_unit) {
                    // A low surrogate without a preceding high surrogate.
                    return Err(ScanError {
                        kind: ScanErrorKind::InvalidUtf8,
                        span: Span::new(start + i - 4, 4),
                    });
                } else {
                    code_unit as u32
                };
                let c = char::from_u32(code_point).ok_or_else(|| ScanError {
                    kind: ScanErrorKind::InvalidUtf8,
                    span: Span::new(start + i - 4, 4),
                })?;
                result.push(c);
                // `i` already points past the escape.
                continue;
            }
            // Unknown ASCII escapes are passed through leniently.
            other if other.is_ascii() => result.push(other as char),
            // Consuming only the lead byte of a multi-byte character would leave `i` on a
            // continuation byte and make the unchecked conversion above unsound.
            _ => {
                return Err(ScanError {
                    kind: ScanErrorKind::InvalidUtf8,
                    span: Span::new(start + i, 1),
                });
            }
        }
        i += 1;
    }
    Ok(result)
}
/// Decodes string content, borrowing from `buf` when possible.
///
/// With `has_escapes == false` the content is borrowed via `decode_string_borrowed`;
/// otherwise it is decoded into an owned string via `decode_string_owned`.
///
/// # Errors
/// Returns `InvalidUtf8` spanning the whole range when borrowing is refused, and
/// propagates any error from `decode_string_owned`.
#[allow(dead_code)]
pub fn decode_string<'a>(
    buf: &'a [u8],
    start: usize,
    end: usize,
    has_escapes: bool,
) -> Result<alloc::borrow::Cow<'a, str>, ScanError> {
    use alloc::borrow::Cow;

    if !has_escapes {
        return match decode_string_borrowed(buf, start, end) {
            Some(text) => Ok(Cow::Borrowed(text)),
            None => Err(ScanError {
                kind: ScanErrorKind::InvalidUtf8,
                span: Span::new(start, end - start),
            }),
        };
    }
    let owned = decode_string_owned(buf, start, end)?;
    Ok(Cow::Owned(owned))
}
/// Decodes string content like `decode_string`, using the `_unchecked` decoders.
///
/// # Safety
/// The same contract as `decode_string_borrowed_unchecked` and
/// `decode_string_owned_unchecked`, to which the arguments are forwarded unchanged.
///
/// # Errors
/// Returns `InvalidUtf8` spanning the whole range when borrowing is refused, and
/// propagates any error from the owned decoder.
#[allow(dead_code)]
pub unsafe fn decode_string_unchecked<'a>(
    buf: &'a [u8],
    start: usize,
    end: usize,
    has_escapes: bool,
) -> Result<alloc::borrow::Cow<'a, str>, ScanError> {
    use alloc::borrow::Cow;

    if !has_escapes {
        // SAFETY: the caller upholds the contract of the borrowed unchecked decoder.
        let borrowed = unsafe { decode_string_borrowed_unchecked(buf, start, end) };
        return match borrowed {
            Some(text) => Ok(Cow::Borrowed(text)),
            None => Err(ScanError {
                kind: ScanErrorKind::InvalidUtf8,
                span: Span::new(start, end - start),
            }),
        };
    }
    // SAFETY: the caller upholds the contract of the owned unchecked decoder.
    let owned = unsafe { decode_string_owned_unchecked(buf, start, end) }?;
    Ok(Cow::Owned(owned))
}
/// A number parsed by `parse_number`, in the narrowest representation that was tried
/// successfully for its [`NumberHint`].
#[derive(Debug, Clone, PartialEq)]
pub enum ParsedNumber {
/// `Unsigned` hint, fits in `u64`.
U64(u64),
/// `Signed` hint, fits in `i64`.
I64(i64),
/// `Unsigned` hint, too large for `u64` but fits in `u128`.
U128(u128),
/// `Signed` hint, does not fit in `i64` but fits in `i128`.
I128(i128),
/// `Float` hint.
F64(f64),
}
/// Parses the number text in `buf[start..end]` according to `hint` (lexical backend).
///
/// * `Float` parses as `f64`.
/// * `Signed` tries `i64`, then `i128`.
/// * `Unsigned` tries `u64`, then `u128`.
///
/// # Errors
/// Returns `UnexpectedChar('?')` spanning the whole range when no attempt succeeds.
///
/// # Panics
/// Panics if `start..end` is not a valid range of `buf`.
#[cfg(feature = "lexical-parse")]
pub fn parse_number(
    buf: &[u8],
    start: usize,
    end: usize,
    hint: NumberHint,
) -> Result<ParsedNumber, ScanError> {
    use lexical_parse_float::FromLexical as _;
    use lexical_parse_integer::FromLexical as _;

    let text = &buf[start..end];
    let parsed = match hint {
        NumberHint::Float => f64::from_lexical(text).ok().map(ParsedNumber::F64),
        NumberHint::Signed => i64::from_lexical(text)
            .ok()
            .map(ParsedNumber::I64)
            .or_else(|| i128::from_lexical(text).ok().map(ParsedNumber::I128)),
        NumberHint::Unsigned => u64::from_lexical(text)
            .ok()
            .map(ParsedNumber::U64)
            .or_else(|| u128::from_lexical(text).ok().map(ParsedNumber::U128)),
    };
    parsed.ok_or_else(|| ScanError {
        kind: ScanErrorKind::UnexpectedChar('?'),
        span: Span::new(start, end - start),
    })
}
/// Variant of `parse_number` with an `unsafe` signature (lexical backend).
///
/// With this backend the call simply forwards to the safe `parse_number`, so nothing
/// unchecked actually happens here.
///
/// # Safety
/// This body performs no unsafe operation. NOTE(review): callers should presumably still
/// uphold the contract of the non-lexical variant (valid UTF-8 in `buf[start..end]`) so
/// that code stays correct under either feature configuration — confirm.
#[cfg(feature = "lexical-parse")]
pub unsafe fn parse_number_unchecked(
buf: &[u8],
start: usize,
end: usize,
hint: NumberHint,
) -> Result<ParsedNumber, ScanError> {
parse_number(buf, start, end, hint)
}
/// Parses the number text in `buf[start..end]` according to `hint` (std backend).
///
/// The bytes are validated as UTF-8 and then handed to `parse_number_inner`.
///
/// # Errors
/// Returns `InvalidUtf8` spanning the whole range if the bytes are not UTF-8, and
/// propagates any error from `parse_number_inner`.
///
/// # Panics
/// Panics if `start..end` is not a valid range of `buf`.
#[cfg(not(feature = "lexical-parse"))]
pub fn parse_number(
    buf: &[u8],
    start: usize,
    end: usize,
    hint: NumberHint,
) -> Result<ParsedNumber, ScanError> {
    match str::from_utf8(&buf[start..end]) {
        Ok(text) => parse_number_inner(text, start, end, hint),
        Err(_) => Err(ScanError {
            kind: ScanErrorKind::InvalidUtf8,
            span: Span::new(start, end - start),
        }),
    }
}
/// Parses the number text in `buf[start..end]` without validating UTF-8 (std backend).
///
/// # Safety
/// `buf[start..end]` must be valid UTF-8; it is converted with `from_utf8_unchecked`.
///
/// # Errors
/// Propagates any error from `parse_number_inner`.
///
/// # Panics
/// Panics if `start..end` is not a valid range of `buf`.
#[cfg(not(feature = "lexical-parse"))]
pub unsafe fn parse_number_unchecked(
buf: &[u8],
start: usize,
end: usize,
hint: NumberHint,
) -> Result<ParsedNumber, ScanError> {
let slice = &buf[start..end];
// SAFETY: the caller guarantees that `buf[start..end]` is valid UTF-8.
let s = unsafe { str::from_utf8_unchecked(slice) };
parse_number_inner(s, start, end, hint)
}
/// Parses number text `s` according to `hint` using `str::parse`.
///
/// * `Float` parses as `f64`.
/// * `Signed` tries `i64`, then `i128`.
/// * `Unsigned` tries `u64`, then `u128`.
///
/// `start` and `end` are used only to build the error span.
///
/// # Errors
/// Returns `UnexpectedChar('?')` spanning `start..end` when no attempt succeeds.
#[cfg(not(feature = "lexical-parse"))]
fn parse_number_inner(
    s: &str,
    start: usize,
    end: usize,
    hint: NumberHint,
) -> Result<ParsedNumber, ScanError> {
    let parsed = match hint {
        NumberHint::Float => s.parse::<f64>().ok().map(ParsedNumber::F64),
        NumberHint::Signed => s
            .parse::<i64>()
            .ok()
            .map(ParsedNumber::I64)
            .or_else(|| s.parse::<i128>().ok().map(ParsedNumber::I128)),
        NumberHint::Unsigned => s
            .parse::<u64>()
            .ok()
            .map(ParsedNumber::U64)
            .or_else(|| s.parse::<u128>().ok().map(ParsedNumber::U128)),
    };
    parsed.ok_or_else(|| ScanError {
        kind: ScanErrorKind::UnexpectedChar('?'),
        span: Span::new(start, end - start),
    })
}
// Unit tests for the scanner, the string decoders and the number parser.
#[cfg(test)]
mod tests {
use super::*;
use facet_testhelpers::test;
// Each structural byte yields its own token, followed by `Eof`.
#[test]
fn test_simple_tokens() {
let input = b"{}[],:";
let mut scanner = Scanner::new();
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::ObjectStart
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::ObjectEnd
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::ArrayStart
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::ArrayEnd
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::Comma
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::Colon
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::Eof
));
}
// String offsets exclude the quotes; no backslash means `has_escapes` is false.
#[test]
fn test_string_no_escapes() {
let input = b"\"hello world\"";
let mut scanner = Scanner::new();
let result = scanner.next_token(input).unwrap();
assert!(matches!(
result.token,
Token::String {
start: 1,
end: 12,
has_escapes: false
}
));
}
// A backslash inside the content sets `has_escapes`.
#[test]
fn test_string_with_escapes() {
let input = br#""hello\nworld""#;
let mut scanner = Scanner::new();
let result = scanner.next_token(input).unwrap();
assert!(matches!(
result.token,
Token::String {
start: 1,
end: 13,
has_escapes: true
}
));
}
// Number hints follow the syntax; a number that runs to the end of the buffer is `NeedMore`.
#[test]
fn test_numbers() {
let mut scanner = Scanner::new();
let result = scanner.next_token(b"42,").unwrap();
assert!(matches!(
result.token,
Token::Number {
hint: NumberHint::Unsigned,
..
}
));
scanner.set_pos(0);
let result = scanner.next_token(b"-42]").unwrap();
assert!(matches!(
result.token,
Token::Number {
hint: NumberHint::Signed,
..
}
));
scanner.set_pos(0);
let result = scanner.next_token(b"3.14}").unwrap();
assert!(matches!(
result.token,
Token::Number {
hint: NumberHint::Float,
..
}
));
scanner.set_pos(0);
let result = scanner.next_token(b"1e10 ").unwrap();
assert!(matches!(
result.token,
Token::Number {
hint: NumberHint::Float,
..
}
));
scanner.set_pos(0);
// No terminator after the digits: the scanner cannot know the number is complete.
let result = scanner.next_token(b"42").unwrap();
assert!(matches!(result.token, Token::NeedMore { .. }));
}
// `true`, `false` and `null` are recognised when fully present in the buffer.
#[test]
fn test_literals() {
let mut scanner = Scanner::new();
let result = scanner.next_token(b"true,").unwrap();
assert!(matches!(result.token, Token::True));
scanner.set_pos(0);
let result = scanner.next_token(b"false]").unwrap();
assert!(matches!(result.token, Token::False));
scanner.set_pos(0);
let result = scanner.next_token(b"null}").unwrap();
assert!(matches!(result.token, Token::Null));
}
// Whitespace between tokens is skipped, including trailing whitespace before `Eof`.
#[test]
fn test_whitespace_handling() {
let input = b" {\n\t\"key\" : 42 } ";
let mut scanner = Scanner::new();
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::ObjectStart
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::String { .. }
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::Colon
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::Number { .. }
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::ObjectEnd
));
assert!(matches!(
scanner.next_token(input).unwrap().token,
Token::Eof
));
}
// Escape-free content can be borrowed directly.
#[test]
fn test_decode_string_no_escapes() {
let input = b"hello world";
let result = decode_string_borrowed(input, 0, input.len());
assert_eq!(result, Some("hello world"));
}
// `\n` is decoded to a newline.
#[test]
fn test_decode_string_with_escapes() {
let input = br#"hello\nworld"#;
let result = decode_string_owned(input, 0, input.len()).unwrap();
assert_eq!(result, "hello\nworld");
}
// `\uXXXX` escapes in the Basic Multilingual Plane.
#[test]
fn test_decode_string_unicode() {
let input = br#"\u0048\u0065\u006C\u006C\u006F"#;
let result = decode_string_owned(input, 0, input.len()).unwrap();
assert_eq!(result, "Hello");
}
// A high/low surrogate pair combines into one scalar value.
#[test]
fn test_decode_string_surrogate_pair() {
let input = br#"\uD83D\uDE00"#;
let result = decode_string_owned(input, 0, input.len()).unwrap();
assert_eq!(result, "😀");
}
// `has_escapes == false` yields a borrowed `Cow`.
#[test]
fn test_decode_cow_borrowed() {
let input = b"simple";
let result = decode_string(input, 0, input.len(), false).unwrap();
assert!(matches!(result, alloc::borrow::Cow::Borrowed(_)));
assert_eq!(&*result, "simple");
}
// `has_escapes == true` yields an owned `Cow`.
#[test]
fn test_decode_cow_owned() {
let input = br#"has\tescape"#;
let result = decode_string(input, 0, input.len(), true).unwrap();
assert!(matches!(result, alloc::borrow::Cow::Owned(_)));
assert_eq!(&*result, "has\tescape");
}
// Each hint selects the matching `ParsedNumber` variant.
#[test]
fn test_parse_numbers() {
assert_eq!(
parse_number(b"42", 0, 2, NumberHint::Unsigned).unwrap(),
ParsedNumber::U64(42)
);
assert_eq!(
parse_number(b"-42", 0, 3, NumberHint::Signed).unwrap(),
ParsedNumber::I64(-42)
);
#[allow(clippy::approx_constant)]
{
assert_eq!(
parse_number(b"3.14", 0, 4, NumberHint::Float).unwrap(),
ParsedNumber::F64(3.14)
);
}
}
}