#[cfg(not(test))]
use alloc::string::String;
use core::fmt;
/// A location inside the input being validated, attached to every error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
/// Index of the byte in the input slice (0-based).
pub offset: usize,
/// Line number; the validator starts counting at 1.
pub line: usize,
/// Column within the line; the validator starts at 1 and advances it per byte
/// consumed, so a multi-byte UTF-8 character moves it by more than one.
pub column: usize,
}
impl fmt::Display for Position {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"line {}, column {} (offset {})",
self.line, self.column, self.offset
)
}
}
/// The category of a validation failure, with any detail needed to describe it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationErrorKind {
/// A byte was present but is not allowed here; `expected` describes what was.
UnexpectedCharacter { expected: &'static str, found: char },
/// The input ended where `expected` was still required.
UnexpectedEof { expected: &'static str },
/// Non-whitespace bytes follow the top-level JSON value.
TrailingContent,
/// The input ended inside a string literal.
UnclosedString,
/// A backslash was followed by a character that is not a recognised escape.
InvalidEscape { sequence: char },
/// A `\u` escape was not followed by four hex digits; `reason` says why.
InvalidUnicodeEscape { reason: &'static str },
/// A `\u` surrogate escape was not part of a valid high/low pair.
UnpairedSurrogate { codepoint: u16 },
/// A raw byte below 0x20 appeared inside a string.
ControlCharacter { byte: u8 },
/// A number started with `0` followed by another digit.
LeadingZero,
/// A value started with `+`.
LeadingPlus,
/// A number was otherwise malformed; `reason` describes the missing part.
InvalidNumber { reason: &'static str },
/// A run of lowercase letters was not `null`, `true`, or `false`.
InvalidKeyword { found: String },
/// A string contained a byte sequence that is not well-formed UTF-8.
InvalidUtf8,
}
/// Renders the user-facing message for each kind of validation failure.
impl fmt::Display for ValidationErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Variants carrying a payload format and return immediately; the
        // payload-free variants fall through to a single `write_str`.
        let fixed_message = match self {
            Self::UnexpectedCharacter { expected, found } => {
                return write!(f, "expected {}, found {:?}", expected, found);
            }
            Self::UnexpectedEof { expected } => {
                return write!(f, "unexpected end of input, expected {}", expected);
            }
            Self::InvalidEscape { sequence } => {
                return write!(f, "invalid escape sequence '\\{}'", sequence);
            }
            Self::InvalidUnicodeEscape { reason } => {
                return write!(f, "invalid unicode escape: {}", reason);
            }
            Self::UnpairedSurrogate { codepoint } => {
                return write!(f, "unpaired surrogate \\u{:04X}", codepoint);
            }
            Self::ControlCharacter { byte } => {
                return write!(f, "unescaped control character 0x{:02X}", byte);
            }
            Self::InvalidNumber { reason } => {
                return write!(f, "invalid number: {}", reason);
            }
            Self::InvalidKeyword { found } => {
                return write!(
                    f,
                    "invalid keyword '{}' (expected null, true, or false)",
                    found
                );
            }
            Self::TrailingContent => "trailing content after JSON value",
            Self::UnclosedString => "unclosed string",
            Self::LeadingZero => "leading zeros not allowed in numbers",
            Self::LeadingPlus => "leading plus sign not allowed in numbers",
            Self::InvalidUtf8 => "invalid UTF-8 sequence",
        };
        f.write_str(fixed_message)
    }
}
/// A validation failure: what went wrong and where in the input it was detected.
///
/// Both fields are comparable, so the error itself derives `PartialEq`/`Eq`;
/// this lets callers compare errors (and `Result<(), ValidationError>` values)
/// directly, e.g. with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// The category of failure and its details.
    pub kind: ValidationErrorKind,
    /// Where in the input the failure was reported.
    pub position: Position,
}
impl fmt::Display for ValidationError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} at {}", self.kind, self.position)
}
}
// Marks `ValidationError` as a standard error type. Only compiled when the
// `std` feature is enabled; the impl is empty, so only the trait's default
// methods apply.
#[cfg(feature = "std")]
impl std::error::Error for ValidationError {}
/// Cursor over a byte slice that checks whether it is a single well-formed JSON value.
pub struct Validator<'a> {
// The bytes being validated; borrowed, never copied.
input: &'a [u8],
// Index of the next byte to examine.
offset: usize,
// Current line number (initialised to 1 by `new`).
line: usize,
// Current column number (initialised to 1 by `new`).
column: usize,
}
impl<'a> Validator<'a> {
pub fn new(input: &'a [u8]) -> Self {
Self {
input,
offset: 0,
line: 1,
column: 1,
}
}
pub fn validate(&mut self) -> Result<(), ValidationError> {
self.skip_whitespace();
if self.is_eof() {
return Err(self.error(ValidationErrorKind::UnexpectedEof {
expected: "JSON value",
}));
}
self.validate_value()?;
self.skip_whitespace();
if !self.is_eof() {
return Err(self.error(ValidationErrorKind::TrailingContent));
}
Ok(())
}
fn validate_value(&mut self) -> Result<(), ValidationError> {
match self.peek() {
Some(b'{') => self.validate_object(),
Some(b'[') => self.validate_array(),
Some(b'"') => self.validate_string(),
Some(b'-') | Some(b'0'..=b'9') => self.validate_number(),
Some(b't') | Some(b'f') | Some(b'n') => self.validate_keyword(),
Some(b'+') => Err(self.error(ValidationErrorKind::LeadingPlus)),
Some(c) => Err(self.error(ValidationErrorKind::UnexpectedCharacter {
expected: "JSON value",
found: c as char,
})),
None => Err(self.error(ValidationErrorKind::UnexpectedEof {
expected: "JSON value",
})),
}
}
fn validate_object(&mut self) -> Result<(), ValidationError> {
self.advance(); self.skip_whitespace();
if self.peek() == Some(b'}') {
self.advance();
return Ok(());
}
loop {
if self.peek() != Some(b'"') {
return Err(self.error(ValidationErrorKind::UnexpectedCharacter {
expected: "string key",
found: self.peek().map(|b| b as char).unwrap_or('\0'),
}));
}
self.validate_string()?;
self.skip_whitespace();
if self.peek() != Some(b':') {
return Err(self.error(ValidationErrorKind::UnexpectedCharacter {
expected: "':'",
found: self.peek().map(|b| b as char).unwrap_or('\0'),
}));
}
self.advance();
self.skip_whitespace();
self.validate_value()?;
self.skip_whitespace();
match self.peek() {
Some(b',') => {
self.advance();
self.skip_whitespace();
if self.peek() == Some(b'}') {
return Err(self.error(ValidationErrorKind::UnexpectedCharacter {
expected: "string key",
found: '}',
}));
}
}
Some(b'}') => {
self.advance();
return Ok(());
}
Some(c) => {
return Err(self.error(ValidationErrorKind::UnexpectedCharacter {
expected: "',' or '}'",
found: c as char,
}));
}
None => {
return Err(self.error(ValidationErrorKind::UnexpectedEof {
expected: "',' or '}'",
}));
}
}
}
}
fn validate_array(&mut self) -> Result<(), ValidationError> {
self.advance(); self.skip_whitespace();
if self.peek() == Some(b']') {
self.advance();
return Ok(());
}
loop {
self.validate_value()?;
self.skip_whitespace();
match self.peek() {
Some(b',') => {
self.advance();
self.skip_whitespace();
if self.peek() == Some(b']') {
return Err(self.error(ValidationErrorKind::UnexpectedCharacter {
expected: "JSON value",
found: ']',
}));
}
}
Some(b']') => {
self.advance();
return Ok(());
}
Some(c) => {
return Err(self.error(ValidationErrorKind::UnexpectedCharacter {
expected: "',' or ']'",
found: c as char,
}));
}
None => {
return Err(self.error(ValidationErrorKind::UnexpectedEof {
expected: "',' or ']'",
}));
}
}
}
}
fn validate_string(&mut self) -> Result<(), ValidationError> {
self.advance();
loop {
match self.peek() {
Some(b'"') => {
self.advance();
return Ok(());
}
Some(b'\\') => {
self.validate_escape()?;
}
Some(b) if b < 0x20 => {
return Err(self.error(ValidationErrorKind::ControlCharacter { byte: b }));
}
Some(_) => {
self.validate_utf8_char()?;
}
None => {
return Err(self.error(ValidationErrorKind::UnclosedString));
}
}
}
}
fn validate_utf8_char(&mut self) -> Result<(), ValidationError> {
let b = self.peek().unwrap();
if b < 0x80 {
self.advance();
return Ok(());
}
let (len, min_cp, max_cp) = if b & 0xE0 == 0xC0 {
(2, 0x80u32, 0x7FFu32)
} else if b & 0xF0 == 0xE0 {
(3, 0x800u32, 0xFFFFu32)
} else if b & 0xF8 == 0xF0 {
(4, 0x10000u32, 0x10FFFFu32)
} else {
return Err(self.error(ValidationErrorKind::InvalidUtf8));
};
if self.offset + len > self.input.len() {
return Err(self.error(ValidationErrorKind::InvalidUtf8));
}
for i in 1..len {
let cont = self.input[self.offset + i];
if cont & 0xC0 != 0x80 {
return Err(self.error(ValidationErrorKind::InvalidUtf8));
}
}
let cp = match len {
2 => ((b as u32 & 0x1F) << 6) | (self.input[self.offset + 1] as u32 & 0x3F),
3 => {
((b as u32 & 0x0F) << 12)
| ((self.input[self.offset + 1] as u32 & 0x3F) << 6)
| (self.input[self.offset + 2] as u32 & 0x3F)
}
4 => {
((b as u32 & 0x07) << 18)
| ((self.input[self.offset + 1] as u32 & 0x3F) << 12)
| ((self.input[self.offset + 2] as u32 & 0x3F) << 6)
| (self.input[self.offset + 3] as u32 & 0x3F)
}
_ => unreachable!(),
};
if cp < min_cp || cp > max_cp {
return Err(self.error(ValidationErrorKind::InvalidUtf8));
}
if (0xD800..=0xDFFF).contains(&cp) {
return Err(self.error(ValidationErrorKind::InvalidUtf8));
}
for _ in 0..len {
self.advance();
}
Ok(())
}
fn validate_escape(&mut self) -> Result<(), ValidationError> {
self.advance();
match self.peek() {
Some(b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't') => {
self.advance();
Ok(())
}
Some(b'u') => {
self.advance();
let high = self.validate_unicode_escape()?;
if (0xD800..=0xDBFF).contains(&high) {
if self.peek() != Some(b'\\') {
return Err(
self.error(ValidationErrorKind::UnpairedSurrogate { codepoint: high })
);
}
self.advance();
if self.peek() != Some(b'u') {
return Err(
self.error(ValidationErrorKind::UnpairedSurrogate { codepoint: high })
);
}
self.advance();
let low = self.validate_unicode_escape()?;
if !(0xDC00..=0xDFFF).contains(&low) {
return Err(
self.error(ValidationErrorKind::UnpairedSurrogate { codepoint: high })
);
}
} else if (0xDC00..=0xDFFF).contains(&high) {
return Err(
self.error(ValidationErrorKind::UnpairedSurrogate { codepoint: high })
);
}
Ok(())
}
Some(c) => Err(self.error(ValidationErrorKind::InvalidEscape {
sequence: c as char,
})),
None => Err(self.error(ValidationErrorKind::UnclosedString)),
}
}
fn validate_unicode_escape(&mut self) -> Result<u16, ValidationError> {
let mut value: u16 = 0;
for _ in 0..4 {
match self.peek() {
Some(b @ b'0'..=b'9') => {
value = value * 16 + (b - b'0') as u16;
self.advance();
}
Some(b @ b'a'..=b'f') => {
value = value * 16 + (b - b'a' + 10) as u16;
self.advance();
}
Some(b @ b'A'..=b'F') => {
value = value * 16 + (b - b'A' + 10) as u16;
self.advance();
}
Some(_) => {
return Err(self.error(ValidationErrorKind::InvalidUnicodeEscape {
reason: "expected 4 hex digits",
}));
}
None => {
return Err(self.error(ValidationErrorKind::InvalidUnicodeEscape {
reason: "unexpected end of input",
}));
}
}
}
Ok(value)
}
fn validate_number(&mut self) -> Result<(), ValidationError> {
if self.peek() == Some(b'-') {
self.advance();
}
match self.peek() {
Some(b'0') => {
self.advance();
if matches!(self.peek(), Some(b'0'..=b'9')) {
return Err(self.error(ValidationErrorKind::LeadingZero));
}
}
Some(b'1'..=b'9') => {
self.advance();
while matches!(self.peek(), Some(b'0'..=b'9')) {
self.advance();
}
}
Some(_) | None => {
return Err(self.error(ValidationErrorKind::InvalidNumber {
reason: "expected digit after minus sign",
}));
}
}
if self.peek() == Some(b'.') {
self.advance();
if !matches!(self.peek(), Some(b'0'..=b'9')) {
return Err(self.error(ValidationErrorKind::InvalidNumber {
reason: "expected digit after decimal point",
}));
}
while matches!(self.peek(), Some(b'0'..=b'9')) {
self.advance();
}
}
if matches!(self.peek(), Some(b'e' | b'E')) {
self.advance();
if matches!(self.peek(), Some(b'+' | b'-')) {
self.advance();
}
if !matches!(self.peek(), Some(b'0'..=b'9')) {
return Err(self.error(ValidationErrorKind::InvalidNumber {
reason: "expected digit in exponent",
}));
}
while matches!(self.peek(), Some(b'0'..=b'9')) {
self.advance();
}
}
Ok(())
}
fn validate_keyword(&mut self) -> Result<(), ValidationError> {
let start = self.offset;
while matches!(self.peek(), Some(b'a'..=b'z')) {
self.advance();
}
let keyword = &self.input[start..self.offset];
match keyword {
b"null" | b"true" | b"false" => Ok(()),
_ => {
let found = String::from_utf8_lossy(keyword).into_owned();
let err_pos = Position {
offset: start,
line: self.line,
column: self.column - (self.offset - start),
};
Err(ValidationError {
kind: ValidationErrorKind::InvalidKeyword { found },
position: err_pos,
})
}
}
}
fn skip_whitespace(&mut self) {
while let Some(b) = self.peek() {
match b {
b' ' | b'\t' => {
self.offset += 1;
self.column += 1;
}
b'\n' => {
self.offset += 1;
self.line += 1;
self.column = 1;
}
b'\r' => {
self.offset += 1;
if self.peek() == Some(b'\n') {
self.offset += 1;
}
self.line += 1;
self.column = 1;
}
_ => break,
}
}
}
#[inline]
fn peek(&self) -> Option<u8> {
self.input.get(self.offset).copied()
}
#[inline]
fn advance(&mut self) -> Option<u8> {
if self.offset >= self.input.len() {
return None;
}
let b = self.input[self.offset];
self.offset += 1;
self.column += 1;
Some(b)
}
#[inline]
fn is_eof(&self) -> bool {
self.offset >= self.input.len()
}
fn position(&self) -> Position {
Position {
offset: self.offset,
line: self.line,
column: self.column,
}
}
fn error(&self, kind: ValidationErrorKind) -> ValidationError {
ValidationError {
kind,
position: self.position(),
}
}
}
/// Convenience wrapper: validates `input` as a single JSON value.
///
/// # Errors
///
/// Returns whatever error the underlying `Validator` reports for `input`.
pub fn validate(input: &[u8]) -> Result<(), ValidationError> {
    let mut validator = Validator::new(input);
    validator.validate()
}
// Unit tests for the validator. Every test drives the public `validate`
// function with a byte slice and inspects either success or the reported
// error kind / position.
#[cfg(test)]
mod tests {
use super::*;
// ---- Accepted inputs: keywords, containers, strings, numbers, whitespace ----
#[test]
fn test_valid_null() {
assert!(validate(b"null").is_ok());
}
#[test]
fn test_valid_true() {
assert!(validate(b"true").is_ok());
}
#[test]
fn test_valid_false() {
assert!(validate(b"false").is_ok());
}
#[test]
fn test_valid_empty_object() {
assert!(validate(b"{}").is_ok());
}
#[test]
fn test_valid_empty_array() {
assert!(validate(b"[]").is_ok());
}
#[test]
fn test_valid_simple_object() {
assert!(validate(br#"{"key": "value"}"#).is_ok());
}
#[test]
fn test_valid_simple_array() {
assert!(validate(b"[1, 2, 3]").is_ok());
}
#[test]
fn test_valid_nested() {
assert!(validate(br#"{"arr": [1, {"nested": true}]}"#).is_ok());
}
#[test]
fn test_valid_string_escapes() {
assert!(validate(br#""hello\nworld""#).is_ok());
assert!(validate(br#""tab\there""#).is_ok());
assert!(validate(br#""quote\"here""#).is_ok());
assert!(validate(br#""backslash\\here""#).is_ok());
assert!(validate(br#""slash\/here""#).is_ok());
assert!(validate(br#""controls\b\f\r""#).is_ok());
}
// Non-surrogate \u escapes: U+0041, U+00E9 and U+4E2D.
#[test]
fn test_valid_unicode_escape() {
assert!(validate(br#""\u0041""#).is_ok()); assert!(validate(br#""\u00e9""#).is_ok()); assert!(validate(br#""\u4e2d""#).is_ok()); }
#[test]
fn test_valid_surrogate_pair() {
assert!(validate(br#""\uD83D\uDE00""#).is_ok());
}
#[test]
fn test_valid_numbers() {
assert!(validate(b"0").is_ok());
assert!(validate(b"123").is_ok());
assert!(validate(b"-456").is_ok());
assert!(validate(b"3.14159").is_ok());
assert!(validate(b"-0.5").is_ok());
assert!(validate(b"1e10").is_ok());
assert!(validate(b"1E10").is_ok());
assert!(validate(b"1e+10").is_ok());
assert!(validate(b"1e-10").is_ok());
assert!(validate(b"2.5e3").is_ok());
assert!(validate(b"-1.23e-45").is_ok());
}
#[test]
fn test_valid_whitespace() {
assert!(validate(b" null ").is_ok());
assert!(validate(b"\t\n\r null \t\n\r ").is_ok());
assert!(validate(b"{ \"key\" : \"value\" }").is_ok());
assert!(validate(b"[ 1 , 2 , 3 ]").is_ok());
}
// Raw (unescaped) multi-byte UTF-8 inside strings, including a 4-byte emoji.
#[test]
fn test_valid_utf8() {
assert!(validate("\"日本語\"".as_bytes()).is_ok());
assert!(validate("\"émoji: 😀\"".as_bytes()).is_ok());
}
// ---- Rejected inputs: each test pins the error kind that is reported ----
#[test]
fn test_invalid_empty() {
let err = validate(b"").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnexpectedEof { .. }
));
}
#[test]
fn test_invalid_whitespace_only() {
let err = validate(b" ").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnexpectedEof { .. }
));
}
#[test]
fn test_invalid_trailing_content() {
let err = validate(b"null extra").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::TrailingContent));
}
#[test]
fn test_invalid_trailing_comma_object() {
let err = validate(br#"{"key": "value",}"#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnexpectedCharacter { found: '}', .. }
));
}
#[test]
fn test_invalid_trailing_comma_array() {
let err = validate(b"[1, 2, 3,]").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnexpectedCharacter { found: ']', .. }
));
}
#[test]
fn test_invalid_leading_zero() {
let err = validate(b"01").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::LeadingZero));
let err = validate(b"007").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::LeadingZero));
}
#[test]
fn test_invalid_leading_plus() {
let err = validate(b"+1").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::LeadingPlus));
}
#[test]
fn test_invalid_number_trailing_dot() {
let err = validate(b"1.").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidNumber { .. }
));
}
// A leading '.' does not start any value, so it is an unexpected character
// rather than an invalid number.
#[test]
fn test_invalid_number_leading_dot() {
let err = validate(b".5").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnexpectedCharacter { .. }
));
}
#[test]
fn test_invalid_number_empty_exponent() {
let err = validate(b"1e").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidNumber { .. }
));
let err = validate(b"1e+").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidNumber { .. }
));
}
#[test]
fn test_invalid_escape_sequence() {
let err = validate(br#""\q""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidEscape { sequence: 'q' }
));
}
#[test]
fn test_invalid_unicode_escape_short() {
let err = validate(br#""\u00""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnclosedString | ValidationErrorKind::InvalidUnicodeEscape { .. }
));
}
#[test]
fn test_invalid_unicode_escape_bad_hex() {
let err = validate(br#""\u00GG""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidUnicodeEscape { .. }
));
}
#[test]
fn test_invalid_lone_high_surrogate() {
let err = validate(br#""\uD83D""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnpairedSurrogate { .. }
));
}
#[test]
fn test_invalid_lone_low_surrogate() {
let err = validate(br#""\uDE00""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnpairedSurrogate { .. }
));
}
#[test]
fn test_invalid_bad_surrogate_pair() {
let err = validate(br#""\uD83D\u0041""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnpairedSurrogate { .. }
));
}
#[test]
fn test_invalid_control_character() {
let err = validate(b"\"hello\x00world\"").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::ControlCharacter { byte: 0x00 }
));
let err = validate(b"\"hello\x1Fworld\"").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::ControlCharacter { byte: 0x1F }
));
}
#[test]
fn test_invalid_unclosed_string() {
let err = validate(br#""unclosed"#).unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::UnclosedString));
}
#[test]
fn test_invalid_unclosed_object() {
let err = validate(br#"{"key": "value""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnexpectedEof { .. }
));
}
#[test]
fn test_invalid_unclosed_array() {
let err = validate(b"[1, 2, 3").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnexpectedEof { .. }
));
}
// Words starting with t/f/n are treated as keyword attempts; "undefined"
// starts with 'u', which does not begin any value.
#[test]
fn test_invalid_keyword() {
let err = validate(b"nul").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidKeyword { .. }
));
let err = validate(b"tru").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidKeyword { .. }
));
let err = validate(b"fals").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidKeyword { .. }
));
let err = validate(b"undefined").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnexpectedCharacter { .. }
));
}
#[test]
fn test_invalid_utf8() {
let err = validate(b"\"hello\xFF\xFEworld\"").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::InvalidUtf8));
}
// ---- Error positions ----
// The offending '}' is the 17th byte of the single input line.
#[test]
fn test_error_position_single_line() {
let err = validate(br#"{"key": "value",}"#).unwrap_err();
assert_eq!(err.position.line, 1);
assert_eq!(err.position.column, 17); }
// The offending '}' is the first byte of the third line.
#[test]
fn test_error_position_multiline() {
let input = b"{\n \"key\": \"value\",\n}";
let err = validate(input).unwrap_err();
assert_eq!(err.position.line, 3);
assert_eq!(err.position.column, 1); }
// "\r\n" must count as one line break, not two.
#[test]
fn test_error_position_crlf() {
let input = b"{\r\n \"key\": \"value\",\r\n}";
let err = validate(input).unwrap_err();
assert_eq!(err.position.line, 3);
}
// ---- Exhaustive and edge-case coverage ----
#[test]
fn test_all_control_characters_rejected() {
for byte in 0x00u8..=0x1F {
let input = format!("\"hello{}world\"", byte as char);
let err = validate(input.as_bytes()).unwrap_err();
assert!(
matches!(err.kind, ValidationErrorKind::ControlCharacter { byte: b } if b == byte),
"Control char 0x{:02X} should be rejected",
byte
);
}
}
#[test]
fn test_negative_zero() {
assert!(validate(b"-0").is_ok());
assert!(validate(b"[-0]").is_ok());
assert!(validate(br#"{"value": -0}"#).is_ok());
}
#[test]
fn test_zero_with_exponent() {
assert!(validate(b"0e0").is_ok());
assert!(validate(b"0E0").is_ok());
assert!(validate(b"0e+0").is_ok());
assert!(validate(b"0e-0").is_ok());
assert!(validate(b"0.0e0").is_ok());
}
#[test]
fn test_empty_string_key() {
assert!(validate(br#"{"": 1}"#).is_ok());
assert!(validate(br#"{"": ""}"#).is_ok());
assert!(validate(br#"{"": null, "a": 1}"#).is_ok());
}
#[test]
fn test_high_surrogate_followed_by_regular_escape() {
let err = validate(br#""\uD83D\n""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnpairedSurrogate { .. }
));
let err = validate(br#""\uD83D\t""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnpairedSurrogate { .. }
));
}
#[test]
fn test_high_surrogate_at_string_end() {
let err = validate(br#""\uD83D""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnpairedSurrogate { .. }
));
}
// ---- Malformed raw UTF-8 inside strings ----
#[test]
fn test_invalid_utf8_standalone_continuation() {
let err = validate(b"\"hello\x80world\"").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::InvalidUtf8));
let err = validate(b"\"hello\xBFworld\"").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::InvalidUtf8));
}
#[test]
fn test_invalid_utf8_overlong_2byte() {
let err = validate(b"\"hello\xC0\x80world\"").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::InvalidUtf8));
let err = validate(b"\"hello\xC1\xBFworld\"").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::InvalidUtf8));
}
#[test]
fn test_invalid_utf8_f5_and_above() {
for lead in 0xF5u8..=0xFF {
let input = [b'"', b'x', lead, 0x80, 0x80, 0x80, b'"'];
let err = validate(&input).unwrap_err();
assert!(
matches!(err.kind, ValidationErrorKind::InvalidUtf8),
"Lead byte 0x{:02X} should be rejected",
lead
);
}
}
#[test]
fn test_invalid_utf8_truncated() {
let err = validate(b"\"hello\xC2\"").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnclosedString | ValidationErrorKind::InvalidUtf8
));
let err = validate(b"\"hello\xE0\xA0\"").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnclosedString | ValidationErrorKind::InvalidUtf8
));
let err = validate(b"\"hello\xF0\x90\"").unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::UnclosedString | ValidationErrorKind::InvalidUtf8
));
}
// ED A0 80 and ED BF BF are the UTF-8-style encodings of U+D800 and U+DFFF.
#[test]
fn test_invalid_utf8_surrogate_codepoints() {
let err = validate(b"\"hello\xED\xA0\x80world\"").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::InvalidUtf8));
let err = validate(b"\"hello\xED\xBF\xBFworld\"").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::InvalidUtf8));
}
// F4 90 80 80 would decode to U+110000, one past the largest code point.
#[test]
fn test_invalid_utf8_above_max_codepoint() {
let err = validate(b"\"hello\xF4\x90\x80\x80world\"").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::InvalidUtf8));
}
// One assertion per accepted escape, plus \u0000.
#[test]
fn test_all_valid_escapes() {
assert!(validate(br#""\"""#).is_ok()); assert!(validate(br#""\\""#).is_ok()); assert!(validate(br#""\/""#).is_ok()); assert!(validate(br#""\b""#).is_ok()); assert!(validate(br#""\f""#).is_ok()); assert!(validate(br#""\n""#).is_ok()); assert!(validate(br#""\r""#).is_ok()); assert!(validate(br#""\t""#).is_ok()); assert!(validate(br#""\u0000""#).is_ok()); }
#[test]
fn test_invalid_escapes() {
let err = validate(br#""\a""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidEscape { sequence: 'a' }
));
let err = validate(br#""\v""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidEscape { sequence: 'v' }
));
let err = validate(br#""\x00""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidEscape { sequence: 'x' }
));
let err = validate(br#""\0""#).unwrap_err();
assert!(matches!(
err.kind,
ValidationErrorKind::InvalidEscape { sequence: '0' }
));
}
#[test]
fn test_number_edge_cases() {
assert!(validate(b"0").is_ok());
assert!(validate(b"-0").is_ok());
assert!(validate(b"0.0").is_ok());
assert!(validate(b"-0.0").is_ok());
assert!(validate(b"1e1").is_ok());
assert!(validate(b"1E1").is_ok());
assert!(validate(b"1e+1").is_ok());
assert!(validate(b"1e-1").is_ok());
assert!(validate(b"0.1e1").is_ok());
assert!(validate(b"123456789012345678901234567890").is_ok());
let err = validate(b"00").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::LeadingZero));
let err = validate(b"01").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::LeadingZero));
let err = validate(b"-01").unwrap_err();
assert!(matches!(err.kind, ValidationErrorKind::LeadingZero));
}
// Nesting 100 arrays / 50 objects deep, plus a few minimal container shapes.
#[test]
fn test_structural_edge_cases() {
let deep_array = "[".repeat(100) + &"]".repeat(100);
assert!(validate(deep_array.as_bytes()).is_ok());
let deep_object = r#"{"a":"#.repeat(50) + "1" + &"}".repeat(50);
assert!(validate(deep_object.as_bytes()).is_ok());
assert!(validate(b"{}").is_ok());
assert!(validate(b"[]").is_ok());
assert!(validate(b"[[]]").is_ok());
assert!(validate(b"{{}}").is_err()); assert!(validate(br#"{"a":{}}"#).is_ok());
}
#[test]
fn test_whitespace_edge_cases() {
assert!(validate(b" null").is_ok());
assert!(validate(b"\tnull").is_ok());
assert!(validate(b"\nnull").is_ok());
assert!(validate(b"\rnull").is_ok());
assert!(validate(b" \t\n\r null \t\n\r ").is_ok());
assert!(validate(b"{ }").is_ok());
assert!(validate(b"[ ]").is_ok());
assert!(validate(br#"{ "a" : 1 }"#).is_ok());
assert!(validate(b"[ 1 , 2 , 3 ]").is_ok());
}
#[test]
fn test_unicode_escape_edge_cases() {
assert!(validate(br#""\u00ff""#).is_ok());
assert!(validate(br#""\u00FF""#).is_ok());
assert!(validate(br#""\u00Ff""#).is_ok());
assert!(validate(br#""\uD83D\uDE00""#).is_ok());
assert!(validate(br#""\uD83D\uDE00\uD83D\uDE01""#).is_ok());
}
// Every printable ASCII byte except '"' and '\\' may appear raw in a string.
#[test]
fn test_printable_ascii_in_string() {
let mut s = String::from("\"");
for c in 0x20u8..=0x7E {
if c != b'"' && c != b'\\' {
s.push(c as char);
}
}
s.push('"');
assert!(validate(s.as_bytes()).is_ok());
}
}