use linked_hash_map::LinkedHashMap;
use smol_str::SmolStr;
use std::borrow::Borrow;
use std::hash::{Hash, Hasher};
use super::err::ParseError;
use super::loc::Loc;
/// Decodes the JSON backslash escape sequences contained in `raw`.
///
/// Returns `Ok(None)` when `raw` holds no backslash at all, so the caller can
/// keep using the original slice without allocating. Otherwise returns
/// `Ok(Some(..))` with the decoded text.
///
/// A backslash followed by a character that is not a recognised escape is
/// copied through unchanged (backslash included), as is a lone trailing
/// backslash.
///
/// # Errors
///
/// Returns an invalid-unicode-escape [`ParseError`] carrying a clone of `loc`
/// when a `\u` escape is a high surrogate that is not followed by another `\u`
/// escape, when the two units do not form a valid UTF-16 pair, or when a low
/// surrogate appears on its own.
///
/// # Panics
///
/// Panics if the characters following `\u` do not parse as a hexadecimal
/// `u16`; the tokenizer is relied upon to have validated them.
fn decode_json_escapes(raw: &str, loc: &Loc) -> Result<Option<String>, ParseError> {
    /// Consumes up to four characters from `rest` and parses them as one
    /// hexadecimal UTF-16 code unit.
    #[expect(
        clippy::unwrap_used,
        reason = "Tokenizer validates exactly 4 hex digits after \\u"
    )]
    fn read_code_unit(rest: &mut std::str::Chars<'_>) -> u16 {
        let digits: String = rest.by_ref().take(4).collect();
        u16::from_str_radix(&digits, 16).unwrap()
    }

    // Fast path: nothing to decode, let the caller reuse `raw`.
    if !raw.contains('\\') {
        return Ok(None);
    }

    let unicode_err =
        |msg: &'static str| ParseError::invalid_unicode_escape(loc.clone(), msg);

    let mut out = String::with_capacity(raw.len());
    let mut rest = raw.chars();
    while let Some(ch) = rest.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }

        // A backslash at the very end of the input is kept verbatim.
        let Some(escape) = rest.next() else {
            out.push('\\');
            break;
        };

        // Single-character escapes map directly onto one output character.
        let simple = match escape {
            '"' | '\\' | '/' => Some(escape),
            'b' => Some('\u{0008}'),
            'f' => Some('\u{000C}'),
            'n' => Some('\n'),
            'r' => Some('\r'),
            't' => Some('\t'),
            _ => None,
        };
        if let Some(literal) = simple {
            out.push(literal);
            continue;
        }

        // Anything other than `\u` is passed through untouched.
        if escape != 'u' {
            out.push('\\');
            out.push(escape);
            continue;
        }

        let unit = read_code_unit(&mut rest);
        match unit {
            // High surrogate: it must be completed by a following `\uXXXX`.
            0xD800..=0xDBFF => {
                if rest.next() != Some('\\') || rest.next() != Some('u') {
                    return Err(unicode_err(
                        "High surrogate not followed by \\uXXXX low surrogate",
                    ));
                }
                let low = read_code_unit(&mut rest);
                match char::decode_utf16([unit, low]).next() {
                    Some(Ok(combined)) => out.push(combined),
                    _ => return Err(unicode_err("Invalid UTF-16 surrogate pair")),
                }
            }
            // Low surrogate with no high surrogate in front of it.
            0xDC00..=0xDFFF => {
                return Err(unicode_err(
                    "Unexpected low surrogate without preceding high surrogate",
                ));
            }
            // Any other code unit is converted directly to a `char`.
            _ => match char::from_u32(u32::from(unit)) {
                Some(scalar) => out.push(scalar),
                None => return Err(unicode_err("Invalid Unicode code point")),
            },
        }
    }
    Ok(Some(out))
}
/// The kind of a JSON value, together with whatever payload is stored for it.
#[derive(Debug, Clone)]
pub(crate) enum ValueKind {
/// A `null` value.
Null,
/// A boolean value.
Bool(bool),
/// A number. No parsed value is stored; `LocatedValue::get_numeric_str`
/// slices the numeric text out of the source using the value's location.
Number,
/// A string. When built through `LocatedValue::new_string` this holds the
/// text between the quotation marks with JSON escapes already decoded.
String(SmolStr),
/// An array and its elements.
Array(Vec<LocatedValue>),
/// An object, mapping each key string to its value.
Object(LinkedHashMap<LocatedString, LocatedValue>),
}
/// A string paired with the source location it was created from.
///
/// Hashing, equality and `Borrow<str>` for this type all go through the
/// decoded text only; the location takes no part in them.
#[derive(Debug, Clone)]
pub(crate) struct LocatedString {
// Location handed to `LocatedString::new`; that constructor treats its first
// and last byte as the enclosing quotation marks.
loc: Loc,
// Text between the quotation marks, with JSON escapes decoded.
decoded: SmolStr,
}
/// A JSON value: its kind (with payload) paired with its source location.
#[derive(Debug, Clone)]
pub(crate) struct LocatedValue {
// What kind of value this is, plus any stored payload.
kind: ValueKind,
// Source location of the value; number text is read back through it.
loc: Loc,
}
impl LocatedString {
    /// Builds a `LocatedString` from the location of a quoted string.
    ///
    /// The first and last byte of the span are skipped (they are expected to
    /// be the quotation marks) and the text in between has its JSON escapes
    /// decoded.
    ///
    /// # Errors
    ///
    /// Propagates the `ParseError` produced by `decode_json_escapes`.
    pub(crate) fn new(loc: Loc) -> Result<Self, ParseError> {
        let (start, end) = (loc.start() + 1, loc.end() - 1);
        #[expect(
            clippy::string_slice,
            reason = r#"By construction `start` and `end` are both at valid character boundaries and `start` <= `end`.
The JSON parser ensures that `self.loc.start()` immediately preceeds a quotation mark (`"`) and
`self.loc.end()` immediately follows a quotation mark (`"`). In addition, the parser ensures that
these are separate distinct quotation marks (`"`)."#
        )]
        let raw = &loc.src[start..end];
        // `None` from the decoder means there were no escapes: reuse `raw`.
        let decoded =
            decode_json_escapes(raw, &loc)?.map_or_else(|| SmolStr::from(raw), SmolStr::from);
        Ok(Self { loc, decoded })
    }

    /// Borrows the source location of this string.
    pub(crate) fn as_loc(&self) -> &Loc {
        let Self { loc, .. } = self;
        loc
    }

    /// Consumes the string and returns its source location.
    pub(crate) fn into_loc(self) -> Loc {
        let Self { loc, .. } = self;
        loc
    }

    /// Borrows the decoded text.
    pub(crate) fn as_str(&self) -> &str {
        self.decoded.as_str()
    }

    /// Returns the decoded text as an owned `String`.
    #[expect(dead_code, reason = "Added for completeness.")]
    #[expect(
        clippy::inherent_to_string,
        reason = "Not provided as a proxy for display."
    )]
    pub(crate) fn to_string(&self) -> String {
        String::from(self.as_str())
    }

    /// Returns a clone of the decoded text as a `SmolStr`.
    pub(crate) fn to_smolstr(&self) -> SmolStr {
        SmolStr::clone(&self.decoded)
    }
}
/// Hashes only the decoded text, exactly as the underlying `str` would be
/// hashed, so the location never influences the hash.
impl Hash for LocatedString {
    fn hash<S>(&self, hasher: &mut S)
    where
        S: Hasher,
    {
        Hash::hash(self.as_str(), hasher);
    }
}
/// Two located strings are equal when their decoded text is equal; the
/// locations are not compared.
impl PartialEq for LocatedString {
    fn eq(&self, rhs: &Self) -> bool {
        let (lhs_text, rhs_text) = (self.as_str(), rhs.as_str());
        lhs_text == rhs_text
    }
}
// Marker impl: `PartialEq` above compares the decoded text as `str`, which is
// a full equivalence relation.
impl Eq for LocatedString {}
/// Exposes the decoded text as the borrowed form, which lets a map keyed by
/// `LocatedString` be queried with a plain `&str`.
impl Borrow<str> for LocatedString {
    fn borrow(&self) -> &str {
        Self::as_str(self)
    }
}
impl LocatedValue {
    /// Creates a `null` value located at `loc`.
    pub(crate) fn new_null(loc: Loc) -> Self {
        let kind = ValueKind::Null;
        Self { kind, loc }
    }

    /// Creates a boolean value `b` located at `loc`.
    pub(crate) fn new_bool(b: bool, loc: Loc) -> Self {
        let kind = ValueKind::Bool(b);
        Self { kind, loc }
    }

    /// Creates a number value located at `loc`.
    ///
    /// The number is not parsed here; its text is read back from the source
    /// by [`Self::get_numeric_str`].
    pub(crate) fn new_number(loc: Loc) -> Self {
        let kind = ValueKind::Number;
        Self { kind, loc }
    }

    /// Creates a string value from the location of a quoted string.
    ///
    /// The first and last byte of the span are skipped (they are expected to
    /// be the quotation marks) and the text in between has its JSON escapes
    /// decoded.
    ///
    /// # Errors
    ///
    /// Propagates the `ParseError` produced by `decode_json_escapes`.
    pub(crate) fn new_string(loc: Loc) -> Result<Self, ParseError> {
        let start = loc.start() + 1;
        let end = loc.end() - 1;
        #[expect(
            clippy::string_slice,
            reason = r#"By construction `start` and `end` are both at valid character boundaries and `start` <= `end`.
The JSON parser ensures that `self.loc.start()` immediately preceeds a quotation mark (`"`) and
`self.loc.end()` immediately follows a quotation mark (`"`). In addition, the parser ensures that
these are separate distinct quotation marks (`"`)."#
        )]
        let raw = &loc.src[start..end];
        // `None` from the decoder means there were no escapes: reuse `raw`.
        let decoded = match decode_json_escapes(raw, &loc)? {
            Some(unescaped) => SmolStr::from(unescaped),
            None => SmolStr::from(raw),
        };
        Ok(Self {
            kind: ValueKind::String(decoded),
            loc,
        })
    }

    /// Creates an array value holding `items`, located at `loc`.
    pub(crate) fn new_array(items: Vec<LocatedValue>, loc: Loc) -> Self {
        let kind = ValueKind::Array(items);
        Self { kind, loc }
    }

    /// Creates an object value holding `items`, located at `loc`.
    pub(crate) fn new_object(items: LinkedHashMap<LocatedString, LocatedValue>, loc: Loc) -> Self {
        let kind = ValueKind::Object(items);
        Self { kind, loc }
    }

    /// Borrows the kind (and payload) of this value.
    #[expect(dead_code, reason = "Added for completeness.")]
    pub(crate) fn as_kind(&self) -> &ValueKind {
        &self.kind
    }

    /// Consumes the value and returns its kind (and payload).
    #[expect(dead_code, reason = "Added for completeness.")]
    pub(crate) fn into_kind(self) -> ValueKind {
        self.kind
    }

    /// Borrows the source location of this value.
    pub(crate) fn as_loc(&self) -> &Loc {
        &self.loc
    }

    /// Consumes the value and returns its source location.
    #[expect(dead_code, reason = "Added for completeness.")]
    pub(crate) fn into_loc(self) -> Loc {
        self.loc
    }

    /// Returns `true` when this value is `null`.
    pub(crate) fn is_null(&self) -> bool {
        matches!(self.kind, ValueKind::Null)
    }

    /// Returns `true` when this value is a boolean.
    pub(crate) fn is_bool(&self) -> bool {
        matches!(self.kind, ValueKind::Bool(_))
    }

    /// Returns the boolean payload, or `None` for any other kind.
    pub(crate) fn get_bool(&self) -> Option<bool> {
        match self.kind {
            ValueKind::Bool(flag) => Some(flag),
            _ => None,
        }
    }

    /// Returns `true` when this value is a number.
    pub(crate) fn is_number(&self) -> bool {
        matches!(self.kind, ValueKind::Number)
    }

    /// Returns the source text of a number, or `None` for any other kind.
    ///
    /// The text is the slice of the source covered by this value's location.
    pub(crate) fn get_numeric_str(&self) -> Option<&str> {
        if !self.is_number() {
            return None;
        }
        let start = self.loc.start();
        let end = self.loc.end();
        #[expect(
            clippy::string_slice,
            reason = r#"By construction the indexes are at character boundaries:
the JSON parser ensures that start is the position immediately
preceeding an ascii digit [0-9] and end is a position that
immediately follows an ascii digit [0-9]."#
        )]
        let digits = &self.loc.src[start..end];
        Some(digits)
    }

    /// Returns `true` when this value is a string.
    pub(crate) fn is_string(&self) -> bool {
        matches!(self.kind, ValueKind::String(_))
    }

    /// Borrows the decoded text of a string, or `None` for any other kind.
    pub(crate) fn get_str(&self) -> Option<&str> {
        match &self.kind {
            ValueKind::String(text) => Some(text.as_str()),
            _ => None,
        }
    }

    /// Returns the decoded text of a string as an owned `String`, or `None`
    /// for any other kind.
    pub(crate) fn get_string(&self) -> Option<String> {
        self.get_str().map(str::to_owned)
    }

    /// Returns the decoded text of a string as a `SmolStr`, or `None` for any
    /// other kind.
    ///
    /// The stored `SmolStr` is cloned rather than rebuilt from a `&str`, so a
    /// heap-backed string is shared instead of being allocated again. This
    /// mirrors `LocatedString::to_smolstr`.
    pub(crate) fn get_smolstr(&self) -> Option<SmolStr> {
        match &self.kind {
            ValueKind::String(text) => Some(text.clone()),
            _ => None,
        }
    }

    /// Returns `true` when this value is an array.
    pub(crate) fn is_array(&self) -> bool {
        matches!(self.kind, ValueKind::Array(_))
    }

    /// Borrows the elements of an array, or `None` for any other kind.
    pub(crate) fn get_array(&self) -> Option<&[LocatedValue]> {
        match &self.kind {
            ValueKind::Array(items) => Some(items.as_slice()),
            _ => None,
        }
    }

    /// Returns `true` when this value is an object.
    pub(crate) fn is_object(&self) -> bool {
        matches!(self.kind, ValueKind::Object(_))
    }

    /// Borrows the members of an object, or `None` for any other kind.
    pub(crate) fn get_object(&self) -> Option<&LinkedHashMap<LocatedString, LocatedValue>> {
        match &self.kind {
            ValueKind::Object(members) => Some(members),
            _ => None,
        }
    }

    /// Looks up the member named `key` when this value is an object.
    ///
    /// Returns `None` when this value is not an object or when the object has
    /// no entry for `key`.
    pub(crate) fn get(&self, key: impl AsRef<str>) -> Option<&LocatedValue> {
        self.get_object()?.get(key.as_ref())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use cool_asserts::assert_matches;

    /// Builds a location spanning the whole of `str`.
    fn new_loc(str: &str) -> Loc {
        Loc::new((0, str.len()), std::sync::Arc::from(str))
    }

    /// Builds a string value from `json`, which must include the surrounding
    /// quotation marks because `LocatedValue::new_string` strips the first and
    /// last byte of the span.
    fn string_value(json: &str) -> LocatedValue {
        LocatedValue::new_string(new_loc(json)).unwrap()
    }

    /// One or more sample values for every kind, each tagged with a label
    /// naming that kind.
    fn samples() -> Vec<(&'static str, LocatedValue)> {
        vec![
            ("bool", LocatedValue::new_bool(true, new_loc("true"))),
            ("bool", LocatedValue::new_bool(false, new_loc("false"))),
            ("null", LocatedValue::new_null(new_loc("null"))),
            ("number", LocatedValue::new_number(new_loc("0.1"))),
            ("string", string_value(r#""my cool str""#)),
            ("array", LocatedValue::new_array(Vec::new(), new_loc("[]"))),
            (
                "object",
                LocatedValue::new_object(LinkedHashMap::new(), new_loc("{}")),
            ),
        ]
    }

    /// Asserts that `probe` answers `true` for exactly the samples labelled
    /// `kind` and `false` for every other sample.
    fn assert_only(kind: &str, probe: impl Fn(&LocatedValue) -> bool) {
        for (label, value) in samples() {
            assert_eq!(
                probe(&value),
                label == kind,
                "unexpected answer for the {label} sample: {value:?}"
            );
        }
    }

    #[test]
    fn test_isbool() {
        assert_only("bool", LocatedValue::is_bool);
    }

    #[test]
    fn test_getbool() {
        assert_only("bool", |value| value.get_bool().is_some());
        assert_matches!(
            LocatedValue::new_bool(true, new_loc("true")).get_bool(),
            Some(true)
        );
        assert_matches!(
            LocatedValue::new_bool(false, new_loc("false")).get_bool(),
            Some(false)
        );
    }

    #[test]
    fn test_isnull() {
        assert_only("null", LocatedValue::is_null);
    }

    #[test]
    fn test_isnumber() {
        assert_only("number", LocatedValue::is_number);
    }

    #[test]
    fn test_get_numeric_str() {
        assert_only("number", |value| value.get_numeric_str().is_some());
        assert_eq!(
            LocatedValue::new_number(new_loc("0.1")).get_numeric_str(),
            Some("0.1")
        );
    }

    #[test]
    fn test_isstring() {
        assert_only("string", LocatedValue::is_string);
    }

    #[test]
    fn test_get_str() {
        assert_only("string", |value| value.get_str().is_some());
        assert_eq!(
            string_value(r#""my cool str""#).get_str(),
            Some("my cool str")
        );
    }

    #[test]
    fn test_get_string() {
        assert_only("string", |value| value.get_string().is_some());
        assert_eq!(
            string_value(r#""my cool str""#).get_string().as_deref(),
            Some("my cool str")
        );
    }

    #[test]
    fn test_get_smolstr() {
        assert_only("string", |value| value.get_smolstr().is_some());
        assert_eq!(
            string_value(r#""my cool str""#).get_smolstr().as_deref(),
            Some("my cool str")
        );
    }

    #[test]
    fn test_string_escapes_are_decoded() {
        // Simple escape.
        assert_eq!(string_value(r#""a\nb""#).get_str(), Some("a\nb"));
        // A surrogate pair combines into a single scalar value.
        assert_eq!(
            string_value(r#""\uD83D\uDE00""#).get_str(),
            Some("\u{1F600}")
        );
        // A low surrogate on its own is rejected.
        assert!(LocatedValue::new_string(new_loc(r#""\uDC00""#)).is_err());
    }

    #[test]
    fn test_isarray() {
        assert_only("array", LocatedValue::is_array);
    }

    #[test]
    fn test_get_array() {
        assert_only("array", |value| value.get_array().is_some());
        assert_matches!(
            LocatedValue::new_array(Vec::new(), new_loc("[]")).get_array(),
            Some([])
        );
    }

    #[test]
    fn test_isobject() {
        assert_only("object", LocatedValue::is_object);
    }

    #[test]
    fn test_get_object() {
        assert_only("object", |value| value.get_object().is_some());
        assert!(
            LocatedValue::new_object(LinkedHashMap::new(), new_loc("{}"))
                .get_object()
                .is_some_and(|members| members.is_empty())
        );
    }
}