pub mod decode;
mod parser;
pub(crate) mod walk;
pub mod write;
#[cfg(test)]
pub(crate) mod test;
#[cfg(test)]
mod test_line_col;
#[cfg(test)]
mod test_path;
#[cfg(test)]
mod test_path_matches_glob;
#[cfg(test)]
mod test_source_json;
use std::{
borrow::{Borrow, Cow},
collections::{btree_set, BTreeMap, BTreeSet},
fmt::{self, Write as _},
rc::Rc,
};
use crate::warning::{Caveat, IntoCaveat, Set, Verdict};
use crate::{
string,
warning::{self, CaveatDeferred},
};
pub(crate) use parser::parse;
pub use parser::{Error, ErrorKind as ParseErrorKind};
/// Parses `json` into a [`Document`] whose root element must be a JSON object.
///
/// # Errors
///
/// * [`ParseError::SizeExceedsMax`] when `string::ReasonableLen::new` rejects the input.
/// * [`ParseError::Json`] when the parser reports an error.
/// * [`ParseError::ShouldBeAnObject`] when the input parses but its root is not an object.
pub fn parse_object(json: &str) -> Result<Document<'_>, ParseError> {
    let Ok(bounded) = string::ReasonableLen::new(json) else {
        return Err(ParseError::SizeExceedsMax);
    };
    match parse(bounded) {
        Ok(doc) if doc.root().is_object() => Ok(doc),
        Ok(_) => Err(ParseError::ShouldBeAnObject),
        Err(err) => Err(ParseError::Json(err)),
    }
}
/// The ways in which [`parse_object`] can fail.
#[derive(Debug)]
pub enum ParseError {
    /// The JSON parser reported an error.
    Json(Error),
    /// The input parsed, but the root element is not an object.
    ShouldBeAnObject,
    /// The input was rejected by the length check before parsing.
    SizeExceedsMax,
}

impl fmt::Display for ParseError {
    /// Writes a human readable description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::ShouldBeAnObject => write!(f, "The CDR should be an object."),
            Self::SizeExceedsMax => f.write_fmt(format_args!(
                "The input `&str` exceeds the reasonable maximum `{} MB`.",
                string::ReasonableLen::FACTOR
            )),
            // A fresh `format_args!` is used on purpose so that formatter flags
            // of the caller are not forwarded to the inner error.
            Self::Json(error) => f.write_fmt(format_args!("{error}")),
        }
    }
}

impl std::error::Error for ParseError {
    /// Only the `Json` variant wraps an underlying error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::ShouldBeAnObject | Self::SizeExceedsMax => None,
            Self::Json(err) => Some(err),
        }
    }
}
/// Fetches a required field, or returns early from the enclosing function.
///
/// Expands to a `match` on `$fields.get($field_name)`: when the lookup yields
/// `Some`, the macro evaluates to the contained value; otherwise it `return`s
/// `$warnings.bail($elem, Warning::FieldRequired { .. })`.
///
/// `Warning` is not prefixed with `$crate`, so it resolves to whatever item
/// named `Warning` is in scope where the macro is invoked.
#[doc(hidden)]
#[macro_export]
macro_rules! required_field_or_bail {
($elem:expr, $fields:expr, $field_name:literal, $warnings:expr) => {
match $fields.get($field_name) {
Some(field_elem) => field_elem,
None => {
// The field is missing: leave the calling function with the bail result.
return $warnings.bail(
$elem,
Warning::FieldRequired {
field_name: $field_name.into(),
},
);
}
}
};
}
/// Fetches a required field and records a warning when it is missing.
///
/// Evaluates to the result of `$fields.get($field_name)`. When that result
/// `is_none()`, `Warning::FieldRequired` is first inserted into `$warnings`
/// for `$elem`; unlike `required_field_or_bail!` the caller keeps running.
///
/// `Warning` resolves in the scope where the macro is invoked.
#[doc(hidden)]
#[macro_export]
macro_rules! required_field {
($elem:expr, $fields:expr, $field_name:literal, $warnings:expr) => {{
let field = $fields.get($field_name);
if field.is_none() {
$warnings.insert(
$elem,
Warning::FieldRequired {
field_name: $field_name.into(),
},
);
}
field
}};
}
/// Evaluates to the object fields of `$elem`, or returns early from the
/// enclosing function.
///
/// When `$elem.as_object_fields()` yields `None`, the macro `return`s
/// `$warnings.bail($elem, Warning::FieldInvalidType { .. })` with
/// `ValueKind::Object` as the expected type.
///
/// `Warning` resolves in the scope where the macro is invoked. `ValueKind` is
/// addressed through `$crate` so that callers do not need `json` in scope.
#[doc(hidden)]
#[macro_export]
macro_rules! expect_object_or_bail {
    ($elem:expr, $warnings:expr) => {
        match $elem.as_object_fields() {
            Some(fields) => fields,
            None => {
                return $warnings.bail(
                    $elem,
                    Warning::FieldInvalidType {
                        expected_type: $crate::json::ValueKind::Object,
                    },
                );
            }
        }
    };
}
/// Evaluates to the array items of `$elem`, or returns early from the
/// enclosing function.
///
/// When `$elem.as_array()` yields `None`, the macro `return`s
/// `$warnings.bail($elem, Warning::FieldInvalidType { .. })` with
/// `ValueKind::Array` as the expected type.
///
/// `Warning` resolves in the scope where the macro is invoked. `ValueKind` is
/// addressed through `$crate` so that callers do not need `json` in scope.
#[doc(hidden)]
#[macro_export]
macro_rules! expect_array_or_bail {
    ($elem:expr, $warnings:expr) => {
        match $elem.as_array() {
            Some(items) => items,
            None => {
                return $warnings.bail(
                    $elem,
                    Warning::FieldInvalidType {
                        expected_type: $crate::json::ValueKind::Array,
                    },
                );
            }
        }
    };
}
/// Fetches a required field and converts it to `$target`, bailing when the
/// field is missing.
///
/// The field is looked up with `required_field_or_bail!`, so a missing field
/// returns from the enclosing function. A present field is converted with
/// `<$target as FromJson>::from_json`; a failure is propagated with `?`, and
/// on success the macro evaluates to the result of
/// `gather_warnings_into(&mut $warnings)`.
#[doc(hidden)]
#[macro_export]
macro_rules! parse_required_or_bail {
($elem:expr, $fields:expr, $elem_name:literal, $target:ty, $warnings:expr) => {{
#[expect(clippy::allow_attributes, reason = "The allow attribute is needed here as the callers scope determines if the imports are used or not")]
#[allow(
unused,
reason = "The macro uses the import but maybe the outside scope does too."
)]
use $crate::json::FromJson;
use $crate::warning::GatherWarnings as _;
let elem = $crate::required_field_or_bail!($elem, $fields, $elem_name, $warnings);
<$target as FromJson>::from_json(elem)?.gather_warnings_into(&mut $warnings)
}};
}
/// Fetches a required field and converts it to `$target`, recording a warning
/// when the field is missing.
///
/// The field is looked up with `required_field!`. When it is present the
/// macro evaluates to `Some(value)`, where `value` comes from
/// `<$target as FromJson>::from_json(elem)?.gather_warnings_into(&mut $warnings)`;
/// when it is missing the macro evaluates to `None`. A failing conversion is
/// still propagated to the caller through `?`.
#[doc(hidden)]
#[macro_export]
macro_rules! parse_required {
($elem:expr, $fields:expr, $elem_name:literal, $target:ty, $warnings:expr) => {{
#[expect(
clippy::allow_attributes,
reason = "The allow attribute is needed here as the callers scope determines if the imports are used or not"
)]
#[allow(
unused,
reason = "The macro uses the import but maybe the outside scope does too."
)]
use $crate::json::FromJson;
use $crate::warning::GatherWarnings as _;
let elem = $crate::required_field!($elem, $fields, $elem_name, $warnings);
if let Some(elem) = elem {
let value =
<$target as FromJson>::from_json(elem)?.gather_warnings_into(&mut $warnings);
Some(value)
} else {
None
}
}};
}
/// Converts an optional field to `Option<$target>`.
///
/// When `$fields.get($elem_name)` yields `None` the macro evaluates to `None`.
/// Otherwise the element is converted with `Option::<$target>::from_json`; a
/// failure is propagated with `?` and on success the macro evaluates to the
/// result of `gather_warnings_into(&mut $warnings)`.
#[doc(hidden)]
#[macro_export]
macro_rules! parse_nullable_or_bail {
($fields:expr, $elem_name:literal, $target:ty, $warnings:expr) => {{
#[expect(
clippy::allow_attributes,
reason = "The allow attribute is needed here as the callers scope determines if the imports are used or not"
)]
#[allow(
unused,
reason = "The macro uses the import but maybe the outside scope does too."
)]
use $crate::json::FromJson as _;
use $crate::warning::GatherWarnings as _;
match $fields.get($elem_name) {
Some(elem) => Option::<$target>::from_json(elem)?.gather_warnings_into(&mut $warnings),
None => None,
}
}};
}
/// A parsed JSON document: the shared document data plus the root element.
#[derive(Clone, Debug)]
pub struct Document<'buf> {
    /// Shared data of the document (source text and path table).
    inner: Rc<DocumentInner<'buf>>,
    /// The top-level element of the document.
    root: Element<'buf>,
}

impl<'buf> Document<'buf> {
    /// Returns the source text stored in the shared document data.
    pub fn source(&self) -> &'buf str {
        let Self { inner, .. } = self;
        inner.source
    }

    /// Returns the top-level element of the document.
    pub fn root(&self) -> &Element<'buf> {
        let Self { root, .. } = self;
        root
    }
}
/// A single JSON element together with its position in the source text.
#[derive(Clone, Debug)]
pub struct Element<'buf> {
    /// Shared data of the document this element belongs to.
    doc: Rc<DocumentInner<'buf>>,
    /// Identifier of this element; also used to look up its path.
    id: ElemId,
    /// Span used to slice this element's JSON text out of the source.
    span: Span,
    /// End offset reported by `full_span`.
    full_span_end: u32,
    /// The parsed value.
    value: Value<'buf>,
}

impl PartialEq for Element<'_> {
    /// Compares every field except the shared `doc` handle.
    fn eq(&self, other: &Self) -> bool {
        let Self {
            doc: _,
            id,
            span,
            full_span_end,
            value,
        } = self;
        *id == other.id
            && *span == other.span
            && *full_span_end == other.full_span_end
            && *value == other.value
    }
}

impl Eq for Element<'_> {}
impl<'buf> Element<'buf> {
pub fn id(&self) -> ElemId {
self.id
}
pub fn span(&self) -> Span {
self.span
}
pub fn full_span(&self) -> Span {
Span {
start: self.span.start,
end: self.full_span_end,
}
}
pub fn value(&self) -> &Value<'buf> {
&self.value
}
pub fn path(&self) -> Path {
self.doc.paths.path_of(self)
}
#[expect(
clippy::string_slice,
reason = "spans are produced by the parser from the same source, so slices are always valid"
)]
#[expect(
clippy::as_conversions,
reason = "The index is guaranteed within bounds by the parser"
)]
pub fn source_json_value(&self) -> &'buf str {
&self.doc.source[self.span.start as usize..self.span.end as usize]
}
pub fn source(&self) -> &'buf str {
self.doc.source
}
#[expect(
clippy::string_slice,
reason = "spans are produced by the parser from the same source, so slices are always valid"
)]
#[expect(
clippy::as_conversions,
reason = "The index is guaranteed within bounds by the parser"
)]
pub fn location(&self) -> Location {
let source = self.doc.source;
let lead_in = &source[..self.span.start as usize];
line_col(lead_in)
}
pub fn as_value(&self) -> &Value<'buf> {
&self.value
}
pub fn to_raw_str(&self) -> Option<RawStr<'buf>> {
self.value.to_raw_str()
}
pub fn as_object_fields(&self) -> Option<&[Field<'buf>]> {
self.value.as_object_fields()
}
pub fn as_array(&self) -> Option<&[Element<'buf>]> {
self.value.as_array()
}
pub fn as_number_str(&self) -> Option<&str> {
self.value.as_number()
}
pub fn is_null(&self) -> bool {
self.value.is_null()
}
pub fn is_object(&self) -> bool {
self.value.is_object()
}
pub fn is_array(&self) -> bool {
self.value.is_array()
}
}
/// A parsed JSON value that borrows its text from the source buffer.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Value<'buf> {
    /// JSON `null`.
    Null,
    /// JSON `true`.
    True,
    /// JSON `false`.
    False,
    /// A JSON string, kept in its raw form.
    String(RawStr<'buf>),
    /// A JSON number, kept as its source text.
    Number(&'buf str),
    /// A JSON array and its items.
    Array(Vec<Element<'buf>>),
    /// A JSON object and its fields.
    Object(Vec<Field<'buf>>),
}

impl fmt::Display for Value<'_> {
    /// Writes scalars as their text and containers as the placeholders
    /// `[...]` and `{...}`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = match self {
            Self::Null => "null",
            Self::True => "true",
            Self::False => "false",
            Self::String(raw) => raw.as_unescaped_str(),
            Self::Number(digits) => *digits,
            Self::Array(..) => "[...]",
            Self::Object(..) => "{...}",
        };
        f.write_str(text)
    }
}
/// A `start..end` range of offsets; the offsets are used as indices when
/// slicing the document source.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct Span {
    /// Offset at which the span starts.
    pub start: u32,
    /// Offset at which the span ends.
    pub end: u32,
}

impl Span {
    /// Creates a span from its two offsets.
    fn new(start: u32, end: u32) -> Self {
        Span { start, end }
    }
}
/// A line/column position.
#[derive(Clone, Debug)]
pub struct Location {
    /// The line component.
    pub line: u32,
    /// The column component.
    pub col: u32,
}

impl From<(u32, u32)> for Location {
    /// Interprets the tuple as `(line, col)`.
    fn from(value: (u32, u32)) -> Self {
        let (line, col) = value;
        Self { line, col }
    }
}

impl From<Location> for (u32, u32) {
    /// Produces the tuple `(line, col)`.
    fn from(value: Location) -> Self {
        let Location { line, col } = value;
        (line, col)
    }
}

impl PartialEq<(u32, u32)> for Location {
    /// Compares against a `(line, col)` tuple.
    fn eq(&self, other: &(u32, u32)) -> bool {
        (self.line, self.col) == *other
    }
}

impl fmt::Display for Location {
    /// Formats the location as `line:col`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line, col } = self;
        write!(f, "{}:{}", line, col)
    }
}
/// Computes the zero-based line and column of the position just past the end
/// of `s`.
///
/// `line` is the number of `'\n'` characters in `s`; `col` is the number of
/// characters that follow the last `'\n'` (or all of `s` when there is none).
/// Both counters stop growing at `u32::MAX`.
pub fn line_col(s: &str) -> Location {
    // `rsplit` always yields at least one piece: the text after the last newline.
    let last_line = s.rsplit('\n').next().unwrap_or(s);
    let col = last_line
        .chars()
        .fold(0_u32, |count, _| count.saturating_add(1));
    let line = s
        .matches('\n')
        .fold(0_u32, |count, _| count.saturating_add(1));
    Location { line, col }
}
/// Identifier of an element within a document.
///
/// The wrapped value is used as an index into the entries of the path table.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ord, PartialOrd)]
pub struct ElemId(usize);
/// One entry of the path table: how an element is reached from its parent.
#[derive(Debug)]
enum PathEntry<'buf> {
/// The root of the document; it has no parent and is rendered as `$`.
Root,
/// An object member, reached from `parent` through `key`.
Field {
parent: ElemId,
key: RawStr<'buf>,
},
/// An array item, reached from `parent` at position `index`.
Item {
parent: ElemId,
index: u32,
},
}
/// Data shared between a document and its elements.
#[derive(Debug)]
struct DocumentInner<'buf> {
// The complete source text; element spans are sliced out of it.
source: &'buf str,
// Table used to build the path of an element from its id.
paths: PathTable<'buf>,
}
#[derive(Debug, Default)]
struct PathTable<'buf> {
entries: Vec<PathEntry<'buf>>,
}
impl<'buf> PathTable<'buf> {
fn push(&mut self, entry: PathEntry<'buf>) {
self.entries.push(entry);
}
fn path_of(&self, element: &Element<'buf>) -> Path {
let mut entries: Vec<&PathEntry<'buf>> = Vec::new();
let mut elem_id = element.id;
loop {
let entry = self
.entries
.get(elem_id.0)
.expect("ElemId always refers to a valid PathEntry");
match entry {
PathEntry::Root => {
entries.push(entry);
break;
}
PathEntry::Field { parent, key: _ } | PathEntry::Item { parent, index: _ } => {
entries.push(entry);
elem_id = *parent;
}
}
}
entries.reverse();
let mut out = String::with_capacity(30);
for entry in entries {
let res = match entry {
PathEntry::Root => write!(out, "$"),
PathEntry::Field { parent: _, key } => {
write!(out, ".{}", key.as_unescaped_str())
}
PathEntry::Item { parent: _, index } => {
write!(out, "[{index}]")
}
};
res.expect("Writing to a String can only fail if the system runs out of heap memory");
}
Path(out)
}
}
/// The rendered path of an element, stored as a string such as `$.a[0]`.
#[derive(Clone, PartialOrd, Ord, PartialEq, Eq)]
pub struct Path(String);

impl Path {
    /// Consumes the path and returns the underlying string.
    pub fn into_string(self) -> String {
        let Self(text) = self;
        text
    }

    /// Returns the path as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Returns an iterator over the components of the path.
    pub fn components(&self) -> Components<'_> {
        let text: &str = &self.0;
        Components::over(text)
    }
}
/// One step of a path.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Component<'a> {
    /// A `.name` step; holds the text after the dot.
    Member(&'a str),
    /// A `[index]` step; holds the text between the brackets.
    Index(&'a str),
}

/// Iterator over the [`Component`]s of a path string.
#[derive(Clone, Debug)]
pub struct Components<'a> {
    /// The part of the path that has not been consumed yet.
    rest: &'a str,
}

impl<'a> Components<'a> {
    /// Starts iterating over `path`, skipping a leading `$` when present.
    pub(crate) fn over(path: &'a str) -> Self {
        let rest = match path.strip_prefix('$') {
            Some(stripped) => stripped,
            None => path,
        };
        Self { rest }
    }
}

impl<'a> Iterator for Components<'a> {
    type Item = Component<'a>;

    /// Yields the next component, or `None` once the remaining text starts
    /// with neither `.` nor `[`.
    fn next(&mut self) -> Option<Self::Item> {
        let rest = self.rest;
        if let Some(after_dot) = rest.strip_prefix('.') {
            // A member name runs up to the next `.` or `[`, or to the end.
            let (name, tail) = match after_dot.find(|c| matches!(c, '.' | '[')) {
                Some(end) => after_dot.split_at(end),
                None => (after_dot, ""),
            };
            self.rest = tail;
            return Some(Component::Member(name));
        }
        if let Some(after_bracket) = rest.strip_prefix('[') {
            // An index runs up to the closing `]`; an unterminated index takes
            // everything that is left.
            let (index, tail) = match after_bracket.split_once(']') {
                Some(parts) => parts,
                None => (after_bracket, ""),
            };
            self.rest = tail;
            return Some(Component::Index(index));
        }
        None
    }
}
impl PartialEq<str> for Path {
    /// Compares the path text with `other`.
    fn eq(&self, other: &str) -> bool {
        self.0.as_str() == other
    }
}

impl PartialEq<&str> for Path {
    /// Compares the path text with the string slice behind `other`.
    fn eq(&self, other: &&str) -> bool {
        self.0.as_str() == *other
    }
}
impl fmt::Debug for Path {
    /// Writes the bare path text, without quotes or a type name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.0.as_str())
    }
}

impl fmt::Display for Path {
    /// Formats the path text, honouring the formatter's flags.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Display>::fmt(self.0.as_str(), f)
    }
}
/// An ordered set of borrowed [`Path`]s.
#[derive(Debug)]
pub struct PathSet<'set>(BTreeSet<&'set Path>);

impl<'set> PathSet<'set> {
    /// Wraps an existing set of paths.
    pub(crate) fn new(paths: BTreeSet<&'set Path>) -> Self {
        PathSet(paths)
    }

    /// Renders every path to a `String`, in set order.
    pub fn to_strings(&self) -> Vec<String> {
        let mut rendered = Vec::with_capacity(self.0.len());
        for path in &self.0 {
            rendered.push(path.to_string());
        }
        rendered
    }

    /// Consumes the set and renders every path to a `String`, in set order.
    pub fn into_strings(self) -> Vec<String> {
        let PathSet(paths) = self;
        paths.into_iter().map(|path| path.to_string()).collect()
    }

    /// Returns `true` when the set holds no paths.
    pub fn is_empty(&self) -> bool {
        let PathSet(paths) = self;
        paths.is_empty()
    }

    /// Returns the number of paths in the set.
    pub fn len(&self) -> usize {
        let PathSet(paths) = self;
        paths.len()
    }

    /// Returns an iterator over the paths, in set order.
    pub fn iter(&self) -> btree_set::Iter<'_, &Path> {
        let PathSet(paths) = self;
        paths.iter()
    }
}
impl<'set> IntoIterator for PathSet<'set> {
type Item = &'set Path;
type IntoIter = btree_set::IntoIter<&'set Path>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<'a, 'set> IntoIterator for &'a PathSet<'set> {
type Item = &'a &'set Path;
type IntoIter = btree_set::Iter<'a, &'set Path>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
/// The kind of a JSON value, without its payload.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum ValueKind {
    /// `null`
    Null,
    /// `true` or `false`
    Bool,
    /// A number.
    Number,
    /// A string.
    String,
    /// An array.
    Array,
    /// An object.
    Object,
}

impl fmt::Display for ValueKind {
    /// Writes the lowercase name of the kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Null => "null",
            Self::Bool => "bool",
            Self::Number => "number",
            Self::String => "string",
            Self::Array => "array",
            Self::Object => "object",
        };
        f.write_str(name)
    }
}
impl<'buf> Value<'buf> {
    /// Returns the kind of this value; `True` and `False` both map to
    /// [`ValueKind::Bool`].
    pub fn kind(&self) -> ValueKind {
        match self {
            Value::Null => ValueKind::Null,
            Value::True | Value::False => ValueKind::Bool,
            Value::String(_) => ValueKind::String,
            Value::Number(_) => ValueKind::Number,
            Value::Array(_) => ValueKind::Array,
            Value::Object(_) => ValueKind::Object,
        }
    }

    /// Returns `true` if the value is `null`.
    pub fn is_null(&self) -> bool {
        matches!(self, Value::Null)
    }

    /// Returns `true` if the value is an array.
    pub fn is_array(&self) -> bool {
        matches!(self, Value::Array(..))
    }

    /// Returns `true` if the value is an object.
    pub fn is_object(&self) -> bool {
        matches!(self, Value::Object(..))
    }

    /// Returns `true` if the value is neither an array nor an object.
    pub fn is_scalar(&self) -> bool {
        matches!(
            self,
            Value::Null | Value::True | Value::False | Value::String(_) | Value::Number(_)
        )
    }

    /// Returns the items if the value is an array.
    pub fn as_array(&self) -> Option<&[Element<'buf>]> {
        if let Value::Array(elems) = self {
            Some(elems)
        } else {
            None
        }
    }

    /// Returns the number text if the value is a number.
    ///
    /// The returned slice borrows from the `'buf` buffer, not from `self`, so
    /// it stays usable after the value has been dropped.
    pub fn as_number(&self) -> Option<&'buf str> {
        if let Value::Number(s) = self {
            // Copy the stored `&'buf str` out instead of reborrowing through `self`.
            Some(*s)
        } else {
            None
        }
    }

    /// Returns the raw string if the value is a string.
    pub fn to_raw_str(&self) -> Option<RawStr<'buf>> {
        if let Value::String(s) = self {
            Some(*s)
        } else {
            None
        }
    }

    /// Returns the fields if the value is an object.
    pub fn as_object_fields(&self) -> Option<&[Field<'buf>]> {
        if let Value::Object(fields) = self {
            Some(fields)
        } else {
            None
        }
    }
}
/// A member of a JSON object: the span of its key plus its value element.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Field<'buf> {
    /// Span of the key in the source, including the surrounding quotes.
    key_span: Span,
    /// The element stored under the key.
    element: Element<'buf>,
}

impl<'buf> Field<'buf> {
    /// Consumes the field and returns its value element.
    pub fn into_element(self) -> Element<'buf> {
        let Self { element, .. } = self;
        element
    }

    /// Returns the value element of the field.
    pub fn element(&self) -> &Element<'buf> {
        let Self { element, .. } = self;
        element
    }

    /// Returns the span of the key, including the surrounding quotes.
    pub fn key_span(&self) -> Span {
        let Self { key_span, .. } = self;
        *key_span
    }

    /// Returns a span from the start of the key to the element's
    /// `full_span_end`.
    pub fn full_span(&self) -> Span {
        let start = self.key_span.start;
        let end = self.element.full_span_end;
        Span { start, end }
    }

    /// Returns the key text without its surrounding quotes.
    #[expect(
        clippy::arithmetic_side_effects,
        reason = "key_span always spans a quoted string, so +1/-1 to strip the surrounding quote bytes is safe"
    )]
    #[expect(
        clippy::string_slice,
        reason = "key_span is produced by the parser from the same source; +1/-1 strips the ASCII quote bytes"
    )]
    #[expect(
        clippy::as_conversions,
        reason = "The index is guaranteed within bounds by the parser"
    )]
    pub fn key(&self) -> RawStr<'buf> {
        let Span { start, end } = self.key_span;
        // Skip the opening quote and stop before the closing one.
        let unquoted = (start as usize + 1)..(end as usize - 1);
        RawStr::from_str(&self.element.source()[unquoted])
    }

    /// Returns the source text from the start of the key to the end of the
    /// element's span.
    #[expect(
        clippy::string_slice,
        reason = "spans are produced by the parser from the same source, so slices are always valid"
    )]
    #[expect(
        clippy::as_conversions,
        reason = "The index is guaranteed within bounds by the parser"
    )]
    pub fn source_json(&self) -> &'buf str {
        let from = self.key_span.start as usize;
        let to = self.element.span.end as usize;
        &self.element.source()[from..to]
    }
}
/// A map from raw field key to the owned [`Element`] stored under it.
pub type RawMap<'buf> = BTreeMap<RawStr<'buf>, Element<'buf>>;
/// A map from raw field key to a borrowed [`Element`] stored under it.
pub type RawRefMap<'a, 'buf> = BTreeMap<RawStr<'buf>, &'a Element<'buf>>;
/// Extension for consuming a collection of fields.
#[expect(dead_code, reason = "pending use in `tariff::lint`")]
pub(crate) trait FieldsIntoExt<'buf> {
/// Consumes the fields and returns them as a [`RawMap`] keyed by field key.
fn into_map(self) -> RawMap<'buf>;
}
/// Extension for inspecting a collection of fields without consuming it.
pub(crate) trait FieldsAsExt<'buf> {
/// Returns a [`RawRefMap`] from field key to a reference to its element.
fn as_raw_map(&self) -> RawRefMap<'_, 'buf>;
/// Returns the field matching `key`, if there is one.
fn find_field(&self, key: &str) -> Option<&Field<'buf>>;
}
impl<'buf> FieldsIntoExt<'buf> for Vec<Field<'buf>> {
    /// Consumes the fields and collects them into a map keyed by field key.
    fn into_map(self) -> RawMap<'buf> {
        let pairs = self.into_iter().map(|field| {
            // The key must be read before the field is consumed.
            let key = field.key();
            (key, field.into_element())
        });
        pairs.collect()
    }
}
impl<'buf> FieldsAsExt<'buf> for Vec<Field<'buf>> {
    /// Collects the fields into a map from key to element reference; shares
    /// the implementation for `[Field]`.
    fn as_raw_map(&self) -> RawRefMap<'_, 'buf> {
        <[Field<'buf>] as FieldsAsExt<'buf>>::as_raw_map(self)
    }

    /// Returns the first field whose key matches `key`; shares the
    /// implementation for `[Field]`.
    fn find_field(&self, key: &str) -> Option<&Field<'buf>> {
        <[Field<'buf>] as FieldsAsExt<'buf>>::find_field(self, key)
    }
}
impl<'buf> FieldsAsExt<'buf> for [Field<'buf>] {
    /// Collects the fields into a map from key to element reference.
    fn as_raw_map(&self) -> RawRefMap<'_, 'buf> {
        let pairs = self.iter().map(|field| {
            let key = field.key();
            (key, field.element())
        });
        pairs.collect()
    }

    /// Returns the first field whose key matches `key`.
    ///
    /// A key for which `eq_escape_aware` returns an error is treated as not
    /// matching.
    fn find_field(&self, key: &str) -> Option<&Field<'buf>> {
        for field in self {
            if matches!(field.key().eq_escape_aware(key), Ok(true)) {
                return Some(field);
            }
        }
        None
    }
}
/// A string slice taken from the source as-is; escape sequences, if any, are
/// still in their written form.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct RawStr<'buf>(&'buf str);

impl Borrow<str> for RawStr<'_> {
    /// Borrows the raw text.
    fn borrow(&self) -> &str {
        let RawStr(raw) = *self;
        raw
    }
}

impl Borrow<str> for &RawStr<'_> {
    /// Borrows the raw text.
    fn borrow(&self) -> &str {
        let RawStr(raw) = **self;
        raw
    }
}
impl<'buf> RawStr<'buf> {
    /// Wraps a slice of the source text.
    fn from_str(source: &'buf str) -> Self {
        Self(source)
    }

    /// Compares the raw text with `other` using `decode::eq` and returns its
    /// result unchanged.
    ///
    /// # Errors
    ///
    /// Returns the `decode::Warning` reported by `decode::eq`.
    pub fn eq_escape_aware(&self, other: &str) -> Result<bool, decode::Warning> {
        decode::eq(self.0, other)
    }

    /// Returns `true` if `decode::eq` reports a match with any of `other`.
    ///
    /// A comparison that returns an error counts as "no match".
    pub fn eq_any_escape_aware(&self, other: &[&str]) -> bool {
        other
            .iter()
            .any(|s| decode::eq(self.0, s).unwrap_or(false))
    }

    /// Returns `true` if `decode::eq_ignore_ascii_case` reports a match with
    /// any of `other`.
    ///
    /// A comparison that returns an error counts as "no match".
    pub fn eq_any_escape_aware_ignore_ascii_case(&self, other: &[&str]) -> bool {
        other
            .iter()
            .any(|s| decode::eq_ignore_ascii_case(self.0, s).unwrap_or(false))
    }

    /// Returns the stored slice exactly as it is, without decoding anything.
    pub fn as_unescaped_str(&self) -> &'buf str {
        self.0
    }

    /// Decodes the raw text with `decode::from_raw`.
    ///
    /// The result borrows from the `'buf` buffer rather than from `self`,
    /// matching `EscapeStr::decode_escapes`.
    pub fn decode_escapes(&self) -> CaveatDeferred<Cow<'buf, str>, decode::Warning> {
        decode::from_raw(self.0)
    }

    /// Analyzes the raw text with `decode::analyze` and returns the resulting
    /// [`PendingStr`] together with any warnings attached to `elem`.
    pub fn has_escapes(&self, elem: &Element<'buf>) -> Caveat<PendingStr<'buf>, decode::Warning> {
        decode::analyze(self.0, elem)
    }
}
/// The outcome of analyzing a raw string for escape sequences.
pub enum PendingStr<'buf> {
/// The string contains no escapes; the slice is carried as-is.
NoEscapes(&'buf str),
/// The string contains escapes that have not been decoded yet.
HasEscapes(EscapeStr<'buf>),
}
/// A raw string slice that still needs its escapes decoded.
pub struct EscapeStr<'buf>(&'buf str);

impl<'buf> EscapeStr<'buf> {
    /// Decodes the raw text with `decode::from_raw`.
    pub fn decode_escapes(&self) -> CaveatDeferred<Cow<'buf, str>, decode::Warning> {
        let EscapeStr(raw) = *self;
        decode::from_raw(raw)
    }

    /// Consumes the wrapper and returns the raw, still-escaped slice.
    pub fn into_raw(self) -> &'buf str {
        let EscapeStr(raw) = self;
        raw
    }
}
/// Conversion from a JSON [`Element`] into a typed value.
pub(crate) trait FromJson<'buf>: Sized {
/// The warning type produced by the conversion.
type Warning: warning::Warning;
/// Converts `elem` into `Self`, returning the outcome as a [`Verdict`].
fn from_json(elem: &Element<'buf>) -> Verdict<Self, Self::Warning>;
}
impl<'buf, T> FromJson<'buf> for Option<T>
where
    T: FromJson<'buf> + IntoCaveat,
{
    type Warning = T::Warning;

    /// Maps JSON `null` to `None` with an empty warning set; any other value
    /// is converted with `T::from_json` and wrapped in `Some`.
    fn from_json(elem: &Element<'buf>) -> Verdict<Self, Self::Warning> {
        if elem.as_value().is_null() {
            return Ok(None.into_caveat(Set::new()));
        }
        let converted = T::from_json(elem)?;
        Ok(converted.map(Some))
    }
}