pub mod json_ref;
use crate::sax::Sax;
/// Discriminant describing what a single [`DomEntry`] on the tape represents.
///
/// The numeric value is stored in the bits of `DomEntry::tag_payload` above
/// `KIND_SHIFT` and is read back in `DomEntry::kind` with a `transmute`, so
/// every stored tag must be one of the discriminants listed here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum DomEntryKind {
/// JSON `null`; carries no payload and no pointer.
Null = 0,
/// JSON boolean; the payload is non-zero for `true`.
Bool = 1,
/// JSON number, kept as a borrowed string slice rather than a parsed value.
Number = 2,
/// String value borrowed as a `&str`.
String = 3,
/// String value whose unescaped text is owned by the entry (`Box<str>`).
EscapedString = 4,
/// Object key borrowed as a `&str`.
Key = 5,
/// Object key whose unescaped text is owned by the entry (`Box<str>`).
EscapedKey = 6,
/// Opens an object; the payload is the tape index of the matching `EndObject`.
StartObject = 7,
/// Closes an object; carries no payload.
EndObject = 8,
/// Opens an array; the payload is the tape index of the matching `EndArray`.
StartArray = 9,
/// Closes an array; carries no payload.
EndArray = 10,
}
/// Number of low bits reserved for the payload; the kind tag sits above them.
const KIND_SHIFT: u64 = 60;
/// Selects the payload bits of `tag_payload` (everything below `KIND_SHIFT`).
const PAYLOAD_MASK: u64 = (1u64 << KIND_SHIFT) - 1;
/// One slot of the DOM tape: a kind tag, a small integer payload, and an
/// optional pointer to string bytes.
///
/// The lifetime `'a` is that of the borrowed text referenced by the
/// `Number`, `String` and `Key` kinds. The `EscapedString` and `EscapedKey`
/// kinds instead own a heap allocation that is released in `Drop`.
#[repr(C)]
pub struct DomEntry<'a> {
// Kind tag in the bits above `KIND_SHIFT`; the remaining low bits hold the
// payload (bool value, string byte length, or index of the matching end entry).
pub(crate) tag_payload: u64,
// Start of the string bytes for text-carrying kinds; null for the kinds that
// carry no text. For the escaped kinds this is a `Box<str>` turned into a raw
// pointer, which the entry owns.
pub(crate) ptr: *const u8,
// Ties the raw pointer to the lifetime of the borrowed text.
_marker: std::marker::PhantomData<&'a str>,
}
// SAFETY: as built by the constructors in this file, `ptr` is either null, a
// pointer into a `&'a str` the entry merely borrows, or a `Box<str>`
// allocation the entry owns exclusively. `&str` and `Box<str>` are both
// `Send + Sync`, and the entry has no interior mutability, so moving or
// sharing it across threads is sound. The raw pointer is the only reason the
// auto traits are not derived automatically.
unsafe impl<'a> Send for DomEntry<'a> {}
unsafe impl<'a> Sync for DomEntry<'a> {}
impl<'a> Drop for DomEntry<'a> {
    /// Releases the heap allocation held by `EscapedString` / `EscapedKey`
    /// entries; every other kind only borrows (or has no text) and needs no
    /// cleanup.
    fn drop(&mut self) {
        let owns_text = matches!(
            self.kind(),
            DomEntryKind::EscapedString | DomEntryKind::EscapedKey
        );
        if !owns_text || self.ptr.is_null() {
            return;
        }

        let byte_len = self.payload() as usize;
        // SAFETY: for the escaped kinds `ptr` and the payload length come from
        // a `Box<str>` that was turned into a raw pointer by the constructors
        // (or by `clone`), so rebuilding the box here reclaims exactly that
        // allocation, once.
        unsafe {
            let bytes = std::ptr::slice_from_raw_parts_mut(self.ptr as *mut u8, byte_len);
            drop(Box::from_raw(bytes as *mut str));
        }
    }
}
impl<'a> Clone for DomEntry<'a> {
    /// Copies the entry.
    ///
    /// Entries that own their text (`EscapedString` / `EscapedKey`) get a fresh
    /// heap copy so each clone can free its own allocation; all other kinds
    /// are copied field by field.
    fn clone(&self) -> Self {
        let kind = self.kind();
        let owns_text = matches!(
            kind,
            DomEntryKind::EscapedString | DomEntryKind::EscapedKey
        );

        if !owns_text {
            // Borrowed or pointer-free entry: a bitwise copy is enough.
            return Self {
                tag_payload: self.tag_payload,
                ptr: self.ptr,
                _marker: std::marker::PhantomData,
            };
        }

        // Duplicate the owned text and hand ownership of the copy to the clone.
        let copy: Box<str> = Box::from(self.as_escaped_str_unchecked());
        let byte_len = copy.len() as u64;
        let tag_bits = (kind as u64) << KIND_SHIFT;
        Self {
            tag_payload: tag_bits | (byte_len & PAYLOAD_MASK),
            ptr: Box::into_raw(copy) as *mut u8 as *const u8,
            _marker: std::marker::PhantomData,
        }
    }
}
impl<'a> std::fmt::Debug for DomEntry<'a> {
    /// Formats the entry like an enum variant, e.g. `Bool(true)`,
    /// `String("hi")` or `StartObject(3)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.kind() {
            // Markers without any payload.
            DomEntryKind::Null => write!(f, "Null"),
            DomEntryKind::EndObject => write!(f, "EndObject"),
            DomEntryKind::EndArray => write!(f, "EndArray"),

            // Kinds whose payload is a plain integer.
            DomEntryKind::Bool => {
                let flag = self.payload() != 0;
                write!(f, "Bool({})", flag)
            }
            DomEntryKind::StartObject => {
                let end_idx = self.payload();
                write!(f, "StartObject({})", end_idx)
            }
            DomEntryKind::StartArray => {
                let end_idx = self.payload();
                write!(f, "StartArray({})", end_idx)
            }

            // Kinds that borrow their text.
            DomEntryKind::Number => {
                let text = self.as_str_unchecked();
                write!(f, "Number({:?})", text)
            }
            DomEntryKind::String => {
                let text = self.as_str_unchecked();
                write!(f, "String({:?})", text)
            }
            DomEntryKind::Key => {
                let text = self.as_str_unchecked();
                write!(f, "Key({:?})", text)
            }

            // Kinds that own their (unescaped) text.
            DomEntryKind::EscapedString => {
                let text = self.as_escaped_str_unchecked();
                write!(f, "EscapedString({:?})", text)
            }
            DomEntryKind::EscapedKey => {
                let text = self.as_escaped_str_unchecked();
                write!(f, "EscapedKey({:?})", text)
            }
        }
    }
}
impl<'a> PartialEq for DomEntry<'a> {
    /// Two entries are equal when they have the same kind and the same
    /// logical content: payload for integer-carrying kinds, text for
    /// string-carrying kinds. Pointer identity is never compared.
    fn eq(&self, other: &Self) -> bool {
        let kind = self.kind();
        kind == other.kind()
            && match kind {
                DomEntryKind::Null | DomEntryKind::EndObject | DomEntryKind::EndArray => true,
                DomEntryKind::Bool | DomEntryKind::StartObject | DomEntryKind::StartArray => {
                    self.payload() == other.payload()
                }
                DomEntryKind::Number | DomEntryKind::String | DomEntryKind::Key => {
                    let (lhs, rhs) = (self.as_str_unchecked(), other.as_str_unchecked());
                    lhs == rhs
                }
                DomEntryKind::EscapedString | DomEntryKind::EscapedKey => {
                    let lhs = self.as_escaped_str_unchecked();
                    let rhs = other.as_escaped_str_unchecked();
                    lhs == rhs
                }
            }
    }
}
impl<'a> DomEntry<'a> {
    /// Packs `kind` and `payload` into a new entry.
    ///
    /// The kind is shifted above `KIND_SHIFT`; the payload is truncated to the
    /// bits covered by `PAYLOAD_MASK`.
    #[inline]
    fn make(kind: DomEntryKind, payload: u64, ptr: *const u8) -> Self {
        let tag_bits = (kind as u64) << KIND_SHIFT;
        let payload_bits = payload & PAYLOAD_MASK;
        Self {
            tag_payload: tag_bits | payload_bits,
            ptr,
            _marker: std::marker::PhantomData,
        }
    }

    /// Builds an entry of `kind` that borrows `text` (length in the payload,
    /// start address in `ptr`).
    #[inline]
    fn borrowed_text_entry(kind: DomEntryKind, text: &'a str) -> Self {
        Self::make(kind, text.len() as u64, text.as_ptr())
    }

    /// Builds an entry of `kind` that takes ownership of `text`; the box is
    /// turned into a raw pointer and reclaimed in `Drop`.
    #[inline]
    fn owned_text_entry(kind: DomEntryKind, text: Box<str>) -> Self {
        let byte_len = text.len() as u64;
        let raw = Box::into_raw(text) as *mut u8 as *const u8;
        Self::make(kind, byte_len, raw)
    }

    /// Returns the kind stored in the tag bits.
    #[inline]
    pub fn kind(&self) -> DomEntryKind {
        let raw_tag = (self.tag_payload >> KIND_SHIFT) as u8;
        // SAFETY: relies on the tag bits having been written from a
        // `DomEntryKind` value (as `make` and the associated constants do), so
        // `raw_tag` is a valid discriminant of the `#[repr(u8)]` enum.
        unsafe { std::mem::transmute::<u8, DomEntryKind>(raw_tag) }
    }

    /// Returns the payload bits (everything below the kind tag).
    #[inline]
    pub(crate) fn payload(&self) -> u64 {
        self.tag_payload & PAYLOAD_MASK
    }

    /// Reinterprets `ptr` + payload length as the borrowed text.
    ///
    /// Only meaningful for kinds created from a `&'a str`; callers check the
    /// kind first.
    #[inline]
    fn as_str_unchecked(&self) -> &'a str {
        let byte_len = self.payload() as usize;
        // SAFETY: for borrowed text kinds `ptr`/`byte_len` were taken from a
        // live `&'a str`, so they describe valid UTF-8 for `'a`.
        unsafe {
            let bytes = std::slice::from_raw_parts(self.ptr, byte_len);
            std::str::from_utf8_unchecked(bytes)
        }
    }

    /// Reinterprets `ptr` + payload length as the text owned by this entry.
    ///
    /// Only meaningful for the escaped kinds; the result borrows from `self`
    /// because the allocation is freed when the entry is dropped.
    #[inline]
    fn as_escaped_str_unchecked(&self) -> &str {
        let byte_len = self.payload() as usize;
        // SAFETY: for escaped kinds `ptr`/`byte_len` come from a `Box<str>`
        // this entry still owns, so they describe valid UTF-8 while `self`
        // is alive.
        unsafe {
            let bytes = std::slice::from_raw_parts(self.ptr, byte_len);
            std::str::from_utf8_unchecked(bytes)
        }
    }

    /// Creates a `Null` entry.
    #[inline]
    pub fn null_entry() -> Self {
        Self::make(DomEntryKind::Null, 0, std::ptr::null())
    }

    /// Creates a `Bool` entry; the payload is `1` for `true` and `0` for `false`.
    #[inline]
    pub fn bool_entry(v: bool) -> Self {
        Self::make(DomEntryKind::Bool, v as u64, std::ptr::null())
    }

    /// Creates a `Number` entry borrowing the number's text.
    #[inline]
    pub fn number_entry(s: &'a str) -> Self {
        Self::borrowed_text_entry(DomEntryKind::Number, s)
    }

    /// Creates a `String` entry borrowing `s`.
    #[inline]
    pub fn string_entry(s: &'a str) -> Self {
        Self::borrowed_text_entry(DomEntryKind::String, s)
    }

    /// Creates an `EscapedString` entry that owns `s`.
    #[inline]
    pub fn escaped_string_entry(s: Box<str>) -> Self {
        Self::owned_text_entry(DomEntryKind::EscapedString, s)
    }

    /// Creates a `Key` entry borrowing `s`.
    #[inline]
    pub fn key_entry(s: &'a str) -> Self {
        Self::borrowed_text_entry(DomEntryKind::Key, s)
    }

    /// Creates an `EscapedKey` entry that owns `s`.
    #[inline]
    pub fn escaped_key_entry(s: Box<str>) -> Self {
        Self::owned_text_entry(DomEntryKind::EscapedKey, s)
    }

    /// Creates a `StartObject` entry whose payload is `end_idx`.
    #[inline]
    pub fn start_object_entry(end_idx: usize) -> Self {
        Self::make(DomEntryKind::StartObject, end_idx as u64, std::ptr::null())
    }

    /// Creates an `EndObject` entry.
    #[inline]
    pub fn end_object_entry() -> Self {
        Self::make(DomEntryKind::EndObject, 0, std::ptr::null())
    }

    /// Creates a `StartArray` entry whose payload is `end_idx`.
    #[inline]
    pub fn start_array_entry(end_idx: usize) -> Self {
        Self::make(DomEntryKind::StartArray, end_idx as u64, std::ptr::null())
    }

    /// Creates an `EndArray` entry.
    #[inline]
    pub fn end_array_entry() -> Self {
        Self::make(DomEntryKind::EndArray, 0, std::ptr::null())
    }

    /// Overwrites the payload bits with `v`, leaving the kind tag untouched.
    #[inline]
    pub(crate) fn set_payload(&mut self, v: usize) {
        let tag_bits = self.tag_payload & !PAYLOAD_MASK;
        let payload_bits = (v as u64) & PAYLOAD_MASK;
        self.tag_payload = tag_bits | payload_bits;
    }

    /// Returns the payload (end index) if this is a `StartObject` entry.
    #[inline]
    pub fn as_start_object(&self) -> Option<usize> {
        match self.kind() {
            DomEntryKind::StartObject => Some(self.payload() as usize),
            _ => None,
        }
    }

    /// Returns the payload (end index) if this is a `StartArray` entry.
    #[inline]
    pub fn as_start_array(&self) -> Option<usize> {
        match self.kind() {
            DomEntryKind::StartArray => Some(self.payload() as usize),
            _ => None,
        }
    }

    /// Returns the boolean value if this is a `Bool` entry.
    #[inline]
    pub fn as_bool(&self) -> Option<bool> {
        match self.kind() {
            DomEntryKind::Bool => Some(self.payload() != 0),
            _ => None,
        }
    }

    /// Returns the number's text if this is a `Number` entry.
    #[inline]
    pub fn as_number(&self) -> Option<&'a str> {
        match self.kind() {
            DomEntryKind::Number => Some(self.as_str_unchecked()),
            _ => None,
        }
    }

    /// Returns the string text for `String` and `EscapedString` entries.
    ///
    /// The result borrows from `self` because escaped text is owned by the
    /// entry itself.
    #[inline]
    pub fn as_string(&self) -> Option<&str> {
        let text = match self.kind() {
            DomEntryKind::String => self.as_str_unchecked(),
            DomEntryKind::EscapedString => self.as_escaped_str_unchecked(),
            _ => return None,
        };
        Some(text)
    }

    /// Returns the borrowed text (with the full `'a` lifetime) for `String`
    /// entries only; escaped strings yield `None`.
    #[cfg(feature = "serde")]
    #[inline]
    pub(crate) fn source_string(&self) -> Option<&'a str> {
        match self.kind() {
            DomEntryKind::String => Some(self.as_str_unchecked()),
            _ => None,
        }
    }

    /// Returns the key text for `Key` and `EscapedKey` entries.
    #[inline]
    pub fn as_key(&self) -> Option<&str> {
        let text = match self.kind() {
            DomEntryKind::Key => self.as_str_unchecked(),
            DomEntryKind::EscapedKey => self.as_escaped_str_unchecked(),
            _ => return None,
        };
        Some(text)
    }
}
// The items below are named like enum variants on purpose, so call sites can
// write `DomEntry::Null` or `DomEntry::Bool(true)`; that is why the naming
// lints are silenced for this block.
#[allow(non_snake_case, non_upper_case_globals)]
impl<'a> DomEntry<'a> {
    /// A `Null` entry: no payload, no pointer.
    pub const Null: DomEntry<'static> = DomEntry {
        tag_payload: (DomEntryKind::Null as u64) << KIND_SHIFT,
        ptr: std::ptr::null(),
        _marker: std::marker::PhantomData,
    };

    /// An `EndObject` entry: no payload, no pointer.
    pub const EndObject: DomEntry<'static> = DomEntry {
        tag_payload: (DomEntryKind::EndObject as u64) << KIND_SHIFT,
        ptr: std::ptr::null(),
        _marker: std::marker::PhantomData,
    };

    /// An `EndArray` entry: no payload, no pointer.
    pub const EndArray: DomEntry<'static> = DomEntry {
        tag_payload: (DomEntryKind::EndArray as u64) << KIND_SHIFT,
        ptr: std::ptr::null(),
        _marker: std::marker::PhantomData,
    };

    /// Variant-style alias for [`DomEntry::bool_entry`].
    #[inline]
    pub fn Bool(v: bool) -> Self {
        Self::bool_entry(v)
    }

    /// Variant-style alias for [`DomEntry::number_entry`].
    #[inline]
    pub fn Number(s: &'a str) -> Self {
        Self::number_entry(s)
    }

    /// Variant-style alias for [`DomEntry::string_entry`].
    #[inline]
    pub fn String(s: &'a str) -> Self {
        Self::string_entry(s)
    }

    /// Variant-style alias for [`DomEntry::escaped_string_entry`].
    #[inline]
    pub fn EscapedString(s: Box<str>) -> Self {
        Self::escaped_string_entry(s)
    }

    /// Variant-style alias for [`DomEntry::key_entry`].
    #[inline]
    pub fn Key(s: &'a str) -> Self {
        Self::key_entry(s)
    }

    /// Variant-style alias for [`DomEntry::escaped_key_entry`].
    #[inline]
    pub fn EscapedKey(s: Box<str>) -> Self {
        Self::escaped_key_entry(s)
    }

    /// Variant-style alias for [`DomEntry::start_object_entry`].
    #[inline]
    pub fn StartObject(end_idx: usize) -> Self {
        Self::start_object_entry(end_idx)
    }

    /// Variant-style alias for [`DomEntry::start_array_entry`].
    #[inline]
    pub fn StartArray(end_idx: usize) -> Self {
        Self::start_array_entry(end_idx)
    }
}
/// A parsed document stored as a flat tape of [`DomEntry`] values.
///
/// Containers are encoded as a start entry, their contents, and an end entry;
/// the start entry's payload holds the index of the matching end entry.
#[derive(Debug)]
pub struct Dom<'a> {
/// The tape, in the order the entries were recorded by the writer.
pub entries: Vec<DomEntry<'a>>,
// True once the writer has recorded at least one `EscapedString` or
// `EscapedKey` entry. `Drop` uses it to decide whether the per-entry
// destructors have to run.
pub(crate) has_escapes: bool,
}
impl<'a> Drop for Dom<'a> {
    /// Skips the per-entry destructors when no entry owns heap text.
    fn drop(&mut self) {
        if self.has_escapes {
            // Some entry owns a `Box<str>`: let the `Vec` drop every entry
            // normally so those allocations are released.
            return;
        }
        // SAFETY: shrinking to length 0 is always within capacity and exposes
        // no uninitialised elements. It only forgets the entries, which is
        // harmless here because `has_escapes == false` means none of them is
        // expected to own an allocation; the `Vec` buffer itself is still
        // freed afterwards.
        unsafe { self.entries.set_len(0) };
    }
}
/// `Sax` event sink that records events as a [`Dom`] tape.
pub(crate) struct DomWriter<'a> {
// Tape built so far.
entries: Vec<DomEntry<'a>>,
// Stack of tape indices of start entries that have not been closed yet.
open: Vec<usize>,
// Set when an escaped string or key has been recorded; copied into the `Dom`.
has_escapes: bool,
}
impl<'a> DomWriter<'a> {
    /// Creates an empty writer whose tape has room for `cap` entries before
    /// it needs to reallocate.
    pub(crate) fn with_capacity(cap: usize) -> Self {
        let entries = Vec::with_capacity(cap);
        let open = Vec::new();
        Self {
            entries,
            open,
            has_escapes: false,
        }
    }
}
impl<'a> Sax<'a> for DomWriter<'a> {
    type Output = Dom<'a>;

    /// Records a `Null` entry.
    fn null(&mut self) {
        self.entries.push(DomEntry::null_entry());
    }

    /// Records a `Bool` entry.
    fn bool_val(&mut self, v: bool) {
        self.entries.push(DomEntry::bool_entry(v));
    }

    /// Records a `Number` entry borrowing the number's text.
    fn number(&mut self, s: &'a str) {
        self.entries.push(DomEntry::number_entry(s));
    }

    /// Records a `String` entry borrowing `s`.
    fn string(&mut self, s: &'a str) {
        self.entries.push(DomEntry::string_entry(s));
    }

    /// Unescapes `s` and records the result as an owned `EscapedString` entry.
    fn escaped_string(&mut self, s: &str) {
        self.has_escapes = true;
        // Size the buffer from the escaped input so the unescaped text can
        // normally be written without regrowing. This is only a capacity hint
        // (it assumes unescaping does not lengthen the text); correctness does
        // not depend on it.
        let mut buf = String::with_capacity(s.len());
        crate::unescape_str(s, &mut buf);
        self.entries
            .push(DomEntry::escaped_string_entry(buf.into_boxed_str()));
    }

    /// Records a `Key` entry borrowing `s`.
    fn key(&mut self, s: &'a str) {
        self.entries.push(DomEntry::key_entry(s));
    }

    /// Unescapes `s` and records the result as an owned `EscapedKey` entry.
    fn escaped_key(&mut self, s: &str) {
        self.has_escapes = true;
        // Same capacity hint as in `escaped_string`.
        let mut buf = String::with_capacity(s.len());
        crate::unescape_str(s, &mut buf);
        self.entries
            .push(DomEntry::escaped_key_entry(buf.into_boxed_str()));
    }

    /// Records a `StartObject` placeholder and remembers its index so the
    /// payload can be patched when the object is closed.
    fn start_object(&mut self) {
        let idx = self.entries.len();
        self.open.push(idx);
        // Payload 0 is a placeholder until `end_object` fills in the end index.
        self.entries.push(DomEntry::start_object_entry(0));
    }

    /// Records an `EndObject` entry and stores its index in the payload of the
    /// most recently opened container, if any.
    fn end_object(&mut self) {
        let end_idx = self.entries.len();
        self.entries.push(DomEntry::end_object_entry());
        if let Some(start_idx) = self.open.pop() {
            self.entries[start_idx].set_payload(end_idx);
        }
    }

    /// Records a `StartArray` placeholder and remembers its index so the
    /// payload can be patched when the array is closed.
    fn start_array(&mut self) {
        let idx = self.entries.len();
        self.open.push(idx);
        // Payload 0 is a placeholder until `end_array` fills in the end index.
        self.entries.push(DomEntry::start_array_entry(0));
    }

    /// Records an `EndArray` entry and stores its index in the payload of the
    /// most recently opened container, if any.
    fn end_array(&mut self) {
        let end_idx = self.entries.len();
        self.entries.push(DomEntry::end_array_entry());
        if let Some(start_idx) = self.open.pop() {
            self.entries[start_idx].set_payload(end_idx);
        }
    }

    /// Finishes the tape.
    ///
    /// Returns `None` if a container is still open, otherwise the completed
    /// [`Dom`].
    fn finish(self) -> Option<Dom<'a>> {
        if self.open.is_empty() {
            Some(Dom {
                entries: self.entries,
                has_escapes: self.has_escapes,
            })
        } else {
            None
        }
    }
}
/// Cheap, copyable cursor pointing at one entry of a [`Dom`] tape.
///
/// `'t` is the borrow of the tape; `'src` is the lifetime parameter of the
/// entries it contains.
#[derive(Clone, Copy)]
pub struct DomRef<'t, 'src: 't> {
// The whole tape this cursor points into.
pub(crate) tape: &'t [DomEntry<'src>],
// Index in `tape` of the entry this cursor refers to.
pub(crate) pos: usize,
}
impl<'src> Dom<'src> {
pub fn root<'t>(&'t self) -> Option<DomRef<'t, 'src>> {
if self.entries.is_empty() {
None
} else {
Some(DomRef {
tape: &self.entries,
pos: 0,
})
}
}
}
/// Returns the index of the entry that follows the value starting at `pos`.
///
/// For a container start entry this jumps past the matching end entry (whose
/// index is stored in the payload); for any other entry it is simply
/// `pos + 1`. Panics if `pos` is out of bounds.
pub(crate) fn dom_skip(entries: &[DomEntry<'_>], pos: usize) -> usize {
    let entry = &entries[pos];
    let last_of_value = match entry.kind() {
        DomEntryKind::StartObject | DomEntryKind::StartArray => entry.payload() as usize,
        _ => pos,
    };
    last_of_value + 1
}
/// Iterator over the `(key, value)` pairs of an object on the tape.
///
/// Created by `DomRef::object_iter`.
pub struct DomObjectIter<'t, 'src: 't> {
// The whole tape being walked.
tape: &'t [DomEntry<'src>],
// Index of the next key entry to yield.
pos: usize,
// Index of the object's end entry; iteration stops once `pos` reaches it.
end: usize,
}
impl<'t, 'src: 't> Iterator for DomObjectIter<'t, 'src> {
    type Item = (&'t str, DomRef<'t, 'src>);

    /// Yields the next key together with a cursor at its value, then advances
    /// past the whole value (including nested containers).
    ///
    /// Returns `None` at the end of the object, or if the entry at the
    /// current position is not a key.
    fn next(&mut self) -> Option<Self::Item> {
        let tape = self.tape;
        let key_pos = self.pos;
        if key_pos >= self.end {
            return None;
        }

        let key: &'t str = tape[key_pos].as_key()?;
        let value_pos = key_pos + 1;
        self.pos = dom_skip(tape, value_pos);

        let value = DomRef {
            tape,
            pos: value_pos,
        };
        Some((key, value))
    }
}
/// Iterator over the elements of an array on the tape.
///
/// Created by `DomRef::array_iter`.
pub struct DomArrayIter<'t, 'src: 't> {
// The whole tape being walked.
tape: &'t [DomEntry<'src>],
// Index of the next element to yield.
pos: usize,
// Index of the array's end entry; iteration stops once `pos` reaches it.
end: usize,
}
impl<'t, 'src: 't> Iterator for DomArrayIter<'t, 'src> {
    type Item = DomRef<'t, 'src>;

    /// Yields a cursor at the next element, then advances past the whole
    /// element (including nested containers).
    fn next(&mut self) -> Option<Self::Item> {
        let tape = self.tape;
        let element_pos = self.pos;
        if element_pos >= self.end {
            return None;
        }

        self.pos = dom_skip(tape, element_pos);
        Some(DomRef {
            tape,
            pos: element_pos,
        })
    }
}
impl<'t, 'src: 't> DomRef<'t, 'src> {
pub fn object_iter(self) -> Option<DomObjectIter<'t, 'src>> {
self.tape[self.pos]
.as_start_object()
.map(|end| DomObjectIter {
tape: self.tape,
pos: self.pos + 1,
end,
})
}
pub fn array_iter(self) -> Option<DomArrayIter<'t, 'src>> {
self.tape[self.pos]
.as_start_array()
.map(|end| DomArrayIter {
tape: self.tape,
pos: self.pos + 1,
end,
})
}
}
// Unit tests for the tape layout produced by `parse_to_dom` and for the
// object/array iterators built on top of it.
#[cfg(test)]
mod tests {
// NOTE(review): `JsonRef` is presumably the trait providing `as_number_str`,
// `as_bool`, `as_str`, `is_null`, `is_array` and `is_object` on `DomRef`;
// those methods are not defined in this file.
use crate::{JsonRef, parse_to_dom};
use super::{Dom, DomEntry};
// Parses `json` into a tape; `None` is passed for the second argument of
// `parse_to_dom`.
fn run_tape(json: &'static str) -> Option<Dom<'static>> {
parse_to_dom(json, None)
}
// Shorthand for an expected `String` entry.
fn te_str(s: &'static str) -> DomEntry<'static> {
DomEntry::String(s)
}
// Shorthand for an expected `Key` entry.
fn te_key(s: &'static str) -> DomEntry<'static> {
DomEntry::Key(s)
}
// Shorthand for an expected `Number` entry.
fn te_num(s: &'static str) -> DomEntry<'static> {
DomEntry::Number(s)
}
// A scalar document produces a single-entry tape.
#[test]
fn tape_scalar_values() {
assert_eq!(run_tape("null").unwrap().entries, vec![DomEntry::Null]);
assert_eq!(
run_tape("true").unwrap().entries,
vec![DomEntry::Bool(true)]
);
assert_eq!(
run_tape("false").unwrap().entries,
vec![DomEntry::Bool(false)]
);
assert_eq!(run_tape("42").unwrap().entries, vec![te_num("42")]);
assert_eq!(run_tape(r#""hi""#).unwrap().entries, vec![te_str("hi")]);
}
// `{}` is a start entry pointing at index 1, followed by the end entry.
#[test]
fn tape_empty_object() {
let t = run_tape("{}").unwrap();
assert_eq!(
t.entries,
vec![DomEntry::StartObject(1), DomEntry::EndObject]
);
assert_eq!(t.entries[0], DomEntry::StartObject(1));
}
// `[]` is a start entry pointing at index 1, followed by the end entry.
#[test]
fn tape_empty_array() {
let t = run_tape("[]").unwrap();
assert_eq!(t.entries, vec![DomEntry::StartArray(1), DomEntry::EndArray]);
assert_eq!(t.entries[0], DomEntry::StartArray(1));
}
// One key/value pair: the start payload (3) is the index of `EndObject`.
#[test]
fn tape_simple_object() {
let t = run_tape(r#"{"a":1}"#).unwrap();
assert_eq!(
t.entries,
vec![
DomEntry::StartObject(3),
te_key("a"),
te_num("1"),
DomEntry::EndObject,
]
);
assert_eq!(t.entries[0], DomEntry::StartObject(3));
}
// Three elements: the start payload (4) is the index of `EndArray`.
#[test]
fn tape_simple_array() {
let t = run_tape(r#"[1,2,3]"#).unwrap();
assert_eq!(
t.entries,
vec![
DomEntry::StartArray(4),
te_num("1"),
te_num("2"),
te_num("3"),
DomEntry::EndArray,
]
);
}
// Nested containers each record the index of their own end entry.
#[test]
fn tape_nested() {
let t = run_tape(r#"{"a":[1,2]}"#).unwrap();
assert_eq!(
t.entries,
vec![
// Indices 0..=6: the object ends at 6, the inner array (index 2) ends at 5.
DomEntry::StartObject(6), te_key("a"), DomEntry::StartArray(5), te_num("1"), te_num("2"), DomEntry::EndArray, DomEntry::EndObject, ]
);
assert_eq!(t.entries[0], DomEntry::StartObject(6));
assert_eq!(t.entries[2], DomEntry::StartArray(5));
}
// Keys and values alternate in source order inside an object.
#[test]
fn tape_multi_key_object() {
let t = run_tape(r#"{"x":1,"y":2}"#).unwrap();
assert_eq!(
t.entries,
vec![
// Indices 0..=5: the object ends at index 5.
DomEntry::StartObject(5), te_key("x"), te_num("1"), te_key("y"), te_num("2"), DomEntry::EndObject, ]
);
assert_eq!(t.entries[0], DomEntry::StartObject(5));
}
// Trailing commas and unquoted keys are rejected.
#[test]
fn tape_invalid_returns_none() {
assert!(run_tape("[1,2,]").is_none());
assert!(run_tape(r#"{"a":1,}"#).is_none());
assert!(run_tape("{bad}").is_none());
}
// The inner object's start entry (index 1) points at its end entry (index 4),
// so the element after it is found at index 5.
#[test]
fn tape_skip_object() {
let t = run_tape(r#"[{"x":1},2]"#).unwrap();
assert_eq!(t.entries.len(), 7);
let end = t.entries[1]
.as_start_object()
.expect("expected StartObject at index 1");
assert_eq!(end, 4);
assert_eq!(t.entries[5], te_num("2"));
}
// `object_iter` yields pairs in source order and is `None` for non-objects.
#[test]
fn tape_object_iter() {
let t = run_tape(r#"{"x":1,"y":true,"z":"hi"}"#).unwrap();
let root = t.root().unwrap();
let pairs: Vec<_> = root
.object_iter()
.expect("should be object")
.map(|(k, v)| (k.to_string(), (v.as_number_str(), v.as_bool(), v.as_str())))
.collect();
assert_eq!(pairs.len(), 3);
assert_eq!(pairs[0].0, "x");
assert_eq!(pairs[0].1, (Some("1"), None, None));
assert_eq!(pairs[1].0, "y");
assert_eq!(pairs[1].1, (None, Some(true), None));
assert_eq!(pairs[2].0, "z");
assert_eq!(pairs[2].1, (None, None, Some("hi")));
// An array root is not an object.
let at = parse_to_dom("[1]", None).unwrap();
assert!(at.root().unwrap().object_iter().is_none());
}
// `array_iter` yields elements in source order, treats nested containers as
// single elements, and is `None` for non-arrays.
#[test]
fn tape_array_iter() {
let t = run_tape(r#"[1,"two",false,null]"#).unwrap();
let root = t.root().unwrap();
let items: Vec<_> = root.array_iter().expect("should be array").collect();
assert_eq!(items.len(), 4);
assert_eq!(items[0].as_number_str(), Some("1"));
assert_eq!(items[1].as_str(), Some("two"));
assert_eq!(items[2].as_bool(), Some(false));
assert!(items[3].is_null());
// Nested containers count as one element each.
let nt = run_tape(r#"[[1,2],{"a":3}]"#).unwrap();
let nelems: Vec<_> = nt.root().unwrap().array_iter().unwrap().collect();
assert_eq!(nelems.len(), 2);
assert!(nelems[0].is_array());
assert!(nelems[1].is_object());
// An object root is not an array.
let ot = parse_to_dom(r#"{"a":1}"#, None).unwrap();
assert!(ot.root().unwrap().array_iter().is_none());
}
}