use crate::{
Buf, IntoBuf, lexical,
pointer::{
Group, Pointer,
group::{InnerNode, Node},
},
};
#[cfg(feature = "ignore_case")]
use caseless::Caseless;
use smallvec::{SmallVec, smallvec};
use std::{cmp::Ordering, iter::Peekable, ops::Range};
/// One frame of the machine's state stack, describing the JSON context the machine is in.
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
enum State {
/// Top level, outside any array or object. This is the default (initial) frame.
#[default]
Root,
/// Inside an array.
Arr {
/// Index of the group node entered for this array, or `None` if no node was entered.
node_index: Option<usize>,
/// Index of the next item value in this array; starts at 0 and is incremented by `exit`.
item_index: usize,
},
/// Inside an object, before a member name has been supplied.
Obj {
/// Index of the group node entered for this object, or `None` if no node was entered.
node_index: Option<usize>,
},
/// A member name has been supplied and its value has not been seen yet.
MemberName {
/// Index of the group node found for the member name, or `None` if none was found.
node_index: Option<usize>,
},
}
/// Tells the caller what to do with the contents of an array or object that just began.
///
/// Returned by `Machine::arr_begin` and `Machine::obj_begin`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StructAction {
/// The entered node has children of the relevant kind; feed the contents to the machine.
Enter,
/// Nothing inside can be looked up; the machine panics if values or member names are fed to
/// it before the matching end call.
Skip,
}
/// State machine that tracks the position within a JSON document and reports which pointer of
/// a `Group`, if any, is attached to the node for the current position.
#[derive(Debug)]
pub struct Machine<G = Group> {
// The pointer group (or something that can be borrowed as one) being evaluated.
group: G,
// Whether member names that report `is_escaped()` are unescaped before lookup.
unescape: bool,
// Stack of context frames; it always holds at least the root frame.
state: SmallVec<[State; 8]>,
// Reusable buffer that receives unescaped member names.
scratch: Vec<u8>,
}
impl<G: AsRef<Group>> Machine<G> {
pub fn new(group: G, unescape: bool) -> Self {
Self {
group,
unescape,
state: smallvec![State::default()],
scratch: Vec::new(),
}
}
/// Records the start of an array value.
///
/// Returns the action to take for the array's items together with the pointer attached to
/// the entered node, if any. The action is [`StructAction::Enter`] only when the entered
/// node has index children.
///
/// # Panics
///
/// Panics if a value is not allowed at the current position (inside a skipped array or
/// object, or inside an object before a member name).
pub fn arr_begin(&mut self) -> (StructAction, Option<&Pointer>) {
    self.value();
    let node_index = self.enter();
    self.push_state(State::Arr {
        node_index,
        item_index: 0,
    });
    // No node was entered: nothing below this array can be looked up.
    let Some(n) = node_index else {
        return (StructAction::Skip, None);
    };
    let node = self.node_at_index(n);
    let has_index_children = node.num_index_children > 0;
    let action = if has_index_children {
        StructAction::Enter
    } else {
        StructAction::Skip
    };
    let pointer = node.match_index.map(|p| self.pointer_at_index(p));
    (action, pointer)
}
/// Records the end of the current array and returns the pointer attached to the array's
/// node, if any.
///
/// # Panics
///
/// Panics if the machine is not currently inside an array.
pub fn arr_end(&mut self) -> Option<&Pointer> {
    let ended = self.state();
    match ended {
        State::Arr { .. } => {
            self.pop_state();
            self.exit(ended)
        }
        _ => panic!("no array to end"),
    }
}
/// Records the start of an object value.
///
/// Returns the action to take for the object's members together with the pointer attached
/// to the entered node, if any. The action is [`StructAction::Enter`] only when the entered
/// node has name children.
///
/// # Panics
///
/// Panics if a value is not allowed at the current position (inside a skipped array or
/// object, or inside an object before a member name).
pub fn obj_begin(&mut self) -> (StructAction, Option<&Pointer>) {
    self.value();
    let node_index = self.enter();
    self.push_state(State::Obj { node_index });
    // No node was entered: nothing below this object can be looked up.
    let Some(n) = node_index else {
        return (StructAction::Skip, None);
    };
    let node = self.node_at_index(n);
    let has_name_children = node.num_name_children > 0;
    let action = if has_name_children {
        StructAction::Enter
    } else {
        StructAction::Skip
    };
    let pointer = node.match_index.map(|p| self.pointer_at_index(p));
    (action, pointer)
}
/// Records the end of the current object and returns the pointer attached to the object's
/// node, if any.
///
/// # Panics
///
/// Panics if the top state frame is not an object frame (for example, a member name is
/// still awaiting its value).
pub fn obj_end(&mut self) -> Option<&Pointer> {
    let ended = self.state();
    match ended {
        State::Obj { .. } => {
            self.pop_state();
            self.exit(ended)
        }
        _ => panic!("no object to end"),
    }
}
/// Records an object member name; the next value fed to the machine is that member's value.
///
/// # Panics
///
/// Panics if the current object is skipped, if the previous member name has not received
/// its value yet, or if the machine is not inside an object at all. Also panics if `name`
/// is not a double-quoted JSON string literal.
pub fn member_name<C: lexical::Content>(&mut self, name: C) {
    // Resolve the object node first; every other state is a caller error.
    let object_node = match self.state() {
        State::Obj {
            node_index: Some(n),
        } if self.node_at_index(n).num_name_children > 0 => n,
        State::Obj { .. } => {
            panic!("member name not allowed in skipped object")
        }
        State::MemberName { .. } => {
            panic!("member value required before next member name")
        }
        _ => panic!("member name not allowed here"),
    };
    let node_index = self.find_name_child(object_node, name);
    self.push_state(State::MemberName { node_index });
}
/// Records a primitive (non-array, non-object) value and returns the pointer attached to
/// the node for its position, if any.
///
/// # Panics
///
/// Panics if a value is not allowed at the current position.
pub fn primitive(&mut self) -> Option<&Pointer> {
    self.value();
    let node_index = self.enter();
    // A primitive is entered and exited in one step; the root placeholder reports no
    // pointer from `exit`, so its result is intentionally dropped.
    self.exit(State::default());
    let n = node_index?;
    let p = self.node_at_index(n).match_index?;
    Some(self.pointer_at_index(p))
}
pub fn into_inner(self) -> G {
self.group
}
/// Borrows the underlying pointer group.
#[inline(always)]
fn group(&self) -> &Group {
    let group: &Group = self.group.as_ref();
    group
}
/// Returns the group node stored at `node_index`; panics if the index is out of bounds.
fn node_at_index(&self, node_index: usize) -> &Node {
    let nodes = &self.group().nodes;
    &nodes[node_index]
}
/// Returns the group pointer stored at `pointer_index`; panics if the index is out of bounds.
fn pointer_at_index(&self, pointer_index: usize) -> &Pointer {
    let pointers = &self.group().pointers;
    &pointers[pointer_index]
}
/// Returns a copy of the top frame of the state stack.
fn state(&self) -> State {
    self.state
        .last()
        .copied()
        .expect("state should never be empty")
}
/// Returns a mutable reference to the top frame of the state stack.
fn state_mut(&mut self) -> &mut State {
    match self.state.last_mut() {
        Some(top) => top,
        None => panic!("state should never be empty"),
    }
}
/// Checks that a value may start at the current position.
///
/// # Panics
///
/// Panics when the machine is inside a skipped array or object, or inside a non-skipped
/// object that has not received a member name for this value.
fn value(&self) {
    match self.state() {
        State::Arr { node_index, .. } => {
            // An array is skipped when it entered no node or its node has no index children.
            let skipped = match node_index {
                None => true,
                Some(n) => self.node_at_index(n).num_index_children == 0,
            };
            if skipped {
                panic!("value not allowed in skipped array")
            }
        }
        State::Obj { node_index } => {
            // An object is skipped when it entered no node or its node has no name children.
            let skipped = match node_index {
                None => true,
                Some(n) => self.node_at_index(n).num_name_children == 0,
            };
            if skipped {
                panic!("value not allowed in skipped object")
            }
            // Not skipped, but a value directly inside an object needs a member name first.
            panic!("missing object member name")
        }
        State::Root | State::MemberName { .. } => (),
    }
}
/// Pushes `new_state` as the new top frame of the state stack.
fn push_state(&mut self, new_state: State) {
self.state.push(new_state);
}
/// Discards the top frame of the state stack; panics if the stack is already empty.
fn pop_state(&mut self) {
    if self.state.pop().is_none() {
        panic!("state should not be empty before popping");
    }
}
/// Resolves the group node for the value that is about to start, based on the top frame.
///
/// At the root this is node 0; after a member name it is the node found for that name; in
/// an array it is the index child for the current item position. Returns `None` when no
/// node exists for the value. Callers run `value()` first, which rules out the
/// unreachable frames below.
fn enter(&mut self) -> Option<usize> {
    let (array_node, item) = match self.state() {
        State::Root => return Some(0),
        State::MemberName { node_index } => return node_index,
        State::Arr {
            node_index: Some(n),
            item_index,
        } => (n, item_index),
        State::Obj {
            node_index: Some(_),
        } => unreachable!("expected `member_name(...)` call (state={:?})", self.state),
        State::Arr {
            node_index: None, ..
        }
        | State::Obj { node_index: None } => {
            unreachable!("can't enter a skipped array or object")
        }
    };
    self.find_index_child(array_node, item)
}
/// Finishes a value and returns the pointer attached to the node recorded in `prev_state`.
///
/// The enclosing frame (now on top of the stack) is updated first: an array advances to its
/// next item, and a pending member name is popped because its value is complete. A pointer
/// is returned only when `prev_state` is an array or object frame whose node has one.
fn exit(&mut self, prev_state: State) -> Option<&Pointer> {
    let enclosing = self.state_mut();
    match enclosing {
        State::MemberName { .. } => self.pop_state(),
        State::Arr { item_index, .. } => *item_index += 1,
        State::Root | State::Obj { .. } => (),
    }
    let n = match prev_state {
        State::Arr { node_index, .. } | State::Obj { node_index } => node_index?,
        State::Root | State::MemberName { .. } => return None,
    };
    let p = self.node_at_index(n).match_index?;
    Some(self.pointer_at_index(p))
}
/// Largest number of sibling nodes searched linearly; larger ranges use binary search.
#[cfg(not(test))]
const MAX_LINEAR_SEARCH_LEN: usize = 8;
/// Test builds use a smaller threshold than non-test builds (2 instead of 8).
#[cfg(test)]
const MAX_LINEAR_SEARCH_LEN: usize = 2;
/// Finds the index child of the array node at `node_index` whose index equals `item_index`.
///
/// The index children are located after the node's trie and name children. Small child
/// ranges are scanned linearly and larger ones are binary-searched. Returns the child's
/// node index, or `None` if no child has that index.
fn find_index_child(&self, node_index: usize, item_index: usize) -> Option<usize> {
    let parent = self.node_at_index(node_index);
    let first = parent
        .child_index
        .expect("node for non-skipped array must have a child")
        .get() as usize
        + parent.num_trie_children as usize
        + parent.num_name_children as usize;
    let end = first + parent.num_index_children as usize;
    let wanted = item_index as u64;
    let nodes = &self.group().nodes;
    let offset = if end - first <= Self::MAX_LINEAR_SEARCH_LEN {
        nodes
            .iter()
            .take(end)
            .skip(first)
            .position(|c| matches!(c.inner, InnerNode::Index(idx) if idx == wanted))
    } else {
        nodes[first..end]
            .binary_search_by_key(&wanted, |c| match c.inner {
                InnerNode::Index(n) => n,
                _ => panic!("logic error: expected an index node, got {c:?}"),
            })
            .ok()
    };
    // Both searches yield an offset relative to the first index child.
    offset.map(|o| first + o)
}
/// Finds the child of the object node at `node_index` that matches the member `name`.
///
/// The search covers the node's trie and name children. If unescaping is enabled and the
/// name reports itself as escaped, the name is first unescaped into the machine's scratch
/// buffer; otherwise its literal bytes are searched as they are.
///
/// Returns the matching child's node index, or `None` if nothing matches.
///
/// # Panics
///
/// Panics if the node has no children or `name` is not a double-quoted JSON string literal.
fn find_name_child<C: lexical::Content>(
    &mut self,
    node_index: usize,
    name: C,
) -> Option<usize> {
    let node = self.node_at_index(node_index);
    let start = node
        .child_index
        .expect("name node for non-skipped object must have a child")
        .get() as usize;
    let end = start + node.num_trie_children as usize + node.num_name_children as usize;
    if !self.unescape || !name.is_escaped() {
        self.find_name_child_in_buf(start..end, name.literal().into_buf())
    } else {
        self.scratch.clear();
        lexical::unescape(name.literal(), &mut self.scratch);
        // The lookup only needs `&self`, so the scratch buffer can be borrowed in place
        // instead of being cloned into a fresh allocation for every escaped name.
        self.find_name_child_in_buf(start..end, self.scratch.as_slice())
    }
}
/// Looks up the member name held in `name_buf` among the nodes in `node_range`.
///
/// `name_buf` must hold a complete JSON string literal including both double quotes. The
/// opening quote is consumed, the characters after it are fed to the trie search (case-folded
/// when the `ignore_case` feature is enabled and the group's `ignore_case` flag is set), and
/// then the closing quote is checked.
///
/// # Panics
///
/// Panics via `consume_quote` if the literal does not start and end with `"`.
fn find_name_child_in_buf<B: Buf>(
&self,
node_range: Range<usize>,
mut name_buf: B,
) -> Option<usize> {
Self::consume_quote(&mut name_buf);
let name_iter = BufIter::new(&mut name_buf);
let child_node = {
#[cfg(not(feature = "ignore_case"))]
{
let mut name_iter = name_iter.peekable();
self.find_name_child_iter(node_range, &mut name_iter)
}
#[cfg(feature = "ignore_case")]
if !self.group().ignore_case {
let mut name_iter = name_iter.peekable();
self.find_name_child_iter(node_range, &mut name_iter)
} else {
let mut name_iter = name_iter.default_case_fold().peekable();
self.find_name_child_iter(node_range, &mut name_iter)
}
};
// The search may stop early, so skip whatever is left except the final byte, which must
// be the closing quote.
if name_buf.remaining() > 1 {
name_buf.advance(name_buf.remaining() - 1)
}
Self::consume_quote(&mut name_buf);
child_node
}
/// Walks the name trie, consuming characters from `name_iter`, starting with the sibling
/// nodes in `node_range`.
///
/// A node whose name part matches is accepted when the name is exhausted and the node
/// either carries a pointer or has no trie children. If characters remain and the node has
/// trie children, the search recurses into those children. Any other outcome is a miss.
fn find_name_child_iter<I>(
    &self,
    node_range: Range<usize>,
    name_iter: &mut Peekable<I>,
) -> Option<usize>
where
    I: Iterator<Item = char>,
{
    let found = if node_range.len() > Self::MAX_LINEAR_SEARCH_LEN {
        self.iter_search_binary(node_range, name_iter)
    } else {
        self.iter_search_linear(node_range, name_iter)
    }?;
    let name_has_more = name_iter.peek().is_some();
    let node = &self.group().nodes[found];
    if name_has_more {
        if node.num_trie_children > 0 {
            let first = node
                .child_index
                .expect("node with trie children must have child index")
                .get() as usize;
            let end = first + node.num_trie_children as usize;
            self.find_name_child_iter(first..end, name_iter)
        } else {
            None
        }
    } else if node.match_index.is_some() || node.num_trie_children == 0 {
        Some(found)
    } else {
        None
    }
}
/// Scans the sibling nodes in `node_range` in order for one whose name part matches the
/// upcoming characters of `name_iter`.
///
/// The scan stops with `None` as soon as a node compares greater than the name, which
/// relies on the siblings being stored in ascending order.
fn iter_search_linear<I>(
    &self,
    node_range: Range<usize>,
    name_iter: &mut Peekable<I>,
) -> Option<usize>
where
    I: Iterator<Item = char>,
{
    // Portion of the name already consumed by earlier comparisons.
    let mut consumed: &str = "";
    for candidate in node_range {
        let part = self.group().nodes[candidate].name_part();
        match self.iter_search_compare(part, &mut consumed, name_iter) {
            Ordering::Less => {}
            Ordering::Equal => return Some(candidate),
            Ordering::Greater => break,
        }
    }
    None
}
/// Binary-searches the sibling nodes in `node_range` for one whose name part matches the
/// upcoming characters of `name_iter`.
///
/// Relies on the siblings being stored in ascending order.
fn iter_search_binary<I>(
    &self,
    node_range: Range<usize>,
    name_iter: &mut Peekable<I>,
) -> Option<usize>
where
    I: Iterator<Item = char>,
{
    let Range {
        start: mut low,
        end: mut high,
    } = node_range;
    // Portion of the name already consumed by earlier comparisons.
    let mut consumed: &str = "";
    while low < high {
        let middle = low + (high - low) / 2;
        let part = self.group().nodes[middle].name_part();
        let ord = self.iter_search_compare(part, &mut consumed, name_iter);
        if ord == Ordering::Equal {
            return Some(middle);
        }
        if ord == Ordering::Less {
            low = middle + 1;
        } else {
            high = middle;
        }
    }
    None
}
/// Compares the node name part `s` against the member name being searched for.
///
/// `prefix` is the part of the name that earlier comparisons already consumed from
/// `name_iter`. If `s` does not start with that prefix, the two are compared directly.
/// Otherwise the rest of `s` is matched character by character against `name_iter`, which
/// is advanced past every matching character. On return `prefix` is updated to the part of
/// `s` known to match the name.
///
/// Returns `Equal` when all of `s` matched, `Less`/`Greater` when `s` orders before/after
/// the name at the first difference, and `Greater` when the name ran out first.
fn iter_search_compare<'a, I>(
    &self,
    s: &'a str,
    prefix: &mut &'a str,
    name_iter: &mut Peekable<I>,
) -> Ordering
where
    I: Iterator<Item = char>,
{
    if !s.starts_with(*prefix) {
        return s.cmp(prefix);
    }
    // `n` counts bytes, so resume at that byte offset. Skipping `prefix.len()` *chars*
    // would overshoot for any prefix containing multi-byte characters. The slice is on a
    // char boundary because `s` starts with `prefix`.
    let mut n = prefix.len();
    let mut s_iter = s[n..].chars();
    let ord = loop {
        match (s_iter.next(), name_iter.peek()) {
            (None, _) => break Ordering::Equal,
            (Some(want), Some(have)) if want == *have => {
                name_iter.next();
                n += want.len_utf8();
            }
            (Some(want), Some(have)) => break want.cmp(have),
            (Some(_), None) => break Ordering::Greater,
        }
    };
    *prefix = &s[..n];
    ord
}
/// Reads one byte from `name` and requires it to be a double quote.
///
/// # Panics
///
/// Panics if the buffer is empty or the byte is not `"`.
fn consume_quote<B: Buf>(name: &mut B) {
    let mut first = [0u8; 1];
    let is_quote = name.try_copy_to_slice(&mut first).is_ok() && first[0] == b'"';
    if !is_quote {
        panic!("member name must be a valid JSON string enclosed in double quotes ('\"')");
    }
}
}
/// Iterator that decodes the UTF-8 bytes of a `Buf` into `char`s, stopping before the final
/// byte of the buffer.
///
/// `buf` is the buffer being read; `pos` is the read offset within the buffer's current chunk.
#[derive(Debug)]
pub struct BufIter<'a, B> {
buf: &'a mut B, pos: usize, }
impl<'a, B: Buf> BufIter<'a, B> {
pub fn new(buf: &'a mut B) -> Self {
Self { buf, pos: 0 }
}
fn has_more_chars(&self) -> bool {
let n = self.buf.chunk().len();
if self.pos + 1 < n {
true
} else if self.pos < n {
n < self.buf.remaining()
} else {
n + 1 < self.buf.remaining()
}
}
}
/// Yields the buffer's contents one `char` at a time, decoding UTF-8 by hand so that
/// multi-byte sequences may span chunk boundaries.
///
/// Iteration ends while one byte is still unread. Decoding panics on a leading continuation
/// byte, on a sequence that cannot be completed before the final byte, and on a code point
/// that is not a valid `char`.
impl<'a, B: Buf> Iterator for BufIter<'a, B> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
if !self.has_more_chars() {
return None;
}
let mut chunk = self.buf.chunk();
// Current chunk fully read: advance the buffer past it and continue with the next chunk.
if self.pos == chunk.len() {
let n = chunk.len();
// NOTE(review): presumably resets `chunk` so that no borrow of the buffer is held across
// `advance` — confirm whether this is still needed.
#[allow(unused)]
{
chunk = &[]; }
self.buf.advance(n);
self.pos = 0;
chunk = self.buf.chunk();
}
let b = chunk[self.pos];
// Fast path: a single-byte (ASCII) character.
if b.is_ascii() {
self.pos += 1;
return Some(char::from(b));
}
// The high nibble of the lead byte gives the total sequence length `m`.
let m = match b >> 4 {
0b1100 | 0b1101 => 2,
0b1110 => 3,
0b1111 => 4,
_ => panic!("unexpected UTF-8 continuation byte {b:02x}"),
};
// The whole sequence plus one more byte (the final byte of the buffer) must be unread.
let rem = self.buf.remaining() - self.pos;
if rem < m + 1 {
panic!(
"only {rem} bytes remaining, not enough to complete {m}-byte sequenced started by {b:02x}"
);
}
let mut tmp = [b, 0, 0, 0];
self.pos += 1;
// Collect the remaining `m - 1` bytes, moving to the next chunk whenever one runs out.
// NOTE(review): these bytes are not checked for the `10xxxxxx` continuation form.
for b in tmp.iter_mut().take(m).skip(1) {
if self.pos == chunk.len() {
let n = chunk.len();
#[allow(unused)]
{
chunk = &[]; }
self.buf.advance(n);
self.pos = 0;
chunk = self.buf.chunk();
}
*b = chunk[self.pos];
self.pos += 1;
}
// Assemble the code point from the payload bits of each byte.
let code_point = match m {
2 => ((tmp[0] as u32 & 0x1f) << 6) | (tmp[1] as u32 & 0x3f),
3 => {
((tmp[0] as u32 & 0x0f) << 12)
| ((tmp[1] as u32 & 0x3f) << 6)
| (tmp[2] as u32 & 0x3f)
}
4 => {
((tmp[0] as u32 & 0x07) << 18)
| ((tmp[1] as u32 & 0x3f) << 12)
| ((tmp[2] as u32 & 0x3f) << 6)
| (tmp[3] as u32 & 0x3f)
}
_ => unreachable!(),
};
let c = char::from_u32(code_point);
if c.is_some() {
c
} else {
panic!("invalid {m}-byte UTF-8 character: {:02x?}", &tmp[..m]);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{BufUnderflow, lexical::fixed, pointer::Pointer};
#[cfg(feature = "ignore_case")]
use caseless::default_case_fold_str;
use rstest::rstest;
// Runs block `$b` with `$mach` bound to a machine built from `$pointers` and `$unescape`.
// Without the `ignore_case` feature the block runs once with `$ignore_case = false`; with
// the feature it runs for both a case-sensitive and a case-insensitive group.
macro_rules! for_ignore_case_options {
($pointers:expr, $unescape:expr, |$mach:ident, $ignore_case:ident| $b:block) => {
#[cfg(not(feature = "ignore_case"))]
{
let $ignore_case = false;
let group: Group = $pointers.clone().into_iter().collect();
let mut $mach = Machine::new(group, $unescape);
$b
}
#[cfg(feature = "ignore_case")]
for $ignore_case in [false, true] {
let group = if $ignore_case {
Group::from_pointers_ignore_case($pointers.clone())
} else {
Group::from_pointers($pointers.clone())
};
let mut $mach = Machine::new(group, $unescape);
$b
}
};
}
// Runs block `$b` for every combination of `$unescape` (false, true) and the case options
// provided by `for_ignore_case_options!`.
macro_rules! for_all_options {
($pointers:expr, |$mach:ident, $unescape:ident, $ignore_case:ident| $b:block) => {
for $unescape in [false, true] {
for_ignore_case_options!($pointers, $unescape, |$mach, $ignore_case| $b);
}
};
}
// With an empty group the root array is skipped, so starting any value inside it must panic.
#[rstest]
#[case::array(|m: &mut Machine| { m.arr_begin(); })]
#[case::object(|m: &mut Machine| { m.obj_begin(); })]
#[case::primitive(|m: &mut Machine| { m.primitive(); })]
#[should_panic(expected = "value not allowed in skipped array")]
fn test_arr_begin_panics_when_should_skip<T>(#[case] trigger: T)
where
T: Fn(&mut Machine),
{
for_all_options!(Vec::<Pointer>::new(), |mach, _unescape, _ignore_case| {
assert_eq!((StructAction::Skip, None), mach.arr_begin());
trigger(&mut mach)
});
}
// An array under a member name that matches no pointer is skipped, so starting any value
// inside it must panic.
#[rstest]
#[case::array(|m: &mut Machine| { m.arr_begin(); })]
#[case::object(|m: &mut Machine| { m.obj_begin(); })]
#[case::primitive(|m: &mut Machine| { m.primitive(); })]
#[should_panic(expected = "value not allowed in skipped array")]
fn test_arr_begin_panics_when_should_skip_outer_obj<T>(#[case] trigger: T)
where
T: Fn(&mut Machine),
{
for_all_options!(
[Pointer::from_static("/not_in_doc")],
|mach, _unescape, _ignore_case| {
assert_eq!((StructAction::Enter, None), mach.obj_begin());
mach.member_name(fixed::Content::from_static(r#""foo""#));
assert_eq!((StructAction::Skip, None), mach.arr_begin());
trigger(&mut mach)
}
);
}
// Ending an array must panic when the machine is at the root, in an object, or after a
// member name.
#[rstest]
#[case::root(|_: &mut Machine| { })]
#[case::object(|m: &mut Machine| { m.obj_begin(); })]
#[case::object_member(|m: &mut Machine| { m.obj_begin(); m.member_name(fixed::Content::from_static(r#""""#)); })]
#[should_panic(expected = "no array to end")]
fn test_arr_end_panics_when_no_arr<S>(#[case] setup: S)
where
S: Fn(&mut Machine),
{
for_all_options!(
[Pointer::from_static("/foo")],
|mach, _unescape, _ignore_case| {
setup(&mut mach);
let _ = mach.arr_end();
}
);
}
// With an empty group the root object is skipped, so starting any value inside it must panic.
#[rstest]
#[case::array(|m: &mut Machine| { m.arr_begin(); })]
#[case::object(|m: &mut Machine| { m.obj_begin(); })]
#[case::primitive(|m: &mut Machine| { m.primitive(); })]
#[should_panic(expected = "value not allowed in skipped object")]
fn test_obj_begin_panics_when_should_skip<T>(#[case] trigger: T)
where
T: Fn(&mut Machine),
{
for_all_options!(Vec::<Pointer>::new(), |mach, _unescape, _ignore_case| {
assert_eq!((StructAction::Skip, None), mach.obj_begin());
trigger(&mut mach)
});
}
// An object under a member name that matches no pointer is skipped, so starting any value
// inside it must panic.
#[rstest]
#[case::array(|m: &mut Machine| { m.arr_begin(); })]
#[case::object(|m: &mut Machine| { m.obj_begin(); })]
#[case::primitive(|m: &mut Machine| { m.primitive(); })]
#[should_panic(expected = "value not allowed in skipped object")]
fn test_obj_begin_panics_when_should_skip_outer_obj<T>(#[case] trigger: T)
where
T: Fn(&mut Machine),
{
for_all_options!(
[Pointer::from_static("/not_in_doc")],
|mach, _unescape, _ignore_case| {
assert_eq!((StructAction::Enter, None), mach.obj_begin());
mach.member_name(fixed::Content::from_static(r#""foo""#));
assert_eq!((StructAction::Skip, None), mach.obj_begin());
trigger(&mut mach)
}
);
}
// Ending an object must panic when the machine is at the root or inside an array.
#[rstest]
#[case::root(|_: &mut Machine| { })]
#[case::array(|m: &mut Machine| { m.arr_begin(); })]
#[should_panic(expected = "no object to end")]
fn test_obj_end_panics_when_no_obj<S>(#[case] setup: S)
where
S: Fn(&mut Machine),
{
for_all_options!(
[Pointer::from_static("/foo")],
|mach, _unescape, _ignore_case| {
setup(&mut mach);
let _ = mach.obj_end();
}
);
}
// Supplying a member name inside a skipped object must panic.
#[test]
#[should_panic(expected = "member name not allowed in skipped object")]
fn test_member_name_panics_when_should_skip() {
for_all_options!(Vec::<Pointer>::new(), |mach, _unescape, _ignore_case| {
assert_eq!((StructAction::Skip, None), mach.obj_begin());
mach.member_name(fixed::Content::default());
});
}
// Supplying a second member name before the first one received its value must panic.
#[rstest]
#[case("/something")]
#[case("/something/else")]
#[case("/something_else")]
#[should_panic(expected = "member value required before next member name")]
fn test_member_name_panics_when_repeated(#[case] pointer: &'static str) {
for_all_options!(
[Pointer::from_static(pointer)],
|mach, _unescape, _ignore_case| {
assert_eq!((StructAction::Enter, None), mach.obj_begin());
mach.member_name(fixed::Content::from_static(r#""foo""#));
mach.member_name(fixed::Content::default());
}
);
}
// Supplying a member name outside of an object must panic: at the root (before and after
// a root-level primitive) and inside an array.
#[rstest]
#[case::no_pointer_root(None, |_: &mut Machine| {})]
#[case::root_pointer_root(Some(""), |_: &mut Machine| {})]
// The `after_primitive` cases feed a root-level primitive first so that they cover the
// state left behind by `primitive()` instead of repeating the plain root cases.
#[case::no_pointer_root_after_primitive(None, |m: &mut Machine| { m.primitive(); })]
#[case::root_pointer_root_after_primitive(Some(""), |m: &mut Machine| { m.primitive(); })]
#[case::index_pointer_array(Some("/0"), |m: &mut Machine| { m.arr_begin(); })]
#[should_panic(expected = "member name not allowed here")]
fn test_member_name_panics_when_not_allowed<S>(
    #[case] pointer: Option<&'static str>,
    #[case] setup: S,
) where
    S: Fn(&mut Machine),
{
    for_all_options!(
        pointer.map(Pointer::from_static),
        |mach, _unescape, _ignore_case| {
            setup(&mut mach);
            mach.member_name(fixed::Content::default());
        }
    );
}
// A member name literal that is not enclosed in double quotes on both sides must panic.
#[rstest]
#[case::empty("")]
#[case::no_quote_a("a")]
#[case::no_quote_ab("ab")]
#[case::no_quote_utf8_2_byte("\u{0080}")]
#[case::no_quote_utf8_3_byte("\u{0800}")]
#[case::no_quote_utf8_4_byte("\u{10000}")]
#[case::lonesome_quote(r#"""#)]
#[case::no_trailing_quote_2(r#""a"#)]
#[case::no_trailing_quote_2(r#""ab"#)]
#[case::no_leading_quote_2(r#"a""#)]
#[case::no_leading_quote_2(r#"ab""#)]
#[should_panic(
expected = r#"member name must be a valid JSON string enclosed in double quotes ('"')"#
)]
fn test_member_name_panics_when_not_double_quoted(#[case] name: &'static str) {
// Content whose literal is returned verbatim, without any quote validation.
#[derive(Debug)]
struct BadContent(&'static str);
impl lexical::Content for BadContent {
type Literal<'a> = &'static str;
fn literal<'a>(&'a self) -> Self::Literal<'a> {
self.0
}
fn is_escaped(&self) -> bool {
false
}
fn unescaped<'a>(&'a self) -> lexical::Unescaped<Self::Literal<'a>> {
unreachable!("this branch is not under test")
}
}
for_all_options!(
[Pointer::from_static("/a")],
|mach, _unescape, _ignore_case| {
assert_eq!((StructAction::Enter, None), mach.obj_begin());
mach.member_name(BadContent(name));
}
);
}
// Starting the first value of an entered object without a member name must panic.
#[rstest]
#[case::array(|m: &mut Machine| { m.arr_begin(); })]
#[case::object(|m: &mut Machine| { m.obj_begin(); })]
#[case::primitive(|m: &mut Machine| { m.primitive(); })]
#[should_panic(expected = "missing object member name")]
fn test_value_methods_panic_when_obj_missing_member_name_first_value<T>(#[case] trigger: T)
where
T: Fn(&mut Machine),
{
for_all_options!(
[Pointer::from_static("/anything")],
|mach, _unescape, _ignore_case| {
assert_eq!((StructAction::Enter, None), mach.obj_begin());
trigger(&mut mach)
}
);
}
// After one complete member, starting another value without a new member name must panic.
#[rstest]
#[case::array(|m: &mut Machine| { m.arr_begin(); })]
#[case::object(|m: &mut Machine| { m.obj_begin(); })]
#[case::primitive(|m: &mut Machine| { m.primitive(); })]
#[should_panic(expected = "missing object member name")]
fn test_value_methods_panic_when_obj_missing_member_name_second_value<T>(#[case] trigger: T)
where
T: Fn(&mut Machine),
{
for_all_options!(
[Pointer::from_static("/anything")],
|mach, _unescape, _ignore_case| {
assert_eq!((StructAction::Enter, None), mach.obj_begin());
mach.member_name(fixed::Content::from_static(r#""anything""#));
assert_eq!(Some(&Pointer::from_static("/anything")), mach.primitive());
trigger(&mut mach)
}
);
}
// A root-level empty array matches no pointer when the group lacks the root pointer; the
// reported action depends on whether the group contains an index pointer. The sequence is
// run twice to show that the machine is back at the root afterwards.
#[rstest]
#[case(Vec::<Pointer>::new(), StructAction::Skip)]
#[case([Pointer::from_static("/")], StructAction::Skip)]
#[case([Pointer::from_static("/0")], StructAction::Enter)]
#[case([Pointer::from_static("/a")], StructAction::Skip)]
#[case([Pointer::from_static("/a"), Pointer::from_static("/1")], StructAction::Enter)]
fn test_empty_array_does_not_match<I>(#[case] pointers: I, #[case] expect_action: StructAction)
where
I: IntoIterator<Item = Pointer> + Clone,
{
for_all_options!(pointers, |mach, _unescape, _ignore_case| {
assert_eq!((expect_action, None), mach.arr_begin());
assert_eq!(None, mach.arr_end());
assert_eq!((expect_action, None), mach.arr_begin());
assert_eq!(None, mach.arr_end());
});
}
// A root-level empty object matches no pointer when the group lacks the root pointer. The
// sequence is run twice to show that the machine is back at the root afterwards.
#[rstest]
#[case(Vec::<Pointer>::new(), StructAction::Skip)]
#[case([Pointer::from_static("/")], StructAction::Enter)]
#[case([Pointer::from_static("/0")], StructAction::Enter)]
fn test_empty_object_does_not_match<I>(#[case] pointers: I, #[case] expect_action: StructAction)
where
I: IntoIterator<Item = Pointer> + Clone,
{
for_all_options!(pointers, |mach, _unescape, _ignore_case| {
assert_eq!((expect_action, None), mach.obj_begin());
assert_eq!(None, mach.obj_end());
assert_eq!((expect_action, None), mach.obj_begin());
assert_eq!(None, mach.obj_end());
});
}
// A root-level primitive matches no pointer when the group lacks the root pointer.
#[rstest]
#[case(Vec::<Pointer>::new())]
#[case([Pointer::from_static("/")])]
#[case([Pointer::from_static("/0")])]
fn test_primitive_does_not_match<I>(#[case] pointers: I)
where
I: IntoIterator<Item = Pointer> + Clone,
{
for_all_options!(pointers, |mach, _unescape, _ignore_case| {
assert_eq!(None, mach.primitive());
assert_eq!(None, mach.primitive());
});
}
// When the group contains the root pointer, every kind of root value reports it: a
// primitive, and both the begin and end of an array and of an object.
#[rstest]
#[case([Pointer::default()], StructAction::Skip, StructAction::Skip)]
#[case([Pointer::default(), Pointer::from_static("/")], StructAction::Skip, StructAction::Enter)]
#[case([Pointer::default(), Pointer::from_static("/0")], StructAction::Enter, StructAction::Enter)]
fn test_root_value_matches<I>(
#[case] pointers: I,
#[case] arr_action: StructAction,
#[case] obj_action: StructAction,
) where
I: IntoIterator<Item = Pointer> + Clone,
{
for_all_options!(pointers, |mach, unescape, ignore_case| {
assert_eq!(
Some(&Pointer::default()),
mach.primitive(),
"root pointer should match primitive but doesn't (unescape={unescape}, ignore_case={ignore_case})"
);
assert_eq!(
(arr_action, Some(&Pointer::default())),
mach.arr_begin(),
"root pointer should trigger enter event on array begin but doesn't (unescape={unescape}, ignore_case={ignore_case})"
);
assert_eq!(
Some(&Pointer::default()),
mach.arr_end(),
"root pointer should trigger exit on array end but doesn't (unescape={unescape}, ignore_case={ignore_case})"
);
assert_eq!(
(obj_action, Some(&Pointer::default())),
mach.obj_begin(),
"root pointer should trigger enter event on object begin but doesn't (unescape={unescape}, ignore_case={ignore_case})"
);
assert_eq!(
Some(&Pointer::default()),
mach.obj_end(),
"root pointer should trigger exit event on object end but doesn't (unescape={unescape}, ignore_case={ignore_case})"
);
});
}
#[rstest]
#[case::empty([""], ["foo"])]
#[case::escape_not_expanded(
["\\", "\\\\", "\\\"", "\\t", "\\n", "\\r", "\\u1234"],
["", "a", "\"", "\t", "\n", "\r", "\u{1234}"])
]
#[case::a(["a"], ["", "ab", " a", "foo"])]
#[case::ab(["ab"], ["", "A", "a", " a", "aB", "ac", "abc", "foo"])]
#[case::abc(["abc"], ["", "A", "a", " a", "ab", "ac", "foo"])]
#[case::a_ab(["a", "ab"], ["", "abc", "foo", "foo"])]
#[case::f_mostly([
"a", "air", "b", "bar", "bat", "baz", "c", "d", "e", "f", "fan", "fanatical", "fang", "fig",
"fight", "foal", "fob", "fog", "folly", "foo", "food", "fool", "foolery", "foolhardy",
"fooling", "foolish", "foolishness", "fools", "foolscap", "foot", "football", "footie",
"footsie", "for", "forecast", "foreign", "foreigner", "fork", "fox", "foxy", "g", "h",
], [
"A", "aim", "ban", "fanatic", "farm", "figure", "foe", "fool of a Took!", "fooled",
"foolhardiness", "foggy", "foxbat", "fulsome", "hardy",
])]
#[case::g([
"grand", "grand piano", "grandeur", "grandiose", "grandiloquently", "grandstanding",
], [
"grandfather", "grandiloquent", "granite", "grandma", "grandmaster", "grandson", "grant"
])]
#[case::utf8_2_byte([
"\u{0080}", "\u{07ff}", "\u{0080}\u{07ff}", "\u{0080}foo", "bar\u{0080}"
], [
"a", "\u{0081}", "\u{0800}"
])]
#[case::utf8_3_byte(["\u{0800}", "\u{ffff}"], ["a", "\u{0080}"])]
#[case::utf8_4_byte(["\u{10000}", "\u{10ffff}"], ["a", "\u{0080}", "{\u{0800}"])]
fn test_chunked_name_matches<I, J>(#[case] ref_tokens: I, #[case] non_matches: J)
where
I: IntoIterator<Item = &'static str> + Clone,
J: IntoIterator<Item = &'static str> + Clone,
{
let pointers: Vec<Pointer> = ref_tokens
.clone()
.into_iter()
.map(|t| format!("/{t}").try_into().unwrap())
.collect();
let g: Group = pointers.clone().into_iter().collect();
for n in 1..=3 {
let mut mach = Machine::new(&g, false);
assert_eq!((StructAction::Enter, None), mach.obj_begin());
for (i, t) in ref_tokens.clone().into_iter().enumerate() {
let quoted = format!(r#""{t}""#);
mach.member_name(ChunkyContent::new("ed, n));
assert_eq!(
Some(&pointers[i]),
mach.primitive(),
"n={n}, i={i}/{}, ref_token={t:?}, pointer={}",
pointers.len(),
pointers[i]
);
}
for (j, x) in non_matches.clone().into_iter().enumerate() {
let quoted = format!(r#""{x}""#);
mach.member_name(ChunkyContent::new("ed, n));
assert_eq!(None, mach.primitive(), "n={n}, j={j}, non_match={x:?}",);
}
}
}
#[rstest]
#[case::empty([""], [], ["foo"])]
#[case::simple_tab(["\t"], ["\\t"], ["\\ta", "\\t\\t"])]
#[case::simple_nl(["\n"], ["\\n"], ["\\na", "\\n\\n"])]
#[case::simple_cr(["\r"], ["\\r"], ["\\ra", "\\r\\r"])]
#[case::simple_backslash(["\\"], ["\\\\"], ["\\\\a", "\\\\\\\\"])]
#[case::simple_double_quote(["\""], ["\\\""], ["\"a", "\"\""])]
#[case::unicode_utf8_2_bytes(
["\u{0080}", "\u{07ff}", "\u{0080}\u{07ff}", "\u{0080}foo", "bar\u{0080}"],
["\\u0080", "\\u07ff", "\\u07FF", "\\u0080\\u07Ff", "\\u0080foo", "bar\\u0080"],
["a", "\\u0081", "\\u0800"],
)]
#[case::unicode_utf8_3_bytes(
["\u{0800}", "\u{ffff}"],
["\\u0800", "\\uffff", "\\ufFfF"],
["a", r#"\u0080"#]
)]
#[case::unicode_utf8_4_bytes(
["\u{10000}", "\u{10ffff}"],
["\\ud800\\uDC00", "\\uDBFF\\udffF"],
["a", "\\uffff", "\\uDBFF\\uDFFE"],
)]
#[case::json_pointer_escapes(
["~0", "~1", "~0~0", "~0~1", "~1~0", "~1~1"],
["~", "/", "~~", "~/", "/~", "//"],
["~0", "~1", "~0~0", "~0~1", "~1~0", "~1~1"],
)]
#[case::multiple_pointers(
[
"\t", "\n", "\r", "\\", "\"", "~0", "~1", "hello, world", r#""hello, world""#,
"hello\nworld", "hello\r\nworld", "hello~1world", "hello\\world",
"hello\t\nworld", "hello\n\nworld", "hello\r~0world~0",
],
[
"\\t", "\\n", "\\r", "\\\\", "\\\"", "~", "/", "hello, world", r#"\"hello, world\""#,
"hello\\nworld", "hello\\r\\nworld", "hello/world", "hello\\\\world",
"hello\\t\\nworld", "hello\\n\\nworld", "hello\\r~world~",
],
[
"~~", "//", "hello", "hello,", "hello, ", "hello, w", "hello, wo", "hello, wor",
"hello, worl", r#"\"hello, world"#, r#"hello, world\""#, "hello\\n", "hello\\r",
"hello\\nw", "hello\\nwo", "hello\\nwor", "hello\\nworl", "hello\\nworld",
]
)]
fn test_chunked_name_matches_unescape<I, J, K>(
#[case] ref_tokens: I,
#[case] matches: J,
#[case] non_matches: K,
) where
I: IntoIterator<Item = &'static str>,
J: IntoIterator<Item = &'static str> + Clone,
K: IntoIterator<Item = &'static str> + Clone,
{
let pointers: Vec<Pointer> = ref_tokens
.into_iter()
.map(|t| format!("/{t}").try_into().unwrap())
.collect();
let g: Group = pointers.clone().into_iter().collect();
for n in 1..=3 {
let mut mach = Machine::new(&g, true);
assert_eq!((StructAction::Enter, None), mach.obj_begin());
for (i, m) in matches.clone().into_iter().enumerate() {
let quoted = format!(r#""{m}""#);
mach.member_name(ChunkyContent::new_escaped("ed, n));
if let Some(p) = mach.primitive() {
let ref_token = p
.ref_tokens()
.next()
.expect("matched pointer should have a ref token");
let mut unescape_buf = Vec::new();
lexical::unescape(m, &mut unescape_buf);
let unescaped_match = String::from_utf8(unescape_buf).unwrap();
assert_eq!(
ref_token, unescaped_match,
"n={n}, i={i}, ref_token={ref_token:?}, match={m:?}, unescaped_match={unescaped_match:?}, pointer={p}",
);
} else {
panic!("expected match, but didn't get it: n={n}, i={i}, match={m:?}");
}
}
for (j, x) in non_matches.clone().into_iter().enumerate() {
let quoted = format!(r#""{x}""#);
mach.member_name(ChunkyContent::new("ed, n));
assert_eq!(None, mach.primitive(), "n={n}, j={j}, non_match={x:?}");
}
}
}
// With a case-insensitive group, member names delivered in chunks of 1 to 3 bytes must
// match the original reference tokens as well as every listed case variant, and nothing
// else.
#[cfg(feature = "ignore_case")]
#[rstest]
#[case::empty([""], None::<&str>, ["foo"])]
#[case::escape_not_expanded(
    ["\\", "\\\\", "\\\"", "\\t", "\\n", "\\r", "\\u1234"],
    ["\\T", "\\N", "\\R", "\\U1234"],
    ["", "a", "\"", "\t", "\n", "\r", "\u{1234}"]
)]
#[case::a(["a"], ["A"], ["", "aa", "aA", "Aa", "AA"])]
#[case::ab(["ab"], ["aB", "Ab", "AB"], ["", "aa", "aA", "Aa", "AA"])]
#[case::a_upper(["A"], ["a"], ["", "aa", "aA", "Aa", "AA"])]
#[case::ab_upper(["AB"], ["ab", "aB", "Ab"], ["", "aa", "aA", "Aa", "AA"])]
#[case::friedrichstraße(
    ["friedrichstraße"],
    ["Friedrichstraße", "Friedrichstrasse", "FRIEDRICHSTRASSE", "FRIEDRICHSTRASSE"],
    ["f", "friedrich", "friedrichstrase"]
)]
#[case::f_mostly([
    "A", "air", "b", "bar", "bat", "baz", "c", "d", "e", "f", "fan", "fanatical", "fang", "fig",
    "fight", "foal", "fob", "fog", "folly", "foo", "food", "fool", "foolery", "foolhardy",
    "fooling", "foolish", "foolishness", "fools", "foolscap", "foot", "football", "footie",
    "footsie", "for", "forecast", "foreign", "foreigner", "fork", "fox", "foxy",
    "friar", "Friar Tuck", "fried", "fried eggs",
    "Friedrichsplatz", "Friedrichstraße", "Friedrichswall", "frill", "frills", "fritter",
    "frothy",
    "g", "h",
], [
    "a", "Baz", "FaNg", "friar tuck", "FRIED EGGS", "friedrichstrasse", "friedrichstraSSe",
    "frILL",
], [
    "aim", "ban", "FAANGS", "fanatic", "farm", "figure", "foe", "fool of a Took!", "fooled",
    "foolhardiness", "foggy", "foxbat", "fried EGG", "friedrich", "Friedrich", "fulsome",
    "hardy",
])]
#[case::utf8_2_byte([
    "\u{0080}", "\u{07ff}", "\u{0080}\u{07ff}", "\u{0080}foo", "bar\u{0080}"
], ["\u{0080}fOo", "BAR\u{0080}"], [
    "a", "\u{0081}", "\u{0800}"
])]
#[case::utf8_3_byte(["\u{0800}", "\u{ffff}"], [], ["a", "\u{0080}"])]
#[case::utf8_4_byte(["\u{10000}", "\u{10ffff}"], [], ["a", "\u{0080}", "{\u{0800}"])]
fn test_chunked_name_matches_ignore_case<I, J, K>(
    #[case] ref_tokens: I,
    #[case] extra_matches: J,
    #[case] non_matches: K,
) where
    I: IntoIterator<Item = &'static str> + Clone,
    J: IntoIterator<Item = &'static str> + Clone,
    K: IntoIterator<Item = &'static str> + Clone,
{
    let pointers: Vec<Pointer> = ref_tokens
        .clone()
        .into_iter()
        .map(|t| format!("/{t}").try_into().unwrap())
        .collect();
    let g: Group = Group::from_pointers_ignore_case(pointers.clone());
    // `n` is the chunk size used to deliver the member name bytes.
    for n in 1..=3 {
        let mut mach = Machine::new(&g, false);
        assert_eq!((StructAction::Enter, None), mach.obj_begin());
        for (i, t) in ref_tokens.clone().into_iter().enumerate() {
            let quoted = format!(r#""{t}""#);
            mach.member_name(ChunkyContent::new(quoted.as_str(), n));
            assert_eq!(
                Some(&pointers[i]),
                mach.primitive(),
                "n={n}, i={i}/{}, ref_token={t:?}, pointer={}",
                pointers.len(),
                pointers[i]
            );
        }
        for (j, x) in extra_matches.clone().into_iter().enumerate() {
            let quoted = format!(r#""{x}""#);
            mach.member_name(ChunkyContent::new(quoted.as_str(), n));
            if let Some(p) = mach.primitive() {
                let ref_token = p
                    .ref_tokens()
                    .next()
                    .expect("matched pointer should have a ref token");
                // A case variant matches when both sides are equal after case folding.
                let ref_token_case_folded = default_case_fold_str(ref_token.as_ref());
                let extra_case_folded = default_case_fold_str(x);
                assert_eq!(
                    ref_token_case_folded, extra_case_folded,
                    "n={n}, j={j}, ref_token={ref_token:?}, extra_match={x:?}, pointer={p}"
                );
            } else {
                panic!(
                    "expected extra match, but didn't get it: n={n}, j={j}, extra_match={x:?}"
                );
            }
        }
        for (k, x) in non_matches.clone().into_iter().enumerate() {
            let quoted = format!(r#""{x}""#);
            mach.member_name(ChunkyContent::new(quoted.as_str(), n));
            assert_eq!(None, mach.primitive(), "n={n}, k={k}, non_match={x:?}");
        }
    }
}
// A string paired with the chunk size `n` in which its bytes are handed out.
#[derive(Copy, Clone, Debug)]
struct Chunky<'a> {
    s: &'a str,
    n: usize,
}

impl<'a> Chunky<'a> {
    // Pairs `s` with chunk size `n`; a chunk size of zero is rejected with a panic.
    fn new(s: &'a str, n: usize) -> Self {
        match n {
            0 => panic!("n can't be zero"),
            _ => Self { s, n },
        }
    }
}
// Test content that serves its literal in fixed-size chunks and reports a configurable
// escaped flag.
#[derive(Debug)]
struct ChunkyContent<'a> {
    chunky: Chunky<'a>,
    is_escaped: bool,
}

impl<'a> ChunkyContent<'a> {
    // Content that reports itself as not escaped.
    fn new(s: &'a str, n: usize) -> Self {
        let chunky = Chunky::new(s, n);
        Self {
            chunky,
            is_escaped: false,
        }
    }

    // Content that reports itself as escaped.
    fn new_escaped(s: &'a str, n: usize) -> Self {
        let chunky = Chunky::new(s, n);
        Self {
            chunky,
            is_escaped: true,
        }
    }
}
// Exposes the chunked literal and the configured escaped flag; `unescaped` is not
// implemented and panics if called.
impl<'a> lexical::Content for ChunkyContent<'a> {
type Literal<'b>
= ChunkyLit<'a>
where
Self: 'b;
fn literal<'b>(&'b self) -> Self::Literal<'b> {
ChunkyLit(self.chunky)
}
fn is_escaped(&self) -> bool {
self.is_escaped
}
fn unescaped<'b>(&'b self) -> lexical::Unescaped<Self::Literal<'b>> {
panic!("not implemented: not needed")
}
}
// Literal wrapper that converts into a `ChunkyBuf` positioned at the start of the string.
struct ChunkyLit<'a>(Chunky<'a>);

impl<'a> IntoBuf for ChunkyLit<'a> {
    type Buf = ChunkyBuf<'a>;

    fn into_buf(self) -> Self::Buf {
        let ChunkyLit(chunky) = self;
        ChunkyBuf { pos: 0, chunky }
    }
}
// Buffer over a string that never exposes more than `chunky.n` bytes per chunk.
#[derive(Debug)]
struct ChunkyBuf<'a> {
    chunky: Chunky<'a>,
    pos: usize,
}

impl<'a> Buf for ChunkyBuf<'a> {
    // Moves the read position forward by `n` bytes; panics on underflow.
    fn advance(&mut self, n: usize) {
        let remaining = self.remaining();
        if n > remaining {
            panic!(
                "{}",
                &BufUnderflow {
                    requested: n,
                    remaining
                }
            );
        }
        self.pos += n;
    }

    // Returns the unread bytes up to the end of the fixed-size chunk containing `pos`.
    fn chunk(&self) -> &[u8] {
        let size = self.chunky.n;
        let bytes = self.chunky.s.as_bytes();
        let start_of_chunk = (self.pos / size) * size;
        let end_of_chunk = (start_of_chunk + size).min(bytes.len());
        &bytes[self.pos..end_of_chunk]
    }

    // Number of bytes not yet consumed.
    fn remaining(&self) -> usize {
        self.chunky.s.len() - self.pos
    }

    // Copies exactly `dst.len()` bytes into `dst` and consumes them, or reports underflow
    // without consuming anything.
    fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), BufUnderflow> {
        let requested = dst.len();
        let remaining = self.remaining();
        if remaining < requested {
            return Err(BufUnderflow {
                requested,
                remaining,
            });
        }
        let end = self.pos + requested;
        dst.copy_from_slice(&self.chunky.s.as_bytes()[self.pos..end]);
        self.pos = end;
        Ok(())
    }
}
}