use std::sync::Arc;
use crate::index::StructIndex;
use crate::keys::{value_for_key as keys_value_for_key, KeyBitmaps, Role};
use crate::stage1::Kind;
/// Opaque handle naming one token of a structural index.
///
/// The wrapped `u32` is used throughout this file as a position into the
/// per-token columns of the index.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub struct TokenId(pub(crate) u32);

impl TokenId {
    /// Returns the wrapped `u32`.
    #[inline]
    pub fn raw(self) -> u32 {
        let Self(value) = self;
        value
    }
}

impl From<u32> for TokenId {
    /// Wraps a raw `u32`; no check is made against any particular index.
    #[inline]
    fn from(raw: u32) -> Self {
        TokenId(raw)
    }
}
/// Public classification of a token, as returned by `StructuralIndex::kind`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[non_exhaustive]
pub enum TokenKind {
/// Reported for `Kind::ObjOpen` tokens.
Object,
/// Reported for `Kind::ArrOpen` tokens.
Array,
/// Reported for `Kind::Quote` tokens whose role is `Role::Key`.
Key,
/// Reported for every other `Kind::Quote` token (all of them when the
/// index was built without key bitmaps).
String,
/// Reported for `Kind::Scalar` tokens, and also for `Kind::Colon` and
/// `Kind::Comma` tokens.
Scalar,
/// Reported for `Kind::ObjClose` tokens.
ObjectEnd,
/// Reported for `Kind::ArrClose` tokens.
ArrayEnd,
}
/// Half-open byte range `[start, end)` into a source buffer.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub struct ByteSpan {
    /// Offset of the first byte covered by the span.
    pub start: u32,
    /// Offset one past the last byte covered by the span.
    pub end: u32,
}

impl ByteSpan {
    /// Number of bytes covered; an inverted span (`end < start`) has length 0.
    #[inline]
    pub fn len(self) -> u32 {
        self.end.saturating_sub(self.start)
    }

    /// `true` when the span covers no bytes, including inverted spans.
    #[inline]
    pub fn is_empty(self) -> bool {
        self.end <= self.start
    }

    /// Returns the part of `bytes` covered by this span.
    ///
    /// The range is clamped to `bytes`: an `end` past the buffer is cut at
    /// `bytes.len()`, and an inverted or fully out-of-range span yields an
    /// empty slice. This keeps `slice` consistent with `len` / `is_empty`,
    /// which already treat inverted spans as empty, instead of panicking.
    #[inline]
    pub fn slice<'a>(self, bytes: &'a [u8]) -> &'a [u8] {
        let end = (self.end as usize).min(bytes.len());
        // Clamping `start` to `end` turns inverted spans into an empty range.
        let start = (self.start as usize).min(end);
        &bytes[start..end]
    }
}
/// Errors surfaced by this module's public functions.
#[derive(Debug)]
pub enum Error {
    /// Building the index failed; carries the builder's message as text.
    Parse(String),
    /// Displayed as "unbalanced container close".
    UnbalancedClose,
    /// Displayed as "truncated input".
    Truncated,
    /// Displayed as "invalid UTF-8"; returned by `validate_utf8`.
    InvalidUtf8,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only `Parse` needs formatting; the rest are fixed messages.
        let fixed = match self {
            Error::Parse(detail) => return write!(f, "parse error: {detail}"),
            Error::UnbalancedClose => "unbalanced container close",
            Error::Truncated => "truncated input",
            Error::InvalidUtf8 => "invalid UTF-8",
        };
        f.write_str(fixed)
    }
}

impl std::error::Error for Error {}
/// Switches selecting which auxiliary structures an index build produces.
///
/// NOTE(review): within this file only `keys` is consulted (by
/// `from_bytes_with`); the other flags are presumably read elsewhere — confirm.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct BuildOptions {
    /// Build the key bitmaps.
    pub keys: bool,
    /// Not read in this file — TODO confirm where it is honoured.
    pub roles: bool,
    /// Not read in this file — TODO confirm where it is honoured.
    pub close_of: bool,
    /// Not read in this file — TODO confirm where it is honoured.
    pub tape_alignment: bool,
}

impl Default for BuildOptions {
    /// Everything enabled except `tape_alignment`.
    fn default() -> Self {
        BuildOptions {
            keys: true,
            roles: true,
            close_of: true,
            tape_alignment: false,
        }
    }
}

impl BuildOptions {
    /// All flags disabled.
    pub fn minimal() -> Self {
        BuildOptions {
            keys: false,
            roles: false,
            close_of: false,
            tape_alignment: false,
        }
    }

    /// The defaults with `close_of` turned off.
    pub fn keys_only() -> Self {
        BuildOptions {
            close_of: false,
            ..Self::default()
        }
    }

    /// The defaults with `tape_alignment` turned on (every flag enabled).
    pub fn for_jetro_tape() -> Self {
        BuildOptions {
            tape_alignment: true,
            ..Self::default()
        }
    }
}
/// Handle to a built structural index.
///
/// All data lives in an `Inner` behind an `Arc`; the methods on this type
/// only read from it.
pub struct StructuralIndex {
pub(crate) inner: Arc<Inner>,
}
/// Data stored behind `StructuralIndex::inner`.
pub(crate) struct Inner {
/// Structural index produced by `StructIndex::build`.
pub idx: StructIndex,
/// Key bitmaps; `None` when the index was built with `BuildOptions::keys`
/// set to `false`.
pub keys: Option<KeyBitmaps>,
}
const _: fn() = || {
fn assert_send<T: Send>() {}
fn assert_sync<T: Sync>() {}
assert_send::<StructuralIndex>();
assert_sync::<StructuralIndex>();
assert_send::<TokenId>();
assert_sync::<TokenId>();
assert_send::<ByteSpan>();
assert_sync::<ByteSpan>();
assert_send::<Error>();
assert_sync::<Error>();
assert_send::<BuildOptions>();
assert_sync::<BuildOptions>();
};
impl StructuralIndex {
/// Number of tokens in the index.
///
/// The stage-1 length is narrowed to `u32` with an `as` cast.
pub fn token_count(&self) -> u32 {
    let stage1 = &self.inner.idx.stage1;
    stage1.len() as u32
}
/// Largest value in the stage-1 depth column, or 0 when there are no tokens.
pub fn max_depth(&self) -> u16 {
    let depths = &self.inner.idx.stage1.depth;
    depths.iter().fold(0, |deepest, &d| deepest.max(d))
}
/// Iterates every token id from 0 up to (excluding) `token_count()`.
pub fn tokens(&self) -> Tokens<'_> {
    let end = self.token_count();
    Tokens {
        idx: self,
        cur: 0,
        end,
    }
}
/// Classifies `tok` as a public [`TokenKind`].
///
/// Quote tokens are reported as `Key` only when key bitmaps exist and mark
/// the token with `Role::Key`; otherwise they are `String`. Colon and comma
/// tokens are reported as `Scalar`.
#[inline]
pub fn kind(&self, tok: TokenId) -> TokenKind {
    let pos = tok.0 as usize;
    let structural = self.inner.idx.stage1.kind[pos];
    // Without key bitmaps no token carries a role.
    let role = match self.inner.keys.as_ref() {
        Some(kb) => kb.role[pos],
        None => Role::None,
    };
    match structural {
        Kind::ObjOpen => TokenKind::Object,
        Kind::ArrOpen => TokenKind::Array,
        Kind::ObjClose => TokenKind::ObjectEnd,
        Kind::ArrClose => TokenKind::ArrayEnd,
        Kind::Quote => {
            if matches!(role, Role::Key) {
                TokenKind::Key
            } else {
                TokenKind::String
            }
        }
        Kind::Scalar | Kind::Colon | Kind::Comma => TokenKind::Scalar,
    }
}
/// Value of the stage-1 depth column for `tok`.
#[inline]
pub fn depth(&self, tok: TokenId) -> u16 {
    let pos = tok.0 as usize;
    self.inner.idx.stage1.depth[pos]
}
/// Value of the stage-1 offset column for `tok`.
#[inline]
pub fn byte_offset(&self, tok: TokenId) -> u32 {
    let pos = tok.0 as usize;
    self.inner.idx.stage1.offset[pos]
}
/// Byte span of `tok`, computed from the index alone (no source bytes).
///
/// * Container opens extend through the byte at the matching close token's
///   offset, or cover one byte when no close is recorded.
/// * Quote and scalar tokens end at the next token's offset (one byte when
///   `tok` is the last token), so the span may include bytes that follow the
///   value; use `byte_span_in` for an end found by scanning the bytes.
/// * Every other token covers exactly one byte.
pub fn byte_span(&self, tok: TokenId) -> ByteSpan {
    let stage1 = &self.inner.idx.stage1;
    let pos = tok.0 as usize;
    let start = stage1.offset[pos];
    let end = match stage1.kind[pos] {
        Kind::Quote | Kind::Scalar => {
            let next = pos + 1;
            if next >= stage1.offset.len() {
                start + 1
            } else {
                stage1.offset[next]
            }
        }
        Kind::ObjOpen | Kind::ArrOpen => {
            let close = self.inner.idx.close_of[pos];
            if close < 0 {
                start + 1
            } else {
                stage1.offset[close as usize] + 1
            }
        }
        Kind::ObjClose | Kind::ArrClose | Kind::Colon | Kind::Comma => start + 1,
    };
    ByteSpan { start, end }
}
/// Byte span of `tok`, using `bytes` to find where strings and scalars end.
///
/// * Quote tokens end where `scan_string_end` says, scalars where
///   `scan_scalar_end` says.
/// * Container opens extend through the byte at the matching close token's
///   offset, or cover one byte when no close is recorded.
/// * Every other token covers exactly one byte.
pub fn byte_span_in(&self, tok: TokenId, bytes: &[u8]) -> ByteSpan {
    let stage1 = &self.inner.idx.stage1;
    let pos = tok.0 as usize;
    let start = stage1.offset[pos];
    let end = match stage1.kind[pos] {
        Kind::Quote => scan_string_end(bytes, start),
        Kind::Scalar => scan_scalar_end(bytes, start),
        Kind::ObjOpen | Kind::ArrOpen => {
            let close = self.inner.idx.close_of[pos];
            if close < 0 {
                start + 1
            } else {
                stage1.offset[close as usize] + 1
            }
        }
        Kind::ObjClose | Kind::ArrClose | Kind::Colon | Kind::Comma => start + 1,
    };
    ByteSpan { start, end }
}
/// Parent token of `tok`, or `None` when the stored parent is negative.
pub fn parent(&self, tok: TokenId) -> Option<TokenId> {
    let raw = self.inner.idx.parent[tok.0 as usize];
    (raw >= 0).then(|| TokenId(raw as u32))
}
/// Close token recorded for `container`, or `None` when the stored value is
/// negative.
pub fn close_of(&self, container: TokenId) -> Option<TokenId> {
    let raw = self.inner.idx.close_of[container.0 as usize];
    (raw >= 0).then(|| TokenId(raw as u32))
}
/// Tape slot recorded for `tok`; `u32::MAX` in the table means "none".
pub fn tape_index(&self, tok: TokenId) -> Option<u32> {
    let slot = self.inner.idx.tape_of[tok.0 as usize];
    Some(slot).filter(|&s| s != u32::MAX)
}
pub fn container_at_byte(&self, pos: u32) -> Option<TokenId> {
self.inner.idx.container_at(pos).map(TokenId)
}
/// Iterates the ancestors of `tok`, starting at its parent (`tok` itself is
/// not yielded) and following parent links until a negative link is reached.
pub fn ancestors(&self, tok: TokenId) -> Ancestors<'_> {
    let links: &[i32] = &self.inner.idx.parent;
    Ancestors {
        parent: links,
        cur: links[tok.0 as usize],
    }
}
pub fn slice<'a>(&self, bytes: &'a [u8], tok: TokenId) -> &'a [u8] {
let span = self.byte_span_in(tok, bytes);
&bytes[span.start as usize..(span.end as usize).min(bytes.len())]
}
/// `true` when key bitmaps were built for this index.
pub fn has_keys(&self) -> bool {
self.inner.keys.is_some()
}
/// Returns the key tokens named `name`, optionally restricted by `depth`.
///
/// * Without key bitmaps, or when `name` never occurs, the result is empty.
/// * `depth == None` returns every hit for `name`.
/// * `depth == Some(d)` intersects the hits with `depth_bitmaps[d]`. When
///   `d` has no depth bitmap the result is empty: a depth filter that cannot
///   be satisfied must not fall back to the unfiltered hits.
///
/// NOTE(review): assumes `depth_bitmaps` is indexed by nesting depth —
/// confirm against `KeyBitmaps`.
pub fn keys_named<'a>(&'a self, name: &str, depth: Option<u16>) -> KeyHits<'a> {
    let kb = match &self.inner.keys {
        Some(k) => k,
        None => return KeyHits::empty(),
    };
    let id = match kb.by_name.get(name) {
        Some(&id) => id,
        None => return KeyHits::empty(),
    };
    let bm = &kb.bitmaps[id as usize];
    let result = match depth {
        None => bm.clone(),
        Some(d) => match kb.depth_bitmaps.get(d as usize) {
            Some(dm) => bm.and(dm),
            // No bitmap for this depth: nothing can match the filter.
            None => return KeyHits::empty(),
        },
    };
    KeyHits::from_bitmap(result)
}
/// `true` when key bitmaps exist and `name` is present in their name table.
pub fn has_key(&self, name: &str) -> bool {
    match self.inner.keys.as_ref() {
        Some(kb) => kb.by_name.contains_key(name),
        None => false,
    }
}
/// Iterates the entries of the key dictionary; yields nothing when the
/// index has no key bitmaps.
pub fn keys_seen(&self) -> impl Iterator<Item = &str> + '_ {
    self.inner
        .keys
        .iter()
        .flat_map(|kb| kb.dict.iter().map(|entry| &**entry))
}
pub fn value_for_key(&self, key_tok: TokenId) -> Option<TokenId> {
keys_value_for_key(&self.inner.idx, key_tok.0).map(TokenId)
}
/// Key tokens named `name` whose token id lies in `root ..= close`, where
/// `close` is `root`'s close token, or the last token of the index when no
/// close is recorded for `root`.
///
/// Hits at any nesting level inside that range are included. Empty when the
/// index has no key bitmaps or `name` never occurs.
pub fn keys_named_in<'a>(&'a self, name: &str, root: TokenId) -> KeyHits<'a> {
    let kb = if let Some(kb) = &self.inner.keys {
        kb
    } else {
        return KeyHits::empty();
    };
    let id = if let Some(&id) = kb.by_name.get(name) {
        id
    } else {
        return KeyHits::empty();
    };
    let last_tok = self.token_count().saturating_sub(1);
    let close = self.close_of(root).map_or(last_tok, |t| t.0);
    KeyHits::bounded(&kb.bitmaps[id as usize], root.0, close)
}
/// Value token of the first key named `name` whose parent is exactly
/// `parent`.
///
/// Only key tokens in `parent + 1 ..= close` are examined, where `close` is
/// `parent`'s close token (or the last token when none is recorded). Keys
/// inside that range with a different parent are skipped.
pub fn field_of(&self, parent: TokenId, name: &str) -> Option<TokenId> {
    let kb = self.inner.keys.as_ref()?;
    let id = *kb.by_name.get(name)?;
    let bm = &kb.bitmaps[id as usize];
    let close = match self.close_of(parent) {
        Some(t) => t.0,
        None => self.token_count().saturating_sub(1),
    };
    let lo = parent.0.saturating_add(1);
    if lo > close {
        return None;
    }
    let mut cur = bm.cursor();
    cur.reset_at_or_after(lo);
    loop {
        // An exhausted cursor ends the search.
        let v = cur.current()?;
        if v > close {
            return None;
        }
        let k_tok = TokenId(v);
        if self.parent(k_tok) == Some(parent) {
            return self.value_for_key(k_tok);
        }
        cur.move_next();
    }
}
/// Bitmap holding every token id in `root ..= close`, where `close` is
/// `root`'s close token, or the last token when none is recorded.
pub fn subtree_bitmap(&self, root: TokenId) -> croaring::Bitmap {
    let last = match self.close_of(root) {
        Some(close) => close.0,
        None => self.token_count().saturating_sub(1),
    };
    let mut subtree = croaring::Bitmap::new();
    subtree.add_range(root.0..=last);
    subtree
}
}
/// Iterator over token ids `cur .. end`, created by
/// `StructuralIndex::tokens`.
pub struct Tokens<'a> {
    /// Borrow of the index; not consulted while iterating.
    idx: &'a StructuralIndex,
    /// Next id to yield.
    cur: u32,
    /// One past the last id to yield.
    end: u32,
}

impl<'a> Iterator for Tokens<'a> {
    type Item = TokenId;

    fn next(&mut self) -> Option<TokenId> {
        // The index reference is only touched, never read from.
        let _ = self.idx;
        if self.cur < self.end {
            let id = self.cur;
            self.cur += 1;
            Some(TokenId(id))
        } else {
            None
        }
    }
}
/// Iterator that follows parent links upward, created by
/// `StructuralIndex::ancestors`.
pub struct Ancestors<'a> {
    /// Parent link per token; a negative entry means "no parent".
    parent: &'a [i32],
    /// Next token to yield, or negative once the chain is exhausted.
    cur: i32,
}

impl<'a> Iterator for Ancestors<'a> {
    type Item = TokenId;

    fn next(&mut self) -> Option<TokenId> {
        let here = self.cur;
        if here.is_negative() {
            return None;
        }
        // Step to the next ancestor before handing out the current one.
        self.cur = self.parent[here as usize];
        Some(TokenId(here as u32))
    }
}
/// Set of key tokens matching a lookup, usable as an iterator of `TokenId`
/// or through its own set-style methods (`and`, `or`, `count`, `first`, ...).
pub struct KeyHits<'a> {
state: KeyHitsState<'a>,
}
/// Backing representation of a `KeyHits`.
enum KeyHitsState<'a> {
/// No hits.
Empty,
/// Hits held in an owned bitmap.
Owned {
bitmap: croaring::Bitmap,
// Filled with `bitmap.to_vec()` on the first call to `next`.
cache: Option<Vec<u32>>,
// Index into `cache` of the next value `next` will yield.
pos: usize,
},
/// Hits are the values of a borrowed bitmap that lie in `lo ..= hi`.
Bounded {
bitmap: &'a croaring::Bitmap,
// Created on the first call to `next`.
cursor: Option<croaring::bitmap::BitmapCursor<'a>>,
// Inclusive lower bound.
lo: u32,
// Inclusive upper bound.
hi: u32,
// Set once `next` has positioned `cursor`.
started: bool,
},
}
impl<'a> KeyHits<'a> {
fn from_bitmap(bm: croaring::Bitmap) -> Self {
Self {
state: KeyHitsState::Owned {
bitmap: bm,
cache: None,
pos: 0,
},
}
}
pub(crate) fn bounded(bitmap: &'a croaring::Bitmap, lo: u32, hi: u32) -> Self {
if lo > hi {
return Self::empty();
}
Self {
state: KeyHitsState::Bounded {
bitmap,
cursor: None,
lo,
hi,
started: false,
},
}
}
fn empty() -> Self {
Self {
state: KeyHitsState::Empty,
}
}
fn into_owned_bitmap(self) -> Option<croaring::Bitmap> {
match self.state {
KeyHitsState::Empty => None,
KeyHitsState::Owned { bitmap, .. } => Some(bitmap),
KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
let mut range = croaring::Bitmap::new();
range.add_range(lo..=hi);
let mut out = bitmap.clone();
out.and_inplace(&range);
Some(out)
}
}
}
pub fn at_depth(self, _depth: u16) -> Self {
self
}
pub fn and(self, other: KeyHits<'a>) -> Self {
match (self.into_owned_bitmap(), other.into_owned_bitmap()) {
(Some(mut a), Some(b)) => {
a.and_inplace(&b);
Self::from_bitmap(a)
}
_ => Self::empty(),
}
}
pub fn or(self, other: KeyHits<'a>) -> Self {
match (self.into_owned_bitmap(), other.into_owned_bitmap()) {
(Some(mut a), Some(b)) => {
a.or_inplace(&b);
Self::from_bitmap(a)
}
(Some(a), None) | (None, Some(a)) => Self::from_bitmap(a),
_ => Self::empty(),
}
}
pub fn count(self) -> u64 {
match self.state {
KeyHitsState::Empty => 0,
KeyHitsState::Owned { bitmap, .. } => bitmap.cardinality(),
KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
bitmap.range_cardinality(lo..=hi)
}
}
}
pub fn first(self) -> Option<TokenId> {
match self.state {
KeyHitsState::Empty => None,
KeyHitsState::Owned { bitmap, .. } => bitmap.minimum().map(TokenId),
KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
let mut cur = bitmap.cursor();
cur.reset_at_or_after(lo);
cur.current().filter(|&v| v <= hi).map(TokenId)
}
}
}
pub fn last(self) -> Option<TokenId> {
match self.state {
KeyHitsState::Empty => None,
KeyHitsState::Owned { bitmap, .. } => bitmap.maximum().map(TokenId),
KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
let mut cur = bitmap.cursor();
cur.reset_at_or_after(lo);
let mut last = None;
while let Some(v) = cur.current() {
if v > hi {
break;
}
last = Some(TokenId(v));
cur.move_next();
}
last
}
}
}
pub fn collect_into(self, buf: &mut Vec<TokenId>) {
match self.state {
KeyHitsState::Empty => {}
KeyHitsState::Owned { bitmap, .. } => {
buf.extend(bitmap.to_vec().into_iter().map(TokenId));
}
KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
let mut cur = bitmap.cursor();
cur.reset_at_or_after(lo);
while let Some(v) = cur.current() {
if v > hi {
break;
}
buf.push(TokenId(v));
cur.move_next();
}
}
}
}
}
impl<'a> Iterator for KeyHits<'a> {
    type Item = TokenId;

    /// Yields the hits in ascending order.
    ///
    /// An owned set is expanded into a `Vec` on the first call; a bounded set
    /// walks a bitmap cursor and stops at the first value above `hi`.
    fn next(&mut self) -> Option<TokenId> {
        match &mut self.state {
            KeyHitsState::Empty => None,
            KeyHitsState::Owned { bitmap, cache, pos } => {
                let values = cache.get_or_insert_with(|| bitmap.to_vec());
                let v = *values.get(*pos)?;
                *pos += 1;
                Some(TokenId(v))
            }
            KeyHitsState::Bounded {
                bitmap,
                cursor,
                lo,
                hi,
                started,
            } => {
                if *started {
                    // Already positioned: step past the value yielded last time.
                    if let Some(c) = cursor.as_mut() {
                        c.move_next();
                    }
                } else {
                    let mut c = bitmap.cursor();
                    c.reset_at_or_after(*lo);
                    *cursor = Some(c);
                    *started = true;
                }
                let v = cursor.as_ref()?.current()?;
                if v <= *hi {
                    Some(TokenId(v))
                } else {
                    None
                }
            }
        }
    }
}
/// Builds a `StructuralIndex` over `bytes` with `BuildOptions::default()`.
///
/// # Errors
/// Returns whatever `from_bytes_with` returns for the same input.
pub fn from_bytes(bytes: &[u8]) -> Result<StructuralIndex, Error> {
from_bytes_with(bytes, BuildOptions::default())
}
pub fn from_bytes_with(bytes: &[u8], opts: BuildOptions) -> Result<StructuralIndex, Error> {
let idx = StructIndex::build(bytes).map_err(|s| Error::Parse(s.to_string()))?;
let keys = if opts.keys {
Some(KeyBitmaps::build(&idx, bytes))
} else {
None
};
Ok(StructuralIndex {
inner: Arc::new(Inner { idx, keys }),
})
}
/// Finds every key named `key` whose value equals `literal` and yields the
/// parent token of each such key.
///
/// Values are compared with `value_matches`, so a quoted JSON string is
/// compared by its unquoted content. Keys without a value or without a
/// parent yield nothing. `literal` is copied so the iterator does not
/// borrow it.
pub fn find_eq<'a>(
    idx: &'a StructuralIndex,
    bytes: &'a [u8],
    key: &str,
    literal: &[u8],
) -> impl Iterator<Item = TokenId> + 'a {
    let wanted = literal.to_vec();
    idx.keys_named(key, None).filter_map(move |k_tok| {
        let v_tok = idx.value_for_key(k_tok)?;
        let span = idx.byte_span_in(v_tok, bytes);
        let raw = &bytes[span.start as usize..span.end as usize];
        if !value_matches(raw, &wanted) {
            return None;
        }
        idx.parent(k_tok)
    })
}
/// Number of key tokens named `key` anywhere in the index.
pub fn count_key(idx: &StructuralIndex, key: &str) -> u64 {
    let hits = idx.keys_named(key, None);
    hits.count()
}
/// Yields the parent token of every occurrence of the first condition's key
/// for which **all** `(key, literal)` conditions hold on that same parent.
///
/// A condition holds when some key named `key` has exactly that parent and a
/// value equal to `literal` (compared with `value_matches`). With no
/// conditions nothing is yielded.
///
/// Each condition is checked with `keys_named_in`, which walks only the key
/// hits inside the parent's token range, instead of cloning and scanning the
/// document-wide hit set for every candidate. This relies on a container's
/// direct children having token ids between the container and its close
/// token, the same assumption `StructuralIndex::field_of` makes.
pub fn find_eq_compound<'a>(
    idx: &'a StructuralIndex,
    bytes: &'a [u8],
    conds: &'a [(&str, &[u8])],
) -> impl Iterator<Item = TokenId> + 'a {
    // Candidates: every occurrence of the first condition's key.
    let mut cands: Vec<TokenId> = Vec::new();
    if let Some((first_key, _)) = conds.first() {
        idx.keys_named(first_key, None).collect_into(&mut cands);
    }
    cands.into_iter().filter_map(move |k_tok| {
        let parent_obj = idx.parent(k_tok)?;
        let all_hold = conds.iter().all(|(key, lit)| {
            idx.keys_named_in(key, parent_obj).any(|k| {
                // The range also contains keys of nested objects; keep only
                // direct members of `parent_obj`.
                if idx.parent(k) != Some(parent_obj) {
                    return false;
                }
                match idx.value_for_key(k) {
                    Some(v) => {
                        let span = idx.byte_span_in(v, bytes);
                        let raw = &bytes[span.start as usize..span.end as usize];
                        value_matches(raw, lit)
                    }
                    None => false,
                }
            })
        });
        if all_hold {
            Some(parent_obj)
        } else {
            None
        }
    })
}
/// Public entry point for `value_matches`: compares the raw bytes of a JSON
/// value with `literal`.
pub fn json_string_eq(value: &[u8], literal: &[u8]) -> bool {
value_matches(value, literal)
}
/// Compares the raw bytes of a JSON value with `literal`.
///
/// A value wrapped in double quotes is compared by its content: byte-for-byte
/// when it has no backslash, through `slow_decode_eq` otherwise. Any other
/// value is compared byte-for-byte as is.
fn value_matches(value: &[u8], literal: &[u8]) -> bool {
    let body = match value {
        [b'"', inner @ .., b'"'] => inner,
        _ => return value == literal,
    };
    if memchr::memchr(b'\\', body).is_some() {
        slow_decode_eq(body, literal)
    } else {
        body == literal
    }
}
/// Compares an escaped JSON string body (without its surrounding quotes)
/// with the already-decoded `literal`, decoding escapes on the fly.
///
/// Supported escapes: `\"`, `\\`, `\/`, `\n`, `\t`, `\r`, `\b`, `\f`.
/// Any other escape (including `\uXXXX`) and a dangling backslash at the end
/// of `body` make the comparison fail rather than being treated as literal
/// bytes.
fn slow_decode_eq(body: &[u8], literal: &[u8]) -> bool {
    let mut expected = literal.iter().copied();
    let mut i = 0;
    while i < body.len() {
        let (decoded, consumed) = match body[i] {
            b'\\' => match body.get(i + 1).copied() {
                Some(b'"') => (b'"', 2),
                Some(b'\\') => (b'\\', 2),
                Some(b'/') => (b'/', 2),
                Some(b'n') => (b'\n', 2),
                Some(b't') => (b'\t', 2),
                Some(b'r') => (b'\r', 2),
                Some(b'b') => (b'\x08', 2),
                Some(b'f') => (b'\x0c', 2),
                // Unsupported escape, or a backslash with nothing after it.
                _ => return false,
            },
            plain => (plain, 1),
        };
        // Also fails when `literal` runs out before `body` does.
        if expected.next() != Some(decoded) {
            return false;
        }
        i += consumed;
    }
    // `body` is exhausted; equal only if `literal` is too.
    expected.next().is_none()
}
/// Returns the offset one past the closing quote of the string whose opening
/// quote is at `start`, or `bytes.len()` when no closing quote is found.
///
/// A backslash skips the byte after it, so escaped quotes do not end the
/// string. The byte at `start` itself is not inspected. The scan position is
/// computed in `usize`, so `start == u32::MAX` cannot overflow.
fn scan_string_end(bytes: &[u8], start: u32) -> u32 {
    let mut i = (start as usize).saturating_add(1);
    while i < bytes.len() {
        match bytes[i] {
            b'\\' => i = i.saturating_add(2),
            b'"' => return (i + 1) as u32,
            _ => i += 1,
        }
    }
    bytes.len() as u32
}
/// Returns the offset of the first terminator byte at or after `start`
/// (`,`, `}`, `]`, space, tab, `\n`, `\r`), or `bytes.len()` when there is
/// none or `start` is past the end.
fn scan_scalar_end(bytes: &[u8], start: u32) -> u32 {
    let from = start as usize;
    let stop = bytes
        .get(from..)
        .and_then(|tail| {
            tail.iter()
                .position(|&b| matches!(b, b',' | b'}' | b']' | b' ' | b'\t' | b'\n' | b'\r'))
        })
        .map_or(bytes.len(), |offset| from + offset);
    stop as u32
}
/// Parses `bytes` as an `f64`, returning `None` when parsing fails.
///
/// With the `fast-numbers` feature the `fast_float` parser is used;
/// otherwise the bytes must be valid UTF-8 and are parsed with `str::parse`.
pub fn parse_f64(bytes: &[u8]) -> Option<f64> {
    #[cfg(feature = "fast-numbers")]
    {
        fast_float::parse(bytes).ok()
    }
    #[cfg(not(feature = "fast-numbers"))]
    {
        let text = std::str::from_utf8(bytes).ok()?;
        text.parse::<f64>().ok()
    }
}
/// Parses `bytes` as an `i64`.
///
/// Text that is a valid `i64` is parsed exactly. Otherwise the bytes are
/// parsed with `parse_f64` and truncated toward zero (`3.9` becomes `3`).
///
/// Returns `None` when neither parse succeeds, and also when the float is
/// NaN, infinite, or outside the `i64` range. A bare `as` cast would turn
/// those into `0` or a saturated `i64::MIN` / `i64::MAX`.
pub fn parse_i64(bytes: &[u8]) -> Option<i64> {
    if let Ok(s) = std::str::from_utf8(bytes) {
        if let Ok(n) = s.parse::<i64>() {
            return Some(n);
        }
    }
    // 2^63: the smallest f64 above i64::MAX; -2^63 is exactly i64::MIN.
    const LIMIT: f64 = 9_223_372_036_854_775_808.0;
    let f = parse_f64(bytes)?;
    // NaN fails both comparisons, so it is rejected here as well.
    if f >= -LIMIT && f < LIMIT {
        Some(f as i64)
    } else {
        None
    }
}
/// `true` when both inputs parse with `parse_f64` and the resulting floats
/// compare equal (so `42.0` equals `42`); `false` if either fails to parse.
pub fn json_number_eq(value: &[u8], literal: &[u8]) -> bool {
    let lhs = parse_f64(value);
    let rhs = parse_f64(literal);
    matches!((lhs, rhs), (Some(a), Some(b)) if a == b)
}
/// Builds an Aho-Corasick automaton with one pattern per key, each of the
/// form `"<key>":` (quoted key immediately followed by a colon).
///
/// Keys are inserted verbatim, without JSON escaping.
///
/// # Panics
/// Panics with "aho-corasick build" if the automaton cannot be built.
#[cfg(feature = "multi-key")]
pub fn multi_key_finder(keys: &[&str]) -> aho_corasick::AhoCorasick {
    let mut patterns: Vec<String> = Vec::with_capacity(keys.len());
    for key in keys {
        patterns.push(format!("\"{}\":", key));
    }
    let mut builder = aho_corasick::AhoCorasickBuilder::new();
    builder.ascii_case_insensitive(false);
    builder.build(&patterns).expect("aho-corasick build")
}
/// Runs `finder` over `bytes`, yielding `(pattern index, match start)` for
/// every match.
#[cfg(feature = "multi-key")]
pub fn multi_key_scan<'a>(
    finder: &'a aho_corasick::AhoCorasick,
    bytes: &'a [u8],
) -> impl Iterator<Item = (usize, usize)> + 'a {
    finder.find_iter(bytes).map(|hit| {
        let pattern = hit.pattern().as_usize();
        (pattern, hit.start())
    })
}
/// Checks that `bytes` is valid UTF-8 using `simdutf8`.
///
/// # Errors
/// Returns `Error::InvalidUtf8` when validation fails.
#[cfg(feature = "validate-utf8")]
pub fn validate_utf8(bytes: &[u8]) -> Result<(), Error> {
    match simdutf8::basic::from_utf8(bytes) {
        Ok(_) => Ok(()),
        Err(_) => Err(Error::InvalidUtf8),
    }
}
// Unit tests for the public API defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// Building with default options yields tokens, key bitmaps and the key names.
#[test]
fn from_bytes_basic_roundtrip() {
let buf = br#"{"a":1,"b":"hi","c":[1,2,3]}"#;
let idx = from_bytes(buf).unwrap();
assert!(idx.token_count() > 0);
assert!(idx.has_keys());
let mut keys: Vec<&str> = idx.keys_seen().collect();
keys.sort();
assert_eq!(keys, vec!["a", "b", "c"]);
}
// Every hit returned for a key name is classified as `TokenKind::Key`.
#[test]
fn keys_named_returns_token_ids() {
let buf = br#"{"x":1,"y":2,"x":3}"#;
let idx = from_bytes(buf).unwrap();
let xs: Vec<TokenId> = idx.keys_named("x", None).collect();
assert!(!xs.is_empty(), "expected at least one 'x' match");
for t in &xs {
assert_eq!(idx.kind(*t), TokenKind::Key);
}
}
// `count_key` counts occurrences across all nesting levels.
#[test]
fn count_key_uses_popcount() {
let buf = br#"{"a":{"x":1},"b":{"x":2},"c":{"x":3,"y":4}}"#;
let idx = from_bytes(buf).unwrap();
let c = count_key(&idx, "x");
assert_eq!(c, 3);
let cy = count_key(&idx, "y");
assert_eq!(cy, 1);
}
// `KeyHits::first` returns a key token when the name occurs.
#[test]
fn key_hits_first_short_circuits() {
let buf = br#"{"a":1,"b":2,"c":3,"d":4}"#;
let idx = from_bytes(buf).unwrap();
let first = idx.keys_named("c", None).first();
assert!(first.is_some());
assert_eq!(idx.kind(first.unwrap()), TokenKind::Key);
}
// `find_eq` yields the object enclosing each matching key/value pair.
#[test]
fn find_eq_returns_enclosing_objects() {
let buf = br#"{"a":{"x":"test"},"b":{"x":"nope"},"c":{"x":"test"}}"#;
let idx = from_bytes(buf).unwrap();
let hits: Vec<TokenId> = find_eq(&idx, buf, "x", b"test").collect();
assert_eq!(hits.len(), 2);
for t in &hits {
assert_eq!(idx.kind(*t), TokenKind::Object);
}
}
// A byte inside the nested object resolves to an object at depth >= 1.
#[test]
fn container_at_byte_works() {
let buf = br#"{"a":{"x":1},"b":2}"#;
let idx = from_bytes(buf).unwrap();
let c = idx.container_at_byte(9).unwrap();
assert_eq!(idx.kind(c), TokenKind::Object);
assert!(idx.depth(c) >= 1);
}
// `BuildOptions::minimal()` skips key bitmaps, so key lookups are empty.
#[test]
fn build_options_minimal_skips_keys() {
let buf = br#"{"a":1}"#;
let idx = from_bytes_with(buf, BuildOptions::minimal()).unwrap();
assert!(!idx.has_keys());
assert_eq!(idx.keys_named("a", None).count(), 0);
}
// The last ancestor of a nested scalar has no parent of its own.
#[test]
fn ancestors_walks_to_root() {
let buf = br#"{"a":{"b":{"c":42}}}"#;
let idx = from_bytes(buf).unwrap();
let scalar_tok = idx
.tokens()
.find(|t| idx.kind(*t) == TokenKind::Scalar)
.unwrap();
let chain: Vec<TokenId> = idx.ancestors(scalar_tok).collect();
assert!(!chain.is_empty());
assert!(idx.parent(*chain.last().unwrap()).is_none());
}
// Quoted values compare by content; unquoted values compare verbatim.
#[test]
fn json_string_eq_handles_quotes() {
assert!(json_string_eq(b"\"hello\"", b"hello"));
assert!(!json_string_eq(b"\"hello\"", b"world"));
assert!(json_string_eq(b"\"he\\nllo\"", b"he\nllo"));
assert!(json_string_eq(b"42", b"42"));
assert!(json_string_eq(b"true", b"true"));
}
// Only the object satisfying both conditions is returned.
#[test]
fn find_eq_compound_intersect() {
let buf = br#"[{"k1":"a","k2":"b"},{"k1":"a","k2":"c"},{"k1":"x","k2":"b"}]"#;
let idx = from_bytes(buf).unwrap();
let conds: &[(&str, &[u8])] = &[("k1", b"a"), ("k2", b"b")];
let hits: Vec<TokenId> = find_eq_compound(&idx, buf, conds).collect();
assert_eq!(hits.len(), 1, "exactly one obj should match both conds");
}
// Simple escapes are decoded; `\uXXXX` escapes never match.
#[test]
fn json_string_eq_handles_simple_escapes() {
assert!(json_string_eq(b"\"a\\nb\"", b"a\nb"));
assert!(json_string_eq(b"\"a\\\\b\"", b"a\\b"));
assert!(json_string_eq(b"\"a\\\"b\"", b"a\"b"));
assert!(!json_string_eq(b"\"a\\nb\"", b"axb"));
assert!(!json_string_eq(b"\"\\u0041\"", b"A"));
}
// `parse_f64` accepts decimals, exponents and integers, and rejects text.
#[test]
fn parse_f64_works_with_or_without_fast_numbers() {
assert_eq!(parse_f64(b"3.14"), Some(3.14));
assert_eq!(parse_f64(b"-1e10"), Some(-1e10));
assert_eq!(parse_f64(b"42"), Some(42.0));
assert_eq!(parse_f64(b"not a number"), None);
}
// Non-integer input falls back to float parsing and truncates toward zero.
#[test]
fn parse_i64_falls_back_to_f64_truncate() {
assert_eq!(parse_i64(b"42"), Some(42));
assert_eq!(parse_i64(b"-7"), Some(-7));
assert_eq!(parse_i64(b"3.9"), Some(3)); }
// Numbers compare by value, not by spelling.
#[test]
fn json_number_eq_compares_numerically() {
assert!(json_number_eq(b"42", b"42"));
assert!(json_number_eq(b"42.0", b"42")); assert!(!json_number_eq(b"42", b"43"));
assert!(!json_number_eq(b"abc", b"42"));
}
// Each requested key is found once, in document order.
#[cfg(feature = "multi-key")]
#[test]
fn multi_key_finder_matches_top_level_keys() {
let keys = ["type", "actor", "repo"];
let finder = multi_key_finder(&keys);
let bytes = br#"{"type":"PushEvent","actor":{"login":"x"},"repo":{"name":"y"}}"#;
let hits: Vec<(usize, usize)> = multi_key_scan(&finder, bytes).collect();
assert_eq!(hits.len(), 3);
let pattern_indices: Vec<usize> = hits.iter().map(|(p, _)| *p).collect();
assert_eq!(pattern_indices, vec![0, 1, 2]);
}
// Valid ASCII passes; an invalid byte sequence is rejected.
#[cfg(feature = "validate-utf8")]
#[test]
fn validate_utf8_accepts_valid_input() {
assert!(validate_utf8(b"hello").is_ok());
assert!(validate_utf8(b"\xff\xfe").is_err()); }
}