1use std::sync::Arc;
31
32use crate::index::StructIndex;
33use crate::keys::{value_for_key as keys_value_for_key, KeyBitmaps, Role};
34use crate::stage1::Kind;
35
36#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
38pub struct TokenId(pub(crate) u32);
39
40impl TokenId {
41 #[inline]
42 pub fn raw(self) -> u32 {
43 self.0
44 }
45}
46
47impl From<u32> for TokenId {
48 #[inline]
49 fn from(v: u32) -> Self {
50 Self(v)
51 }
52}
53
54#[derive(Copy, Clone, Eq, PartialEq, Debug)]
55#[non_exhaustive]
56pub enum TokenKind {
57 Object,
58 Array,
59 Key,
60 String,
61 Scalar,
62 ObjectEnd,
63 ArrayEnd,
64}
65
66#[derive(Copy, Clone, Eq, PartialEq, Debug)]
67pub struct ByteSpan {
68 pub start: u32,
69 pub end: u32,
70}
71
72impl ByteSpan {
73 #[inline]
74 pub fn len(self) -> u32 {
75 self.end.saturating_sub(self.start)
76 }
77 #[inline]
78 pub fn is_empty(self) -> bool {
79 self.end <= self.start
80 }
81 #[inline]
82 pub fn slice<'a>(self, bytes: &'a [u8]) -> &'a [u8] {
83 &bytes[self.start as usize..self.end as usize]
84 }
85}
86
87#[derive(Debug)]
89pub enum Error {
90 Parse(String),
91 UnbalancedClose,
92 Truncated,
93 InvalidUtf8,
94}
95
96impl std::fmt::Display for Error {
97 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98 match self {
99 Error::Parse(s) => write!(f, "parse error: {s}"),
100 Error::UnbalancedClose => write!(f, "unbalanced container close"),
101 Error::Truncated => write!(f, "truncated input"),
102 Error::InvalidUtf8 => write!(f, "invalid UTF-8"),
103 }
104 }
105}
106
107impl std::error::Error for Error {}
108
109#[derive(Clone, Debug)]
110#[non_exhaustive]
111pub struct BuildOptions {
112 pub keys: bool,
113 pub roles: bool,
114 pub close_of: bool,
115 pub tape_alignment: bool,
116}
117
118impl Default for BuildOptions {
119 fn default() -> Self {
120 Self {
121 keys: true,
122 roles: true,
123 close_of: true,
124 tape_alignment: false,
125 }
126 }
127}
128
129impl BuildOptions {
130 pub fn minimal() -> Self {
131 Self {
132 keys: false,
133 roles: false,
134 close_of: false,
135 tape_alignment: false,
136 }
137 }
138
139 pub fn keys_only() -> Self {
140 Self {
141 keys: true,
142 roles: true,
143 close_of: false,
144 tape_alignment: false,
145 }
146 }
147
148 pub fn for_jetro_tape() -> Self {
149 Self {
150 keys: true,
151 roles: true,
152 close_of: true,
153 tape_alignment: true,
154 }
155 }
156}
157
158
159pub struct StructuralIndex {
162 pub(crate) inner: Arc<Inner>,
163}
164
165pub(crate) struct Inner {
166 pub idx: StructIndex,
167 pub keys: Option<KeyBitmaps>,
168}
169
170const _: fn() = || {
175 fn assert_send<T: Send>() {}
176 fn assert_sync<T: Sync>() {}
177 assert_send::<StructuralIndex>();
178 assert_sync::<StructuralIndex>();
179 assert_send::<TokenId>();
180 assert_sync::<TokenId>();
181 assert_send::<ByteSpan>();
182 assert_sync::<ByteSpan>();
183 assert_send::<Error>();
184 assert_sync::<Error>();
185 assert_send::<BuildOptions>();
186 assert_sync::<BuildOptions>();
187};
188
189impl StructuralIndex {
190 pub fn token_count(&self) -> u32 {
192 self.inner.idx.stage1.len() as u32
193 }
194
195 pub fn max_depth(&self) -> u16 {
197 self.inner.idx.stage1.depth.iter().copied().max().unwrap_or(0)
198 }
199
200 pub fn tokens(&self) -> Tokens<'_> {
202 Tokens {
203 idx: self,
204 cur: 0,
205 end: self.token_count(),
206 }
207 }
208
209 #[inline]
213 pub fn kind(&self, tok: TokenId) -> TokenKind {
214 let i = tok.0 as usize;
215 let k = self.inner.idx.stage1.kind[i];
216 let role = self
217 .inner
218 .keys
219 .as_ref()
220 .map(|kb| kb.role[i])
221 .unwrap_or(Role::None);
222 match (k, role) {
223 (Kind::ObjOpen, _) => TokenKind::Object,
224 (Kind::ArrOpen, _) => TokenKind::Array,
225 (Kind::ObjClose, _) => TokenKind::ObjectEnd,
226 (Kind::ArrClose, _) => TokenKind::ArrayEnd,
227 (Kind::Quote, Role::Key) => TokenKind::Key,
228 (Kind::Quote, _) => TokenKind::String,
229 (Kind::Scalar, _) => TokenKind::Scalar,
230 (Kind::Colon | Kind::Comma, _) => TokenKind::Scalar, }
232 }
233
234 #[inline]
236 pub fn depth(&self, tok: TokenId) -> u16 {
237 self.inner.idx.stage1.depth[tok.0 as usize]
238 }
239
240 #[inline]
242 pub fn byte_offset(&self, tok: TokenId) -> u32 {
243 self.inner.idx.stage1.offset[tok.0 as usize]
244 }
245
246 pub fn byte_span(&self, tok: TokenId) -> ByteSpan {
255 let s = &self.inner.idx.stage1;
256 let i = tok.0 as usize;
257 let start = s.offset[i];
258
259 let end = match s.kind[i] {
260 Kind::ObjOpen | Kind::ArrOpen => {
261 let close = self.inner.idx.close_of[i];
262 if close >= 0 {
263 s.offset[close as usize] + 1
264 } else {
265 start + 1
266 }
267 }
268 Kind::ObjClose | Kind::ArrClose => start + 1,
269 Kind::Quote | Kind::Scalar => {
270 if i + 1 < s.offset.len() {
271 s.offset[i + 1]
272 } else {
273 start + 1
274 }
275 }
276 Kind::Colon | Kind::Comma => start + 1,
277 };
278 ByteSpan { start, end }
279 }
280
281 pub fn byte_span_in(&self, tok: TokenId, bytes: &[u8]) -> ByteSpan {
284 let s = &self.inner.idx.stage1;
285 let i = tok.0 as usize;
286 let start = s.offset[i];
287 let end = match s.kind[i] {
288 Kind::ObjOpen | Kind::ArrOpen => {
289 let close = self.inner.idx.close_of[i];
290 if close >= 0 {
291 s.offset[close as usize] + 1
292 } else {
293 start + 1
294 }
295 }
296 Kind::ObjClose | Kind::ArrClose => start + 1,
297 Kind::Quote => scan_string_end(bytes, start),
298 Kind::Scalar => scan_scalar_end(bytes, start),
299 Kind::Colon | Kind::Comma => start + 1,
300 };
301 ByteSpan { start, end }
302 }
303
304 pub fn parent(&self, tok: TokenId) -> Option<TokenId> {
305 let p = self.inner.idx.parent[tok.0 as usize];
306 if p < 0 {
307 None
308 } else {
309 Some(TokenId(p as u32))
310 }
311 }
312
313 pub fn close_of(&self, container: TokenId) -> Option<TokenId> {
314 let c = self.inner.idx.close_of[container.0 as usize];
315 if c < 0 {
316 None
317 } else {
318 Some(TokenId(c as u32))
319 }
320 }
321
322 pub fn tape_index(&self, tok: TokenId) -> Option<u32> {
323 let t = self.inner.idx.tape_of[tok.0 as usize];
324 if t == u32::MAX {
325 None
326 } else {
327 Some(t)
328 }
329 }
330
331 pub fn container_at_byte(&self, pos: u32) -> Option<TokenId> {
334 self.inner.idx.container_at(pos).map(TokenId)
335 }
336
337 pub fn ancestors(&self, tok: TokenId) -> Ancestors<'_> {
339 Ancestors {
340 parent: &self.inner.idx.parent,
341 cur: self.inner.idx.parent[tok.0 as usize],
342 }
343 }
344
345 pub fn slice<'a>(&self, bytes: &'a [u8], tok: TokenId) -> &'a [u8] {
346 let span = self.byte_span_in(tok, bytes);
347 &bytes[span.start as usize..(span.end as usize).min(bytes.len())]
348 }
349
350 pub fn has_keys(&self) -> bool {
353 self.inner.keys.is_some()
354 }
355
356 pub fn keys_named<'a>(&'a self, name: &str, depth: Option<u16>) -> KeyHits<'a> {
357 let kb = match &self.inner.keys {
358 Some(k) => k,
359 None => return KeyHits::empty(),
360 };
361 let id = match kb.by_name.get(name) {
362 Some(&id) => id,
363 None => return KeyHits::empty(),
364 };
365 let bm = &kb.bitmaps[id as usize];
366 let result = match depth.and_then(|d| kb.depth_bitmaps.get(d as usize)) {
367 Some(dm) => bm.and(dm),
368 None => bm.clone(),
369 };
370 KeyHits::from_bitmap(result)
371 }
372
373 pub fn has_key(&self, name: &str) -> bool {
374 self.inner
375 .keys
376 .as_ref()
377 .map(|k| k.by_name.contains_key(name))
378 .unwrap_or(false)
379 }
380
381 pub fn keys_seen(&self) -> impl Iterator<Item = &str> + '_ {
382 self.inner
383 .keys
384 .as_ref()
385 .into_iter()
386 .flat_map(|k| k.dict.iter().map(|s| &**s))
387 }
388
389 pub fn value_for_key(&self, key_tok: TokenId) -> Option<TokenId> {
390 keys_value_for_key(&self.inner.idx, key_tok.0).map(TokenId)
391 }
392
393 pub fn keys_named_in<'a>(&'a self, name: &str, root: TokenId) -> KeyHits<'a> {
401 let kb = match &self.inner.keys {
402 Some(k) => k,
403 None => return KeyHits::empty(),
404 };
405 let id = match kb.by_name.get(name) {
406 Some(&id) => id,
407 None => return KeyHits::empty(),
408 };
409 let close = self
410 .close_of(root)
411 .map(|t| t.0)
412 .unwrap_or(self.token_count().saturating_sub(1));
413 let bm = &kb.bitmaps[id as usize];
414 KeyHits::bounded(bm, root.0, close)
415 }
416
417 pub fn field_of(&self, parent: TokenId, name: &str) -> Option<TokenId> {
426 let kb = self.inner.keys.as_ref()?;
427 let id = *kb.by_name.get(name)?;
428 let bm = &kb.bitmaps[id as usize];
429 let close = self
430 .close_of(parent)
431 .map(|t| t.0)
432 .unwrap_or_else(|| self.token_count().saturating_sub(1));
433 let lo = parent.0.saturating_add(1);
434 if lo > close {
435 return None;
436 }
437 let mut cur = bm.cursor();
438 cur.reset_at_or_after(lo);
439 while let Some(v) = cur.current() {
440 if v > close {
441 break;
442 }
443 let k_tok = TokenId(v);
444 if self.parent(k_tok) == Some(parent) {
445 return self.value_for_key(k_tok);
446 }
447 cur.move_next();
448 }
449 None
450 }
451
452 pub fn subtree_bitmap(&self, root: TokenId) -> croaring::Bitmap {
455 let close = self
456 .close_of(root)
457 .map(|t| t.0)
458 .unwrap_or(self.token_count().saturating_sub(1));
459 let mut out = croaring::Bitmap::new();
460 out.add_range(root.0..=close);
461 out
462 }
463}
464
465
466pub struct Tokens<'a> {
467 idx: &'a StructuralIndex,
468 cur: u32,
469 end: u32,
470}
471
472impl<'a> Iterator for Tokens<'a> {
473 type Item = TokenId;
474 fn next(&mut self) -> Option<TokenId> {
475 if self.cur >= self.end {
476 return None;
477 }
478 let _ = self.idx;
479 let t = TokenId(self.cur);
480 self.cur += 1;
481 Some(t)
482 }
483}
484
485pub struct Ancestors<'a> {
486 parent: &'a [i32],
487 cur: i32,
488}
489
490impl<'a> Iterator for Ancestors<'a> {
491 type Item = TokenId;
492 fn next(&mut self) -> Option<TokenId> {
493 if self.cur < 0 {
494 return None;
495 }
496 let out = TokenId(self.cur as u32);
497 self.cur = self.parent[self.cur as usize];
498 Some(out)
499 }
500}
501
502pub struct KeyHits<'a> {
515 state: KeyHitsState<'a>,
516}
517
518enum KeyHitsState<'a> {
519 Empty,
520 Owned {
521 bitmap: croaring::Bitmap,
522 cache: Option<Vec<u32>>,
523 pos: usize,
524 },
525 Bounded {
526 bitmap: &'a croaring::Bitmap,
527 cursor: Option<croaring::bitmap::BitmapCursor<'a>>,
528 lo: u32,
529 hi: u32,
530 started: bool,
531 },
532}
533
534impl<'a> KeyHits<'a> {
535 fn from_bitmap(bm: croaring::Bitmap) -> Self {
536 Self {
537 state: KeyHitsState::Owned {
538 bitmap: bm,
539 cache: None,
540 pos: 0,
541 },
542 }
543 }
544
545 pub(crate) fn bounded(bitmap: &'a croaring::Bitmap, lo: u32, hi: u32) -> Self {
546 if lo > hi {
547 return Self::empty();
548 }
549 Self {
550 state: KeyHitsState::Bounded {
551 bitmap,
552 cursor: None,
553 lo,
554 hi,
555 started: false,
556 },
557 }
558 }
559
560 fn empty() -> Self {
561 Self {
562 state: KeyHitsState::Empty,
563 }
564 }
565
566 fn into_owned_bitmap(self) -> Option<croaring::Bitmap> {
569 match self.state {
570 KeyHitsState::Empty => None,
571 KeyHitsState::Owned { bitmap, .. } => Some(bitmap),
572 KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
573 let mut range = croaring::Bitmap::new();
574 range.add_range(lo..=hi);
575 let mut out = bitmap.clone();
576 out.and_inplace(&range);
577 Some(out)
578 }
579 }
580 }
581
582 pub fn at_depth(self, _depth: u16) -> Self {
583 self
584 }
585
586 pub fn and(self, other: KeyHits<'a>) -> Self {
587 match (self.into_owned_bitmap(), other.into_owned_bitmap()) {
588 (Some(mut a), Some(b)) => {
589 a.and_inplace(&b);
590 Self::from_bitmap(a)
591 }
592 _ => Self::empty(),
593 }
594 }
595
596 pub fn or(self, other: KeyHits<'a>) -> Self {
597 match (self.into_owned_bitmap(), other.into_owned_bitmap()) {
598 (Some(mut a), Some(b)) => {
599 a.or_inplace(&b);
600 Self::from_bitmap(a)
601 }
602 (Some(a), None) | (None, Some(a)) => Self::from_bitmap(a),
603 _ => Self::empty(),
604 }
605 }
606
607 pub fn count(self) -> u64 {
608 match self.state {
609 KeyHitsState::Empty => 0,
610 KeyHitsState::Owned { bitmap, .. } => bitmap.cardinality(),
611 KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
612 bitmap.range_cardinality(lo..=hi)
613 }
614 }
615 }
616
617 pub fn first(self) -> Option<TokenId> {
618 match self.state {
619 KeyHitsState::Empty => None,
620 KeyHitsState::Owned { bitmap, .. } => bitmap.minimum().map(TokenId),
621 KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
622 let mut cur = bitmap.cursor();
623 cur.reset_at_or_after(lo);
624 cur.current().filter(|&v| v <= hi).map(TokenId)
625 }
626 }
627 }
628
629 pub fn last(self) -> Option<TokenId> {
630 match self.state {
631 KeyHitsState::Empty => None,
632 KeyHitsState::Owned { bitmap, .. } => bitmap.maximum().map(TokenId),
633 KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
634 let mut cur = bitmap.cursor();
639 cur.reset_at_or_after(lo);
640 let mut last = None;
641 while let Some(v) = cur.current() {
642 if v > hi {
643 break;
644 }
645 last = Some(TokenId(v));
646 cur.move_next();
647 }
648 last
649 }
650 }
651 }
652
653 pub fn collect_into(self, buf: &mut Vec<TokenId>) {
654 match self.state {
655 KeyHitsState::Empty => {}
656 KeyHitsState::Owned { bitmap, .. } => {
657 buf.extend(bitmap.to_vec().into_iter().map(TokenId));
658 }
659 KeyHitsState::Bounded { bitmap, lo, hi, .. } => {
660 let mut cur = bitmap.cursor();
661 cur.reset_at_or_after(lo);
662 while let Some(v) = cur.current() {
663 if v > hi {
664 break;
665 }
666 buf.push(TokenId(v));
667 cur.move_next();
668 }
669 }
670 }
671 }
672}
673
674impl<'a> Iterator for KeyHits<'a> {
675 type Item = TokenId;
676 fn next(&mut self) -> Option<TokenId> {
677 match &mut self.state {
678 KeyHitsState::Empty => None,
679 KeyHitsState::Owned { bitmap, cache, pos } => {
680 if cache.is_none() {
681 *cache = Some(bitmap.to_vec());
682 }
683 let v = cache.as_ref()?.get(*pos).copied()?;
684 *pos += 1;
685 Some(TokenId(v))
686 }
687 KeyHitsState::Bounded {
688 bitmap,
689 cursor,
690 lo,
691 hi,
692 started,
693 } => {
694 if !*started {
695 let mut c = bitmap.cursor();
696 c.reset_at_or_after(*lo);
697 *cursor = Some(c);
698 *started = true;
699 } else if let Some(c) = cursor.as_mut() {
700 c.move_next();
701 }
702 let c = cursor.as_ref()?;
703 let v = c.current()?;
704 if v > *hi {
705 return None;
706 }
707 Some(TokenId(v))
708 }
709 }
710 }
711}
712
713
714pub fn from_bytes(bytes: &[u8]) -> Result<StructuralIndex, Error> {
715 from_bytes_with(bytes, BuildOptions::default())
716}
717
718pub fn from_bytes_with(bytes: &[u8], opts: BuildOptions) -> Result<StructuralIndex, Error> {
719 let idx = StructIndex::build(bytes).map_err(|s| Error::Parse(s.to_string()))?;
720 let keys = if opts.keys {
721 Some(KeyBitmaps::build(&idx, bytes))
722 } else {
723 None
724 };
725 Ok(StructuralIndex {
726 inner: Arc::new(Inner { idx, keys }),
727 })
728}
729
730pub fn find_eq<'a>(
735 idx: &'a StructuralIndex,
736 bytes: &'a [u8],
737 key: &str,
738 literal: &[u8],
739) -> impl Iterator<Item = TokenId> + 'a {
740 let key_hits = idx.keys_named(key, None);
741 let bytes_ref = bytes;
742 let literal_ref = literal.to_vec();
743 let idx_ref = idx;
744 key_hits
745 .filter_map(move |k_tok| {
746 let v_tok = idx_ref.value_for_key(k_tok)?;
747 let span = idx_ref.byte_span_in(v_tok, bytes_ref);
748 let v_bytes = &bytes_ref[span.start as usize..span.end as usize];
749 if value_matches(v_bytes, &literal_ref) {
750 idx_ref.parent(k_tok)
751 } else {
752 None
753 }
754 })
755}
756
757pub fn count_key(idx: &StructuralIndex, key: &str) -> u64 {
759 idx.keys_named(key, None).count()
760}
761
762pub fn find_eq_compound<'a>(
765 idx: &'a StructuralIndex,
766 bytes: &'a [u8],
767 conds: &'a [(&str, &[u8])],
768) -> impl Iterator<Item = TokenId> + 'a {
769 let mut cands: Vec<TokenId> = Vec::new();
773 if let Some((first_key, _)) = conds.first() {
774 idx.keys_named(first_key, None).collect_into(&mut cands);
775 }
776 cands.into_iter().filter_map(move |k_tok| {
777 let parent_obj = idx.parent(k_tok)?;
778 for (key, lit) in conds.iter() {
780 let mut matched = false;
781 for k in idx.keys_named(key, None) {
782 if idx.parent(k) == Some(parent_obj) {
783 if let Some(v) = idx.value_for_key(k) {
784 let span = idx.byte_span_in(v, bytes);
785 let v_bytes = &bytes[span.start as usize..span.end as usize];
786 if value_matches(v_bytes, lit) {
787 matched = true;
788 break;
789 }
790 }
791 }
792 }
793 if !matched {
794 return None;
795 }
796 }
797 Some(parent_obj)
798 })
799}
800
801
802pub fn json_string_eq(value: &[u8], literal: &[u8]) -> bool {
811 value_matches(value, literal)
812}
813
814fn value_matches(value: &[u8], literal: &[u8]) -> bool {
815 if value.len() >= 2 && value[0] == b'"' && value[value.len() - 1] == b'"' {
816 let body = &value[1..value.len() - 1];
817 if memchr::memchr(b'\\', body).is_some() {
820 return slow_decode_eq(body, literal);
823 }
824 body == literal
825 } else {
826 value == literal
827 }
828}
829
830fn slow_decode_eq(body: &[u8], literal: &[u8]) -> bool {
834 let mut i = 0;
835 let mut j = 0;
836 while i < body.len() && j < literal.len() {
837 let (decoded, consumed) = match body[i] {
838 b'\\' if i + 1 < body.len() => match body[i + 1] {
839 b'"' => (b'"', 2),
840 b'\\' => (b'\\', 2),
841 b'/' => (b'/', 2),
842 b'n' => (b'\n', 2),
843 b't' => (b'\t', 2),
844 b'r' => (b'\r', 2),
845 b'b' => (b'\x08', 2),
846 b'f' => (b'\x0c', 2),
847 _ => return false, },
849 c => (c, 1),
850 };
851 if decoded != literal[j] {
852 return false;
853 }
854 i += consumed;
855 j += 1;
856 }
857 i == body.len() && j == literal.len()
858}
859
860fn scan_string_end(bytes: &[u8], start: u32) -> u32 {
863 let mut i = (start + 1) as usize;
864 while i < bytes.len() {
865 match bytes[i] {
866 b'\\' => i = i.saturating_add(2),
867 b'"' => return (i + 1) as u32,
868 _ => i += 1,
869 }
870 }
871 bytes.len() as u32
872}
873
874fn scan_scalar_end(bytes: &[u8], start: u32) -> u32 {
876 let mut i = start as usize;
877 while i < bytes.len() {
878 match bytes[i] {
879 b',' | b'}' | b']' | b' ' | b'\t' | b'\n' | b'\r' => return i as u32,
880 _ => i += 1,
881 }
882 }
883 bytes.len() as u32
884}
885
886
887pub fn parse_f64(bytes: &[u8]) -> Option<f64> {
892 #[cfg(feature = "fast-numbers")]
893 {
894 fast_float::parse(bytes).ok()
895 }
896 #[cfg(not(feature = "fast-numbers"))]
897 {
898 std::str::from_utf8(bytes).ok()?.parse::<f64>().ok()
899 }
900}
901
902pub fn parse_i64(bytes: &[u8]) -> Option<i64> {
905 if let Ok(s) = std::str::from_utf8(bytes) {
906 if let Ok(n) = s.parse::<i64>() {
907 return Some(n);
908 }
909 }
910 parse_f64(bytes).map(|f| f as i64)
911}
912
913pub fn json_number_eq(value: &[u8], literal: &[u8]) -> bool {
917 match (parse_f64(value), parse_f64(literal)) {
918 (Some(a), Some(b)) => a == b,
919 _ => false,
920 }
921}
922
923
924#[cfg(feature = "multi-key")]
933pub fn multi_key_finder(keys: &[&str]) -> aho_corasick::AhoCorasick {
934 use aho_corasick::AhoCorasickBuilder;
935 let patterns: Vec<String> = keys.iter().map(|k| format!("\"{}\":", k)).collect();
936 AhoCorasickBuilder::new()
937 .ascii_case_insensitive(false)
938 .build(&patterns)
939 .expect("aho-corasick build")
940}
941
942#[cfg(feature = "multi-key")]
946pub fn multi_key_scan<'a>(
947 finder: &'a aho_corasick::AhoCorasick,
948 bytes: &'a [u8],
949) -> impl Iterator<Item = (usize, usize)> + 'a {
950 finder.find_iter(bytes).map(|m| (m.pattern().as_usize(), m.start()))
951}
952
953
954#[cfg(feature = "validate-utf8")]
957pub fn validate_utf8(bytes: &[u8]) -> Result<(), Error> {
958 simdutf8::basic::from_utf8(bytes)
959 .map(|_| ())
960 .map_err(|_| Error::InvalidUtf8)
961}
962
963
964#[cfg(test)]
965mod tests {
966 use super::*;
967
968 #[test]
969 fn from_bytes_basic_roundtrip() {
970 let buf = br#"{"a":1,"b":"hi","c":[1,2,3]}"#;
971 let idx = from_bytes(buf).unwrap();
972 assert!(idx.token_count() > 0);
973 assert!(idx.has_keys());
974
975 let mut keys: Vec<&str> = idx.keys_seen().collect();
976 keys.sort();
977 assert_eq!(keys, vec!["a", "b", "c"]);
978 }
979
980 #[test]
981 fn keys_named_returns_token_ids() {
982 let buf = br#"{"x":1,"y":2,"x":3}"#;
983 let idx = from_bytes(buf).unwrap();
984 let xs: Vec<TokenId> = idx.keys_named("x", None).collect();
985 assert!(!xs.is_empty(), "expected at least one 'x' match");
986 for t in &xs {
987 assert_eq!(idx.kind(*t), TokenKind::Key);
988 }
989 }
990
991 #[test]
992 fn count_key_uses_popcount() {
993 let buf = br#"{"a":{"x":1},"b":{"x":2},"c":{"x":3,"y":4}}"#;
994 let idx = from_bytes(buf).unwrap();
995 let c = count_key(&idx, "x");
996 assert_eq!(c, 3);
997 let cy = count_key(&idx, "y");
998 assert_eq!(cy, 1);
999 }
1000
1001 #[test]
1002 fn key_hits_first_short_circuits() {
1003 let buf = br#"{"a":1,"b":2,"c":3,"d":4}"#;
1004 let idx = from_bytes(buf).unwrap();
1005 let first = idx.keys_named("c", None).first();
1006 assert!(first.is_some());
1007 assert_eq!(idx.kind(first.unwrap()), TokenKind::Key);
1008 }
1009
1010 #[test]
1011 fn find_eq_returns_enclosing_objects() {
1012 let buf = br#"{"a":{"x":"test"},"b":{"x":"nope"},"c":{"x":"test"}}"#;
1013 let idx = from_bytes(buf).unwrap();
1014 let hits: Vec<TokenId> = find_eq(&idx, buf, "x", b"test").collect();
1015 assert_eq!(hits.len(), 2);
1016 for t in &hits {
1017 assert_eq!(idx.kind(*t), TokenKind::Object);
1018 }
1019 }
1020
1021 #[test]
1022 fn container_at_byte_works() {
1023 let buf = br#"{"a":{"x":1},"b":2}"#;
1024 let idx = from_bytes(buf).unwrap();
1025 let c = idx.container_at_byte(9).unwrap();
1027 assert_eq!(idx.kind(c), TokenKind::Object);
1029 assert!(idx.depth(c) >= 1);
1030 }
1031
1032 #[test]
1033 fn build_options_minimal_skips_keys() {
1034 let buf = br#"{"a":1}"#;
1035 let idx = from_bytes_with(buf, BuildOptions::minimal()).unwrap();
1036 assert!(!idx.has_keys());
1037 assert_eq!(idx.keys_named("a", None).count(), 0);
1038 }
1039
1040 #[test]
1041 fn ancestors_walks_to_root() {
1042 let buf = br#"{"a":{"b":{"c":42}}}"#;
1043 let idx = from_bytes(buf).unwrap();
1044 let scalar_tok = idx
1046 .tokens()
1047 .find(|t| idx.kind(*t) == TokenKind::Scalar)
1048 .unwrap();
1049 let chain: Vec<TokenId> = idx.ancestors(scalar_tok).collect();
1050 assert!(!chain.is_empty());
1051 assert!(idx.parent(*chain.last().unwrap()).is_none());
1053 }
1054
1055 #[test]
1056 fn json_string_eq_handles_quotes() {
1057 assert!(json_string_eq(b"\"hello\"", b"hello"));
1058 assert!(!json_string_eq(b"\"hello\"", b"world"));
1059 assert!(json_string_eq(b"\"he\\nllo\"", b"he\nllo"));
1061 assert!(json_string_eq(b"42", b"42"));
1063 assert!(json_string_eq(b"true", b"true"));
1064 }
1065
1066 #[test]
1067 fn find_eq_compound_intersect() {
1068 let buf = br#"[{"k1":"a","k2":"b"},{"k1":"a","k2":"c"},{"k1":"x","k2":"b"}]"#;
1069 let idx = from_bytes(buf).unwrap();
1070 let conds: &[(&str, &[u8])] = &[("k1", b"a"), ("k2", b"b")];
1071 let hits: Vec<TokenId> = find_eq_compound(&idx, buf, conds).collect();
1072 assert_eq!(hits.len(), 1, "exactly one obj should match both conds");
1073 }
1074
1075 #[test]
1076 fn json_string_eq_handles_simple_escapes() {
1077 assert!(json_string_eq(b"\"a\\nb\"", b"a\nb"));
1078 assert!(json_string_eq(b"\"a\\\\b\"", b"a\\b"));
1079 assert!(json_string_eq(b"\"a\\\"b\"", b"a\"b"));
1080 assert!(!json_string_eq(b"\"a\\nb\"", b"axb"));
1081 assert!(!json_string_eq(b"\"\\u0041\"", b"A"));
1083 }
1084
1085 #[test]
1086 fn parse_f64_works_with_or_without_fast_numbers() {
1087 assert_eq!(parse_f64(b"3.14"), Some(3.14));
1088 assert_eq!(parse_f64(b"-1e10"), Some(-1e10));
1089 assert_eq!(parse_f64(b"42"), Some(42.0));
1090 assert_eq!(parse_f64(b"not a number"), None);
1091 }
1092
1093 #[test]
1094 fn parse_i64_falls_back_to_f64_truncate() {
1095 assert_eq!(parse_i64(b"42"), Some(42));
1096 assert_eq!(parse_i64(b"-7"), Some(-7));
1097 assert_eq!(parse_i64(b"3.9"), Some(3)); }
1099
1100 #[test]
1101 fn json_number_eq_compares_numerically() {
1102 assert!(json_number_eq(b"42", b"42"));
1103 assert!(json_number_eq(b"42.0", b"42")); assert!(!json_number_eq(b"42", b"43"));
1105 assert!(!json_number_eq(b"abc", b"42"));
1106 }
1107
1108 #[cfg(feature = "multi-key")]
1109 #[test]
1110 fn multi_key_finder_matches_top_level_keys() {
1111 let keys = ["type", "actor", "repo"];
1112 let finder = multi_key_finder(&keys);
1113 let bytes = br#"{"type":"PushEvent","actor":{"login":"x"},"repo":{"name":"y"}}"#;
1114 let hits: Vec<(usize, usize)> = multi_key_scan(&finder, bytes).collect();
1115 assert_eq!(hits.len(), 3);
1116 let pattern_indices: Vec<usize> = hits.iter().map(|(p, _)| *p).collect();
1118 assert_eq!(pattern_indices, vec![0, 1, 2]);
1119 }
1120
1121 #[cfg(feature = "validate-utf8")]
1122 #[test]
1123 fn validate_utf8_accepts_valid_input() {
1124 assert!(validate_utf8(b"hello").is_ok());
1125 assert!(validate_utf8(b"\xff\xfe").is_err()); }
1127
1128}