1use memchr::{memchr, memchr2};
33use serde_json::Value;
34
35pub fn find_key_positions(bytes: &[u8], key: &str) -> Vec<usize> {
39 let needle = {
40 let mut s = String::with_capacity(key.len() + 3);
41 s.push('"');
42 s.push_str(key);
43 s.push_str("\":");
44 s
45 };
46 let needle_b = needle.as_bytes();
47 if needle_b.len() > bytes.len() { return Vec::new(); }
48
49 let mut out = Vec::new();
50 let mut i = 0usize;
51 let mut in_string = false;
52 let mut escape = false;
53
54 while i < bytes.len() {
55 if escape {
56 escape = false;
57 i += 1;
58 continue;
59 }
60 if in_string {
61 let rest = &bytes[i..];
64 match memchr2(b'\\', b'"', rest) {
65 Some(off) => {
66 i += off;
67 match bytes[i] {
68 b'\\' => { escape = true; i += 1; }
69 b'"' => { in_string = false; i += 1; }
70 _ => unreachable!(),
71 }
72 }
73 None => break,
74 }
75 } else {
76 let rest = &bytes[i..];
79 match memchr(b'"', rest) {
80 Some(off) => {
81 let q = i + off;
82 if q + needle_b.len() <= bytes.len()
83 && &bytes[q..q + needle_b.len()] == needle_b {
84 out.push(q);
85 i = q + needle_b.len();
88 } else {
89 in_string = true;
90 i = q + 1;
91 }
92 }
93 None => break,
94 }
95 }
96 }
97 out
98}
99
100pub fn extract_values(bytes: &[u8], key: &str) -> Vec<Value> {
108 let positions = find_key_positions(bytes, key);
109 let mut out = Vec::with_capacity(positions.len());
110 let prefix_len = key.len() + 3; for pos in positions {
112 let mut start = pos + prefix_len;
113 while start < bytes.len()
114 && matches!(bytes[start], b' ' | b'\t' | b'\n' | b'\r') {
115 start += 1;
116 }
117 if start >= bytes.len() { continue; }
118 let mut stream = serde_json::Deserializer::from_slice(&bytes[start..])
119 .into_iter::<Value>();
120 if let Some(Ok(v)) = stream.next() {
121 out.push(v);
122 }
123 }
124 out
125}
126
127#[derive(Debug, Clone, Copy)]
131pub struct ValueSpan {
132 pub start: usize,
133 pub end: usize,
134}
135
136pub fn find_first_key_value_span(bytes: &[u8], key: &str) -> Option<ValueSpan> {
142 let needle = {
143 let mut s = String::with_capacity(key.len() + 3);
144 s.push('"');
145 s.push_str(key);
146 s.push_str("\":");
147 s
148 };
149 let needle_b = needle.as_bytes();
150 if needle_b.len() > bytes.len() { return None; }
151 let prefix_len = needle_b.len();
152
153 let mut i = 0usize;
154 let mut in_string = false;
155 let mut escape = false;
156
157 while i < bytes.len() {
158 if escape { escape = false; i += 1; continue; }
159 if in_string {
160 let rest = &bytes[i..];
161 match memchr2(b'\\', b'"', rest) {
162 Some(off) => {
163 i += off;
164 match bytes[i] {
165 b'\\' => { escape = true; i += 1; }
166 b'"' => { in_string = false; i += 1; }
167 _ => unreachable!(),
168 }
169 }
170 None => return None,
171 }
172 } else {
173 let rest = &bytes[i..];
174 match memchr(b'"', rest) {
175 Some(off) => {
176 let q = i + off;
177 if q + prefix_len <= bytes.len()
178 && &bytes[q..q + prefix_len] == needle_b
179 {
180 let mut start = q + prefix_len;
181 while start < bytes.len()
182 && matches!(bytes[start], b' ' | b'\t' | b'\n' | b'\r')
183 { start += 1; }
184 if start >= bytes.len() { return None; }
185 return value_end(bytes, start)
186 .map(|end| ValueSpan { start, end });
187 } else {
188 in_string = true;
189 i = q + 1;
190 }
191 }
192 None => return None,
193 }
194 }
195 }
196 None
197}
198
199pub fn find_key_value_spans(bytes: &[u8], key: &str) -> Vec<ValueSpan> {
204 let positions = find_key_positions(bytes, key);
205 let prefix_len = key.len() + 3;
206 let mut out = Vec::with_capacity(positions.len());
207 for pos in positions {
208 let mut start = pos + prefix_len;
209 while start < bytes.len()
210 && matches!(bytes[start], b' ' | b'\t' | b'\n' | b'\r') {
211 start += 1;
212 }
213 if start >= bytes.len() { continue; }
214 if let Some(end) = value_end(bytes, start) {
215 out.push(ValueSpan { start, end });
216 }
217 }
218 out
219}
220
221pub fn extract_values_eq(bytes: &[u8], key: &str, lit: &[u8]) -> Vec<Value> {
227 let mut out = Vec::new();
228 for span in find_key_value_spans(bytes, key) {
229 if span.end - span.start == lit.len()
230 && &bytes[span.start..span.end] == lit
231 {
232 if let Ok(v) = serde_json::from_slice::<Value>(&bytes[span.start..span.end]) {
233 out.push(v);
234 }
235 }
236 }
237 out
238}
239
240pub fn count_key_value_eq(bytes: &[u8], key: &str, lit: &[u8]) -> usize {
248 let mut n = 0;
249 for span in find_key_value_spans(bytes, key) {
250 if span.end - span.start == lit.len()
251 && &bytes[span.start..span.end] == lit
252 {
253 n += 1;
254 }
255 }
256 n
257}
258
259pub fn find_enclosing_objects_eq(bytes: &[u8], key: &str, lit: &[u8]) -> Vec<ValueSpan> {
275 let needle = {
276 let mut s = String::with_capacity(key.len() + 3);
277 s.push('"');
278 s.push_str(key);
279 s.push_str("\":");
280 s
281 };
282 let needle_b = needle.as_bytes();
283 let mut out = Vec::new();
284 let mut stack: Vec<(usize, bool)> = Vec::new();
285 let mut i = 0usize;
286 let mut in_string = false;
287 let mut escape = false;
288
289 while i < bytes.len() {
290 if escape { escape = false; i += 1; continue; }
291 if in_string {
292 let rest = &bytes[i..];
293 match memchr2(b'\\', b'"', rest) {
294 Some(off) => {
295 i += off;
296 match bytes[i] {
297 b'\\' => { escape = true; i += 1; }
298 b'"' => { in_string = false; i += 1; }
299 _ => unreachable!(),
300 }
301 }
302 None => break,
303 }
304 continue;
305 }
306 match bytes[i] {
307 b'{' => { stack.push((i, false)); i += 1; }
308 b'}' => {
309 if let Some((start, matched)) = stack.pop() {
310 if matched { out.push(ValueSpan { start, end: i + 1 }); }
311 }
312 i += 1;
313 }
314 b'"' => {
315 if i + needle_b.len() <= bytes.len()
316 && &bytes[i..i + needle_b.len()] == needle_b
317 {
318 let mut vs = i + needle_b.len();
319 while vs < bytes.len()
320 && matches!(bytes[vs], b' ' | b'\t' | b'\n' | b'\r')
321 { vs += 1; }
322 if let Some(ve) = value_end(bytes, vs) {
323 if ve - vs == lit.len() && &bytes[vs..ve] == lit {
324 if let Some(top) = stack.last_mut() { top.1 = true; }
325 }
326 i = ve;
327 } else {
328 i = vs;
329 }
330 } else {
331 in_string = true;
332 i += 1;
333 }
334 }
335 _ => i += 1,
336 }
337 }
338
339 out.sort_by_key(|s| s.start);
340 out
341}
342
343pub fn find_enclosing_objects_eq_multi(
348 bytes: &[u8],
349 conjuncts: &[(String, Vec<u8>)],
350) -> Vec<ValueSpan> {
351 assert!(conjuncts.len() <= 64, "at most 64 conjuncts supported");
352 if conjuncts.is_empty() { return Vec::new(); }
353
354 let needles: Vec<Vec<u8>> = conjuncts.iter().map(|(k, _)| {
357 let mut s = Vec::with_capacity(k.len() + 3);
358 s.push(b'"');
359 s.extend_from_slice(k.as_bytes());
360 s.extend_from_slice(b"\":");
361 s
362 }).collect();
363 let full_mask: u64 = if conjuncts.len() == 64 { u64::MAX }
364 else { (1u64 << conjuncts.len()) - 1 };
365
366 let mut out = Vec::new();
367 let mut stack: Vec<(usize, u64)> = Vec::new();
368 let mut i = 0usize;
369 let mut in_string = false;
370 let mut escape = false;
371
372 while i < bytes.len() {
373 if escape { escape = false; i += 1; continue; }
374 if in_string {
375 let rest = &bytes[i..];
376 match memchr2(b'\\', b'"', rest) {
377 Some(off) => {
378 i += off;
379 match bytes[i] {
380 b'\\' => { escape = true; i += 1; }
381 b'"' => { in_string = false; i += 1; }
382 _ => unreachable!(),
383 }
384 }
385 None => break,
386 }
387 continue;
388 }
389 match bytes[i] {
390 b'{' => { stack.push((i, 0u64)); i += 1; }
391 b'}' => {
392 if let Some((start, mask)) = stack.pop() {
393 if mask == full_mask {
394 out.push(ValueSpan { start, end: i + 1 });
395 }
396 }
397 i += 1;
398 }
399 b'"' => {
400 let mut matched_idx: Option<usize> = None;
401 for (idx, nb) in needles.iter().enumerate() {
402 if i + nb.len() <= bytes.len() && &bytes[i..i + nb.len()] == &nb[..] {
403 matched_idx = Some(idx);
404 break;
405 }
406 }
407 if let Some(idx) = matched_idx {
408 let nb = &needles[idx];
409 let mut vs = i + nb.len();
410 while vs < bytes.len()
411 && matches!(bytes[vs], b' ' | b'\t' | b'\n' | b'\r')
412 { vs += 1; }
413 if let Some(ve) = value_end(bytes, vs) {
414 let lit = &conjuncts[idx].1;
415 if ve - vs == lit.len() && &bytes[vs..ve] == &lit[..] {
416 if let Some(top) = stack.last_mut() {
417 top.1 |= 1u64 << idx;
418 }
419 }
420 i = ve;
421 } else {
422 i = vs;
423 }
424 } else {
425 in_string = true;
426 i += 1;
427 }
428 }
429 _ => i += 1,
430 }
431 }
432
433 out.sort_by_key(|s| s.start);
434 out
435}
436
437#[derive(Debug, Clone, Copy, PartialEq, Eq)]
440pub enum ScanCmp { Lt, Lte, Gt, Gte }
441
442impl ScanCmp {
443 #[inline]
444 fn holds(self, lhs: f64, rhs: f64) -> bool {
445 match self {
446 ScanCmp::Lt => lhs < rhs,
447 ScanCmp::Lte => lhs <= rhs,
448 ScanCmp::Gt => lhs > rhs,
449 ScanCmp::Gte => lhs >= rhs,
450 }
451 }
452}
453
454#[derive(Debug, Clone)]
457pub enum ScanPred {
458 Eq(Vec<u8>),
461 Cmp(ScanCmp, f64),
464}
465
466pub fn find_enclosing_objects_cmp(
477 bytes: &[u8],
478 key: &str,
479 op: ScanCmp,
480 threshold: f64,
481) -> Vec<ValueSpan> {
482 let needle = {
483 let mut s = String::with_capacity(key.len() + 3);
484 s.push('"');
485 s.push_str(key);
486 s.push_str("\":");
487 s
488 };
489 let needle_b = needle.as_bytes();
490 let mut out = Vec::new();
491 let mut stack: Vec<(usize, bool)> = Vec::new();
492 let mut i = 0usize;
493 let mut in_string = false;
494 let mut escape = false;
495
496 while i < bytes.len() {
497 if escape { escape = false; i += 1; continue; }
498 if in_string {
499 let rest = &bytes[i..];
500 match memchr2(b'\\', b'"', rest) {
501 Some(off) => {
502 i += off;
503 match bytes[i] {
504 b'\\' => { escape = true; i += 1; }
505 b'"' => { in_string = false; i += 1; }
506 _ => unreachable!(),
507 }
508 }
509 None => break,
510 }
511 continue;
512 }
513 match bytes[i] {
514 b'{' => { stack.push((i, false)); i += 1; }
515 b'}' => {
516 if let Some((start, matched)) = stack.pop() {
517 if matched { out.push(ValueSpan { start, end: i + 1 }); }
518 }
519 i += 1;
520 }
521 b'"' => {
522 if i + needle_b.len() <= bytes.len()
523 && &bytes[i..i + needle_b.len()] == needle_b
524 {
525 let mut vs = i + needle_b.len();
526 while vs < bytes.len()
527 && matches!(bytes[vs], b' ' | b'\t' | b'\n' | b'\r')
528 { vs += 1; }
529 if let Some(ve) = value_end(bytes, vs) {
530 if let Some((_, as_f, _)) = parse_num_span(&bytes[vs..ve]) {
531 if op.holds(as_f, threshold) {
532 if let Some(top) = stack.last_mut() { top.1 = true; }
533 }
534 }
535 i = ve;
536 } else {
537 i = vs;
538 }
539 } else {
540 in_string = true;
541 i += 1;
542 }
543 }
544 _ => i += 1,
545 }
546 }
547
548 out.sort_by_key(|s| s.start);
549 out
550}
551
552pub fn find_direct_field(obj_bytes: &[u8], key: &str) -> Option<ValueSpan> {
557 if obj_bytes.is_empty() || obj_bytes[0] != b'{' { return None; }
558 let needle = {
559 let mut s = String::with_capacity(key.len() + 3);
560 s.push('"');
561 s.push_str(key);
562 s.push_str("\":");
563 s
564 };
565 let needle_b = needle.as_bytes();
566 let mut depth: usize = 0;
567 let mut i = 0usize;
568 let mut in_string = false;
569 let mut escape = false;
570 while i < obj_bytes.len() {
571 if escape { escape = false; i += 1; continue; }
572 if in_string {
573 match memchr2(b'\\', b'"', &obj_bytes[i..]) {
574 Some(off) => {
575 i += off;
576 match obj_bytes[i] {
577 b'\\' => { escape = true; i += 1; }
578 b'"' => { in_string = false; i += 1; }
579 _ => unreachable!(),
580 }
581 }
582 None => return None,
583 }
584 continue;
585 }
586 match obj_bytes[i] {
587 b'{' | b'[' => { depth += 1; i += 1; }
588 b'}' | b']' => {
589 if depth == 0 { return None; }
590 depth -= 1;
591 i += 1;
592 }
593 b'"' => {
594 if depth == 1
595 && i + needle_b.len() <= obj_bytes.len()
596 && &obj_bytes[i..i + needle_b.len()] == needle_b
597 {
598 let mut vs = i + needle_b.len();
599 while vs < obj_bytes.len()
600 && matches!(obj_bytes[vs], b' ' | b'\t' | b'\n' | b'\r')
601 { vs += 1; }
602 return value_end(obj_bytes, vs)
603 .map(|end| ValueSpan { start: vs, end });
604 }
605 in_string = true;
606 i += 1;
607 }
608 _ => i += 1,
609 }
610 }
611 None
612}
613
614pub fn find_enclosing_objects_mixed(
620 bytes: &[u8],
621 conjuncts: &[(String, ScanPred)],
622) -> Vec<ValueSpan> {
623 assert!(conjuncts.len() <= 64, "at most 64 conjuncts supported");
624 if conjuncts.is_empty() { return Vec::new(); }
625
626 let needles: Vec<Vec<u8>> = conjuncts.iter().map(|(k, _)| {
627 let mut s = Vec::with_capacity(k.len() + 3);
628 s.push(b'"');
629 s.extend_from_slice(k.as_bytes());
630 s.extend_from_slice(b"\":");
631 s
632 }).collect();
633 let full_mask: u64 = if conjuncts.len() == 64 { u64::MAX }
634 else { (1u64 << conjuncts.len()) - 1 };
635
636 let mut out = Vec::new();
637 let mut stack: Vec<(usize, u64)> = Vec::new();
638 let mut i = 0usize;
639 let mut in_string = false;
640 let mut escape = false;
641
642 while i < bytes.len() {
643 if escape { escape = false; i += 1; continue; }
644 if in_string {
645 let rest = &bytes[i..];
646 match memchr2(b'\\', b'"', rest) {
647 Some(off) => {
648 i += off;
649 match bytes[i] {
650 b'\\' => { escape = true; i += 1; }
651 b'"' => { in_string = false; i += 1; }
652 _ => unreachable!(),
653 }
654 }
655 None => break,
656 }
657 continue;
658 }
659 match bytes[i] {
660 b'{' => { stack.push((i, 0u64)); i += 1; }
661 b'}' => {
662 if let Some((start, mask)) = stack.pop() {
663 if mask == full_mask {
664 out.push(ValueSpan { start, end: i + 1 });
665 }
666 }
667 i += 1;
668 }
669 b'"' => {
670 let mut matched_idx: Option<usize> = None;
671 for (idx, nb) in needles.iter().enumerate() {
672 if i + nb.len() <= bytes.len() && &bytes[i..i + nb.len()] == &nb[..] {
673 matched_idx = Some(idx);
674 break;
675 }
676 }
677 if let Some(idx) = matched_idx {
678 let nb = &needles[idx];
679 let mut vs = i + nb.len();
680 while vs < bytes.len()
681 && matches!(bytes[vs], b' ' | b'\t' | b'\n' | b'\r')
682 { vs += 1; }
683 if let Some(ve) = value_end(bytes, vs) {
684 let fires = match &conjuncts[idx].1 {
685 ScanPred::Eq(lit) =>
686 ve - vs == lit.len() && &bytes[vs..ve] == &lit[..],
687 ScanPred::Cmp(op, thresh) =>
688 parse_num_span(&bytes[vs..ve])
689 .map(|(_, f, _)| op.holds(f, *thresh))
690 .unwrap_or(false),
691 };
692 if fires {
693 if let Some(top) = stack.last_mut() {
694 top.1 |= 1u64 << idx;
695 }
696 }
697 i = ve;
698 } else {
699 i = vs;
700 }
701 } else {
702 in_string = true;
703 i += 1;
704 }
705 }
706 _ => i += 1,
707 }
708 }
709
710 out.sort_by_key(|s| s.start);
711 out
712}
713
714#[derive(Debug, Clone, Copy, Default)]
719pub struct NumFold {
720 pub int_sum: i64,
721 pub float_sum: f64,
722 pub is_float: bool,
723 pub count: usize,
724 pub min_i: i64,
725 pub max_i: i64,
726 pub min_f: f64,
727 pub max_f: f64,
728 pub any: bool,
729}
730
731#[inline]
734pub fn parse_num_span(s: &[u8]) -> Option<(i64, f64, bool)> {
735 let s = std::str::from_utf8(s).ok()?;
736 let has_frac_or_exp = s.bytes().any(|b| matches!(b, b'.' | b'e' | b'E'));
738 if !has_frac_or_exp {
739 if let Ok(n) = s.parse::<i64>() {
740 return Some((n, n as f64, true));
741 }
742 }
743 s.parse::<f64>().ok().map(|f| (f as i64, f, false))
744}
745
746pub fn fold_nums(bytes: &[u8], spans: &[ValueSpan]) -> NumFold {
749 let mut f = NumFold::default();
750 for s in spans {
751 let slice = &bytes[s.start..s.end];
752 let Some((i, x, is_int)) = parse_num_span(slice) else { continue };
753 f.count += 1;
754 if !f.any {
755 f.any = true;
756 f.min_i = i; f.max_i = i;
757 f.min_f = x; f.max_f = x;
758 } else {
759 if i < f.min_i { f.min_i = i; }
760 if i > f.max_i { f.max_i = i; }
761 if x < f.min_f { f.min_f = x; }
762 if x > f.max_f { f.max_f = x; }
763 }
764 if is_int && !f.is_float {
765 f.int_sum = f.int_sum.wrapping_add(i);
766 f.float_sum += x;
767 } else {
768 if !f.is_float { f.float_sum = f.int_sum as f64; f.is_float = true; }
769 f.float_sum += x;
770 }
771 }
772 f
773}
774
775pub fn fold_direct_field_nums(
780 bytes: &[u8], spans: &[ValueSpan], key: &str,
781) -> NumFold {
782 let mut f = NumFold::default();
783 for s in spans {
784 let obj_bytes = &bytes[s.start..s.end];
785 let Some(vs) = find_direct_field(obj_bytes, key) else { continue };
786 let Some((i, x, is_int)) = parse_num_span(&obj_bytes[vs.start..vs.end])
787 else { continue };
788 f.count += 1;
789 if !f.any {
790 f.any = true;
791 f.min_i = i; f.max_i = i;
792 f.min_f = x; f.max_f = x;
793 } else {
794 if i < f.min_i { f.min_i = i; }
795 if i > f.max_i { f.max_i = i; }
796 if x < f.min_f { f.min_f = x; }
797 if x > f.max_f { f.max_f = x; }
798 }
799 if is_int && !f.is_float {
800 f.int_sum = f.int_sum.wrapping_add(i);
801 f.float_sum += x;
802 } else {
803 if !f.is_float { f.float_sum = f.int_sum as f64; f.is_float = true; }
804 f.float_sum += x;
805 }
806 }
807 f
808}
809
810fn value_end(bytes: &[u8], start: usize) -> Option<usize> {
813 if start >= bytes.len() { return None; }
814 match bytes[start] {
815 b'"' => {
816 let mut i = start + 1;
818 let mut escape = false;
819 while i < bytes.len() {
820 if escape { escape = false; i += 1; continue; }
821 match bytes[i] {
822 b'\\' => { escape = true; i += 1; }
823 b'"' => return Some(i + 1),
824 _ => i += 1,
825 }
826 }
827 None
828 }
829 b'{' | b'[' => {
830 let open = bytes[start];
831 let close = if open == b'{' { b'}' } else { b']' };
832 let mut depth = 1usize;
833 let mut i = start + 1;
834 let mut in_string = false;
835 let mut escape = false;
836 while i < bytes.len() {
837 let b = bytes[i];
838 if escape { escape = false; i += 1; continue; }
839 if in_string {
840 match b {
841 b'\\' => escape = true,
842 b'"' => in_string = false,
843 _ => {}
844 }
845 i += 1;
846 continue;
847 }
848 match b {
849 b'"' => in_string = true,
850 c if c == open => depth += 1,
851 c if c == close => {
852 depth -= 1;
853 if depth == 0 { return Some(i + 1); }
854 }
855 _ => {}
856 }
857 i += 1;
858 }
859 None
860 }
861 _ => {
862 let mut i = start;
864 while i < bytes.len() {
865 match bytes[i] {
866 b',' | b'}' | b']' | b' ' | b'\t' | b'\n' | b'\r' => break,
867 _ => i += 1,
868 }
869 }
870 if i == start { None } else { Some(i) }
871 }
872 }
873}
874
875#[cfg(test)]
876mod tests {
877 use super::*;
878
879 #[test]
880 fn finds_top_level_key() {
881 let doc = br#"{"test": 42, "other": 7}"#;
882 let pos = find_key_positions(doc, "test");
883 assert_eq!(pos, vec![1]);
884 }
885
886 #[test]
887 fn finds_nested_keys() {
888 let doc = br#"{"a":{"test":1},"b":[{"test":2},{"test":3}]}"#;
889 let pos = find_key_positions(doc, "test");
890 assert_eq!(pos.len(), 3);
891 }
892
893 #[test]
894 fn ignores_hits_inside_string_values() {
895 let doc = br#"{"comment":"the \"test\": lie","test":99}"#;
896 let vals = extract_values(doc, "test");
897 assert_eq!(vals, vec![serde_json::json!(99)]);
898 }
899
900 #[test]
901 fn does_not_match_longer_key_suffix() {
902 let doc = br#"{"nottest":1,"test":2}"#;
903 let vals = extract_values(doc, "test");
904 assert_eq!(vals, vec![serde_json::json!(2)]);
905 }
906
907 #[test]
908 fn handles_escaped_backslash_then_quote() {
909 let doc = br#"{"path":"c:\\","test":"ok"}"#;
912 let vals = extract_values(doc, "test");
913 assert_eq!(vals, vec![serde_json::json!("ok")]);
914 }
915
916 #[test]
917 fn empty_on_missing_key() {
918 let doc = br#"{"a":1,"b":2}"#;
919 assert!(find_key_positions(doc, "zzz").is_empty());
920 assert!(extract_values(doc, "zzz").is_empty());
921 }
922
923 #[test]
924 fn extracts_nested_object_value() {
925 let doc = br#"{"test":{"nested":[1,2,3]}}"#;
926 let vals = extract_values(doc, "test");
927 assert_eq!(vals, vec![serde_json::json!({"nested":[1,2,3]})]);
928 }
929
930 #[test]
931 fn extracts_all_nested_hits_in_order() {
932 let doc = br#"{"a":{"test":1},"b":[{"test":2},{"test":3}]}"#;
933 let vals = extract_values(doc, "test");
934 assert_eq!(vals, vec![
935 serde_json::json!(1),
936 serde_json::json!(2),
937 serde_json::json!(3),
938 ]);
939 }
940
941 #[test]
942 fn spans_cover_every_value_kind() {
943 let doc = br#"{"a":1,"b":"two","c":[1,2,3],"d":{"x":1},"e":true}"#;
944 let keys_and_expected: &[(&str, &[u8])] = &[
945 ("a", b"1"),
946 ("b", b"\"two\""),
947 ("c", b"[1,2,3]"),
948 ("d", b"{\"x\":1}"),
949 ("e", b"true"),
950 ];
951 for (k, want) in keys_and_expected {
952 let spans = find_key_value_spans(doc, k);
953 assert_eq!(spans.len(), 1, "key {} not found", k);
954 assert_eq!(&doc[spans[0].start..spans[0].end], *want);
955 }
956 }
957
958 #[test]
959 fn count_eq_matches_only_literal_equals() {
960 let doc = br#"{"a":[{"type":"action"},{"type":"idle"},{"type":"action"},{"type":"noop"}]}"#;
961 assert_eq!(count_key_value_eq(doc, "type", br#""action""#), 2);
962 assert_eq!(count_key_value_eq(doc, "type", br#""missing""#), 0);
963 }
964
965 #[test]
966 fn count_eq_numeric_literal() {
967 let doc = br#"{"xs":[{"n":10},{"n":42},{"n":10},{"n":42}]}"#;
968 assert_eq!(count_key_value_eq(doc, "n", b"42"), 2);
969 assert_eq!(count_key_value_eq(doc, "n", b"10"), 2);
970 }
971
972 #[test]
973 fn spans_skip_whitespace_after_colon() {
974 let doc = br#"{"a": 42 ,"b": "x"}"#;
975 let a = find_key_value_spans(doc, "a");
976 assert_eq!(&doc[a[0].start..a[0].end], b"42");
977 let b = find_key_value_spans(doc, "b");
978 assert_eq!(&doc[b[0].start..b[0].end], b"\"x\"");
979 }
980
981 #[test]
982 fn enclosing_object_simple_match() {
983 let doc = br#"{"events":[{"type":"action","id":1},{"type":"idle","id":2},{"type":"action","id":3}]}"#;
984 let spans = find_enclosing_objects_eq(doc, "type", br#""action""#);
985 assert_eq!(spans.len(), 2);
986 let objs: Vec<_> = spans.iter()
987 .map(|s| serde_json::from_slice::<serde_json::Value>(&doc[s.start..s.end]).unwrap())
988 .collect();
989 assert_eq!(objs[0], serde_json::json!({"type":"action","id":1}));
990 assert_eq!(objs[1], serde_json::json!({"type":"action","id":3}));
991 }
992
993 #[test]
994 fn enclosing_object_nested_both_match() {
995 let doc = br#"{"type":"x","child":{"type":"x","n":2}}"#;
998 let spans = find_enclosing_objects_eq(doc, "type", br#""x""#);
999 assert_eq!(spans.len(), 2);
1000 assert!(spans[0].start < spans[1].start);
1001 assert_eq!(&doc[spans[0].start..spans[0].end], doc);
1002 assert_eq!(&doc[spans[1].start..spans[1].end], br#"{"type":"x","n":2}"#);
1003 }
1004
1005 #[test]
1006 fn enclosing_object_nested_inner_only() {
1007 let doc = br#"{"type":"a","child":{"type":"b","n":2}}"#;
1008 let spans = find_enclosing_objects_eq(doc, "type", br#""b""#);
1009 assert_eq!(spans.len(), 1);
1010 assert_eq!(&doc[spans[0].start..spans[0].end], br#"{"type":"b","n":2}"#);
1011 }
1012
1013 #[test]
1014 fn enclosing_object_ignores_string_value_containing_needle() {
1015 let doc = br#"{"comment":"the \"type\":\"action\" label","events":[{"type":"action"}]}"#;
1016 let spans = find_enclosing_objects_eq(doc, "type", br#""action""#);
1017 assert_eq!(spans.len(), 1);
1018 assert_eq!(&doc[spans[0].start..spans[0].end], br#"{"type":"action"}"#);
1019 }
1020
1021 #[test]
1022 fn enclosing_object_numeric_literal() {
1023 let doc = br#"[{"v":10},{"v":42},{"v":42}]"#;
1024 let spans = find_enclosing_objects_eq(doc, "v", b"42");
1025 assert_eq!(spans.len(), 2);
1026 }
1027
1028 #[test]
1029 fn enclosing_object_no_match() {
1030 let doc = br#"{"xs":[{"v":1},{"v":2}]}"#;
1031 let spans = find_enclosing_objects_eq(doc, "v", b"99");
1032 assert!(spans.is_empty());
1033 }
1034
1035 #[test]
1036 fn enclosing_object_multi_and_both_match() {
1037 let doc = br#"[{"t":"a","v":1},{"t":"a","v":2},{"t":"b","v":1}]"#;
1038 let c = vec![
1039 ("t".to_string(), br#""a""#.to_vec()),
1040 ("v".to_string(), b"1".to_vec()),
1041 ];
1042 let spans = find_enclosing_objects_eq_multi(doc, &c);
1043 assert_eq!(spans.len(), 1);
1044 assert_eq!(&doc[spans[0].start..spans[0].end], br#"{"t":"a","v":1}"#);
1045 }
1046
1047 #[test]
1048 fn enclosing_object_multi_and_nested_propagates() {
1049 let doc = br#"{"t":"a","child":{"t":"a","v":1},"v":1}"#;
1051 let c = vec![
1052 ("t".to_string(), br#""a""#.to_vec()),
1053 ("v".to_string(), b"1".to_vec()),
1054 ];
1055 let spans = find_enclosing_objects_eq_multi(doc, &c);
1056 assert_eq!(spans.len(), 2);
1057 assert!(spans[0].start < spans[1].start);
1058 }
1059
1060 #[test]
1061 fn enclosing_object_multi_and_partial_no_match() {
1062 let doc = br#"[{"t":"a","v":2},{"t":"b","v":1}]"#;
1064 let c = vec![
1065 ("t".to_string(), br#""a""#.to_vec()),
1066 ("v".to_string(), b"1".to_vec()),
1067 ];
1068 let spans = find_enclosing_objects_eq_multi(doc, &c);
1069 assert!(spans.is_empty());
1070 }
1071}