1use crate::error::{DsonError, Result};
9use std::sync::Arc;
10
11#[inline]
13#[must_use]
14pub fn hash_field_name(field: &[u8]) -> u64 {
15 use std::hash::{Hash, Hasher};
16 let mut hasher = ahash::AHasher::default();
17 field.hash(&mut hasher);
18 hasher.finish()
19}
20
/// A list of JSON-path patterns together with their compiled regular
/// expressions.
///
/// Both collections live behind `Arc` slices, so cloning a filter shares the
/// underlying patterns instead of copying them.
#[derive(Debug, Clone)]
pub struct SchemaFilter {
    /// The pattern strings exactly as they were passed to `new`, in order.
    paths: Arc<[String]>,
    /// One compiled regex per entry of `paths`, in the same order.
    compiled_patterns: Arc<[regex::Regex]>,
}
31
32impl SchemaFilter {
33 pub fn new(paths: Vec<String>) -> Result<Self> {
38 let mut compiled_patterns = Vec::with_capacity(paths.len());
39
40 for path in &paths {
41 let regex_pattern = Self::json_path_to_regex(path);
43 let regex = regex::Regex::new(®ex_pattern).map_err(|e| {
44 DsonError::InvalidOperation(format!("Invalid JSON-path pattern '{path}': {e}"))
45 })?;
46 compiled_patterns.push(regex);
47 }
48
49 let paths: Arc<[String]> = paths.into();
51 let compiled_patterns: Arc<[regex::Regex]> = compiled_patterns.into();
52
53 Ok(Self {
54 paths,
55 compiled_patterns,
56 })
57 }
58
59 #[must_use]
61 #[inline]
62 pub fn paths(&self) -> &[String] {
63 &self.paths
64 }
65
66 #[inline]
68 #[must_use]
69 pub fn matches(&self, json_path: &str) -> bool {
70 self.compiled_patterns
71 .iter()
72 .any(|pattern| pattern.is_match(json_path))
73 }
74
75 fn json_path_to_regex(pattern: &str) -> String {
77 let mut regex = "^".to_string();
79
80 for part in pattern.split('.') {
81 if part == "*" {
82 regex.push_str(r"[^\.]*");
83 } else if part.starts_with('[') && part.ends_with(']') {
84 if part == "[*]" {
85 regex.push_str(r"\[\d+\]");
86 } else {
87 regex.push_str(®ex::escape(part));
89 }
90 } else {
91 regex.push_str(®ex::escape(part));
92 }
93 regex.push_str(r"\.?");
94 }
95
96 if regex.ends_with(r"\.?") {
98 regex.truncate(regex.len() - 3);
99 }
100 regex.push('$');
101 regex
102 }
103}
104
/// A set of include and exclude path patterns compiled for repeated matching.
#[derive(Debug)]
pub struct CompiledSchema {
    /// Patterns a path must match (at least one) to be included.
    pub include_patterns: Vec<SchemaPattern>,
    /// Patterns that veto a path; checked before the include patterns.
    pub exclude_patterns: Vec<SchemaPattern>,
    /// Depth limit recorded by the constructors (they always store 10).
    /// NOTE(review): no method in this file reads it — confirm the consumer.
    pub max_depth: usize,
    /// `hash_field_name` of each include pattern string, in the same order as
    /// `include_patterns`.
    pub pattern_hashes: Vec<u64>,
}
117
118impl CompiledSchema {
119 pub fn compile(paths: &[String]) -> Result<Self> {
124 let mut include_patterns = Vec::new();
125 let mut pattern_hashes = Vec::new();
126
127 for path in paths {
128 let pattern = SchemaPattern::compile(path)?;
129 include_patterns.push(pattern);
130
131 let hash = hash_field_name(path.as_bytes());
133 pattern_hashes.push(hash);
134 }
135
136 Ok(Self {
137 include_patterns,
138 exclude_patterns: Vec::new(),
139 max_depth: 10, pattern_hashes,
141 })
142 }
143
144 pub fn compile_with_excludes(
149 include_paths: &[String],
150 exclude_paths: &[String],
151 ) -> Result<Self> {
152 let mut include_patterns = Vec::new();
153 let mut exclude_patterns = Vec::new();
154 let mut pattern_hashes = Vec::new();
155
156 for path in include_paths {
157 let pattern = SchemaPattern::compile(path)?;
158 include_patterns.push(pattern);
159 let hash = hash_field_name(path.as_bytes());
160 pattern_hashes.push(hash);
161 }
162
163 for path in exclude_paths {
164 let pattern = SchemaPattern::compile(path)?;
165 exclude_patterns.push(pattern);
166 }
167
168 Ok(Self {
169 include_patterns,
170 exclude_patterns,
171 max_depth: 10,
172 pattern_hashes,
173 })
174 }
175
176 #[must_use]
178 pub fn field_paths(&self) -> Vec<String> {
179 self.include_patterns
180 .iter()
181 .map(|pattern| pattern.path.clone())
182 .collect()
183 }
184
185 #[must_use]
187 pub fn matches_path(&self, path: &str) -> bool {
188 for exclude_pattern in &self.exclude_patterns {
190 if exclude_pattern.matches(path) {
191 return false;
192 }
193 }
194
195 let path_hash = hash_field_name(path.as_bytes());
197
198 if !self.pattern_hashes.contains(&path_hash) {
200 return false;
201 }
202
203 for pattern in &self.include_patterns {
205 if pattern.matches(path) {
206 return true;
207 }
208 }
209
210 false
211 }
212
213 #[must_use]
215 pub fn is_excluded(&self, path: &str) -> bool {
216 for exclude_pattern in &self.exclude_patterns {
217 if exclude_pattern.matches(path) {
218 return true;
219 }
220 }
221 false
222 }
223
224 #[must_use]
226 pub fn should_include_object(&self, path: &str) -> bool {
227 if self.is_excluded(path) {
229 return false;
230 }
231
232 for pattern in &self.include_patterns {
234 if pattern.could_match_children(path) {
235 return true;
236 }
237 }
238 false
239 }
240}
241
/// A single path pattern in compiled form.
#[derive(Debug)]
pub struct SchemaPattern {
    /// The pattern string exactly as it was given to `compile`.
    pub path: String,
    /// `path` split on `.`, one entry per component.
    pub components: Vec<String>,
    /// The matching strategy selected for this pattern by `compile`.
    pub match_type: MatchType,
    /// The compiled regex; `compile` sets it only for `MatchType::Wildcard`
    /// patterns and leaves it `None` otherwise.
    pub regex: Option<regex::Regex>,
}
254
255impl SchemaPattern {
256 pub fn compile(path: &str) -> Result<Self> {
261 let components: Vec<String> = path
262 .split('.')
263 .map(std::string::ToString::to_string)
264 .collect();
265
266 let match_type = if path.contains('*') {
267 MatchType::Wildcard
268 } else if components.len() > 1 {
269 MatchType::Prefix
270 } else {
271 MatchType::Exact
272 };
273
274 let regex = if matches!(match_type, MatchType::Wildcard) {
275 let regex_pattern = Self::glob_to_regex(path);
277 Some(
278 regex::Regex::new(®ex_pattern)
279 .map_err(|e| DsonError::ParseError(format!("Invalid regex pattern: {e}")))?,
280 )
281 } else {
282 None
283 };
284
285 Ok(Self {
286 path: path.to_string(),
287 components,
288 match_type,
289 regex,
290 })
291 }
292
293 #[must_use]
295 pub fn matches(&self, path: &str) -> bool {
296 match self.match_type {
297 MatchType::Exact => self.path == path,
298 MatchType::Prefix => path.starts_with(&self.path),
299 MatchType::Wildcard => self
300 .regex
301 .as_ref()
302 .is_some_and(|regex| regex.is_match(path)),
303 }
304 }
305
306 #[must_use]
308 pub fn could_match_children(&self, path: &str) -> bool {
309 match self.match_type {
310 MatchType::Exact => self.path.starts_with(&format!("{path}.")),
311 MatchType::Prefix => {
312 self.path.starts_with(&format!("{path}."))
313 || path.starts_with(&format!("{}.", self.path))
314 }
315 MatchType::Wildcard => {
316 let test_path = format!("{path}.test");
318 self.regex
319 .as_ref()
320 .is_some_and(|regex| regex.is_match(&test_path))
321 }
322 }
323 }
324
325 fn glob_to_regex(pattern: &str) -> String {
327 let mut regex = String::from("^");
328 let mut chars = pattern.chars().peekable();
329
330 while let Some(ch) = chars.next() {
331 match ch {
332 '*' => {
333 if chars.peek() == Some(&'*') {
334 chars.next(); regex.push_str(".*");
337 } else {
338 regex.push_str("[^.]*");
340 }
341 }
342 '.' => regex.push_str("\\."),
343 '?' => regex.push('.'),
344 '[' => regex.push_str("\\["),
345 ']' => regex.push_str("\\]"),
346 '{' => regex.push_str("\\{"),
347 '}' => regex.push_str("\\}"),
348 '(' => regex.push_str("\\("),
349 ')' => regex.push_str("\\)"),
350 '+' => regex.push_str("\\+"),
351 '^' => regex.push_str("\\^"),
352 '$' => regex.push_str("\\$"),
353 '|' => regex.push_str("\\|"),
354 '\\' => regex.push_str("\\\\"),
355 other => regex.push(other),
356 }
357 }
358
359 regex.push('$');
360 regex
361 }
362}
363
/// The strategy a compiled pattern uses to compare itself against a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MatchType {
    /// The path must be identical to the pattern.
    Exact,
    /// The pattern is treated as a leading prefix of the path.
    Prefix,
    /// The pattern contains `*` and is matched through a compiled regex.
    Wildcard,
}
374
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // Helpers
    // ---------------------------------------------------------------------

    /// Converts string literals into the owned list the constructors expect.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    /// Builds a `SchemaFilter`, panicking if a pattern fails to compile.
    fn filter_of(patterns: &[&str]) -> SchemaFilter {
        SchemaFilter::new(owned(patterns)).expect("filter patterns should compile")
    }

    /// Builds an include-only `CompiledSchema`.
    fn schema_of(includes: &[&str]) -> CompiledSchema {
        CompiledSchema::compile(&owned(includes)).expect("schema should compile")
    }

    /// Builds a `CompiledSchema` with both include and exclude patterns.
    fn schema_with_excludes(includes: &[&str], excludes: &[&str]) -> CompiledSchema {
        CompiledSchema::compile_with_excludes(&owned(includes), &owned(excludes))
            .expect("schema should compile")
    }

    /// Compiles a single `SchemaPattern`.
    fn pattern_of(path: &str) -> SchemaPattern {
        SchemaPattern::compile(path).expect("pattern should compile")
    }

    /// Asserts that a pattern made of `literal` matches `literal` itself.
    fn assert_self_match(literal: &str) {
        assert!(pattern_of(literal).matches(literal));
    }

    // ---------------------------------------------------------------------
    // hash_field_name
    // ---------------------------------------------------------------------

    #[test]
    fn test_hash_field_name() {
        let reference = hash_field_name(b"test");
        assert_eq!(reference, hash_field_name(b"test"));
        assert_ne!(reference, hash_field_name(b"other"));
    }

    #[test]
    fn test_hash_field_name_empty() {
        // Only checks that hashing an empty slice does not panic.
        let _ = hash_field_name(b"");
    }

    #[test]
    fn test_hash_field_name_unicode() {
        let name = "名前".as_bytes();
        assert_eq!(hash_field_name(name), hash_field_name(name));
    }

    #[test]
    fn test_hash_field_name_special_chars() {
        let reference = hash_field_name(b"user.name[0]");
        assert_eq!(reference, hash_field_name(b"user.name[0]"));
        assert_ne!(reference, hash_field_name(b"user.name[1]"));
    }

    // ---------------------------------------------------------------------
    // SchemaFilter
    // ---------------------------------------------------------------------

    #[test]
    fn test_schema_filter_new() {
        assert!(SchemaFilter::new(owned(&["user.name"])).is_ok());
    }

    #[test]
    fn test_schema_filter_new_empty() {
        let built = SchemaFilter::new(Vec::new());
        assert!(built.is_ok());
        assert_eq!(built.unwrap().paths().len(), 0);
    }

    #[test]
    fn test_schema_filter_new_invalid_regex() {
        // An unbalanced bracket is escaped as a literal, so this still builds.
        assert!(SchemaFilter::new(owned(&["[invalid"])).is_ok());
    }

    #[test]
    fn test_schema_filter_paths() {
        let filter = filter_of(&["user.name", "user.email"]);
        let stored = filter.paths();
        assert_eq!(stored.len(), 2);
        assert_eq!(stored[0], "user.name");
        assert_eq!(stored[1], "user.email");
    }

    #[test]
    fn test_schema_filter_matches() {
        let filter = filter_of(&["user.name"]);
        assert!(filter.matches("user.name"));
        assert!(!filter.matches("user.email"));
    }

    #[test]
    fn test_schema_filter_matches_multiple_patterns() {
        let filter = filter_of(&["user.name", "user.age"]);
        for accepted in ["user.name", "user.age"] {
            assert!(filter.matches(accepted));
        }
        assert!(!filter.matches("user.email"));
    }

    #[test]
    fn test_schema_filter_matches_wildcard_star() {
        assert!(filter_of(&["users.*.id"]).matches("users.foo.id"));
    }

    #[test]
    fn test_schema_filter_matches_array_wildcard() {
        let filter = filter_of(&["users.[*].id"]);
        for accepted in ["users.[0].id", "users.[99].id"] {
            assert!(filter.matches(accepted));
        }
    }

    #[test]
    fn test_schema_filter_matches_specific_array_index() {
        let filter = filter_of(&["users[0].name"]);
        assert!(filter.matches("users[0].name"));
        assert!(!filter.matches("users[1].name"));
    }

    #[test]
    fn test_schema_filter_debug() {
        let rendered = format!("{:?}", filter_of(&["test"]));
        assert!(rendered.contains("SchemaFilter"));
    }

    #[test]
    fn test_schema_filter_clone() {
        let original = filter_of(&["test"]);
        let duplicate = original.clone();
        assert_eq!(original.paths(), duplicate.paths());
    }

    // ---------------------------------------------------------------------
    // CompiledSchema
    // ---------------------------------------------------------------------

    #[test]
    fn test_compiled_schema_compile() {
        let compiled = CompiledSchema::compile(&owned(&["name", "age"]));
        assert!(compiled.is_ok());
        assert_eq!(compiled.unwrap().include_patterns.len(), 2);
    }

    #[test]
    fn test_compiled_schema_compile_empty() {
        let schema = schema_of(&[]);
        assert_eq!(schema.include_patterns.len(), 0);
        assert_eq!(schema.pattern_hashes.len(), 0);
    }

    #[test]
    fn test_compiled_schema_compile_with_excludes() {
        let schema = schema_with_excludes(&["user.name", "user.age"], &["user.password"]);
        assert_eq!(schema.include_patterns.len(), 2);
        assert_eq!(schema.exclude_patterns.len(), 1);
        assert_eq!(schema.max_depth, 10);
    }

    #[test]
    fn test_compiled_schema_compile_with_excludes_empty() {
        let schema = schema_with_excludes(&[], &[]);
        assert_eq!(schema.include_patterns.len(), 0);
        assert_eq!(schema.exclude_patterns.len(), 0);
    }

    #[test]
    fn test_compiled_schema_field_paths() {
        let listed = schema_of(&["name", "email"]).field_paths();
        assert_eq!(listed.len(), 2);
        for expected in ["name", "email"] {
            assert!(listed.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_compiled_schema_matches_path() {
        let schema = schema_of(&["name"]);
        assert!(schema.matches_path("name"));
        assert!(!schema.matches_path("age"));
    }

    #[test]
    fn test_compiled_schema_matches_path_with_exclude() {
        let schema = schema_with_excludes(&["user.name", "user.age"], &["user.age"]);
        assert!(schema.matches_path("user.name"));
        // Listed as an include, but the exclude wins.
        assert!(!schema.matches_path("user.age"));
    }

    #[test]
    fn test_compiled_schema_matches_path_hash_mismatch() {
        assert!(!schema_of(&["name"]).matches_path("nonexistent"));
    }

    #[test]
    fn test_compiled_schema_is_excluded() {
        let schema = schema_with_excludes(&["user.name"], &["user.password"]);
        assert!(!schema.is_excluded("user.name"));
        assert!(schema.is_excluded("user.password"));
    }

    #[test]
    fn test_compiled_schema_is_excluded_empty() {
        let schema = schema_of(&["name"]);
        for candidate in ["name", "anything"] {
            assert!(!schema.is_excluded(candidate));
        }
    }

    #[test]
    fn test_compiled_schema_should_include_object() {
        assert!(schema_of(&["user.name"]).should_include_object("user"));
    }

    #[test]
    fn test_compiled_schema_should_include_object_excluded() {
        let schema = schema_with_excludes(&["user.name"], &["user"]);
        assert!(!schema.should_include_object("user"));
    }

    #[test]
    fn test_compiled_schema_should_include_object_no_match() {
        assert!(!schema_of(&["user.name"]).should_include_object("config"));
    }

    #[test]
    fn test_compiled_schema_debug() {
        let rendered = format!("{:?}", schema_of(&["name"]));
        assert!(rendered.contains("CompiledSchema"));
    }

    // ---------------------------------------------------------------------
    // SchemaPattern
    // ---------------------------------------------------------------------

    #[test]
    fn test_schema_pattern_exact() {
        let pattern = pattern_of("name");
        assert!(matches!(pattern.match_type, MatchType::Exact));
        assert!(pattern.matches("name"));
        assert!(!pattern.matches("age"));
    }

    #[test]
    fn test_schema_pattern_prefix() {
        let pattern = pattern_of("user.name");
        assert!(matches!(pattern.match_type, MatchType::Prefix));
        for accepted in ["user.name", "user.name.first"] {
            assert!(pattern.matches(accepted));
        }
        assert!(!pattern.matches("user.age"));
    }

    #[test]
    fn test_schema_pattern_wildcard() {
        let pattern = pattern_of("user.*");
        assert!(matches!(pattern.match_type, MatchType::Wildcard));
        assert!(pattern.matches("user.name"));
        assert!(!pattern.matches("name"));
    }

    #[test]
    fn test_schema_pattern_double_wildcard() {
        let pattern = pattern_of("user.**");
        assert!(matches!(pattern.match_type, MatchType::Wildcard));
        for accepted in ["user.name", "user.name.first"] {
            assert!(pattern.matches(accepted));
        }
    }

    #[test]
    fn test_schema_pattern_could_match_children_exact() {
        let pattern = pattern_of("user");
        for candidate in ["user", "other"] {
            assert!(!pattern.could_match_children(candidate));
        }
    }

    #[test]
    fn test_schema_pattern_could_match_children_exact_deeper() {
        assert!(pattern_of("user.name").could_match_children("user"));
    }

    #[test]
    fn test_schema_pattern_could_match_children_prefix() {
        let pattern = pattern_of("user.name");
        assert!(matches!(pattern.match_type, MatchType::Prefix));
        assert!(pattern.could_match_children("user"));
    }

    #[test]
    fn test_schema_pattern_could_match_children_prefix_reverse() {
        assert!(pattern_of("user.name").could_match_children("user.name.first"));
    }

    #[test]
    fn test_schema_pattern_could_match_children_wildcard() {
        let pattern = pattern_of("user.*");
        assert!(matches!(pattern.match_type, MatchType::Wildcard));
        assert!(pattern.could_match_children("user"));
    }

    #[test]
    fn test_schema_pattern_could_match_children_wildcard_no_match() {
        assert!(!pattern_of("user.*").could_match_children("config"));
    }

    #[test]
    fn test_schema_pattern_glob_to_regex_question_mark() {
        let pattern = pattern_of("user.*ame");
        for accepted in ["user.name", "user.fame"] {
            assert!(pattern.matches(accepted));
        }
    }

    #[test]
    fn test_schema_pattern_glob_to_regex_special_chars() {
        assert_self_match("a[b]c");
    }

    #[test]
    fn test_schema_pattern_glob_to_regex_braces() {
        assert_self_match("a{b}c");
    }

    #[test]
    fn test_schema_pattern_glob_to_regex_parens() {
        assert_self_match("a(b)c");
    }

    #[test]
    fn test_schema_pattern_glob_to_regex_plus() {
        assert_self_match("a+b");
    }

    #[test]
    fn test_schema_pattern_glob_to_regex_caret() {
        assert_self_match("a^b");
    }

    #[test]
    fn test_schema_pattern_glob_to_regex_dollar() {
        assert_self_match("a$b");
    }

    #[test]
    fn test_schema_pattern_glob_to_regex_pipe() {
        assert_self_match("a|b");
    }

    #[test]
    fn test_schema_pattern_glob_to_regex_backslash() {
        assert_self_match(r"a\b");
    }

    #[test]
    fn test_schema_pattern_debug() {
        let rendered = format!("{:?}", pattern_of("user.name"));
        assert!(rendered.contains("SchemaPattern"));
        assert!(rendered.contains("user.name"));
    }

    #[test]
    fn test_schema_pattern_components() {
        let pattern = pattern_of("user.profile.name");
        assert_eq!(pattern.components.len(), 3);
        for (index, expected) in ["user", "profile", "name"].into_iter().enumerate() {
            assert_eq!(pattern.components[index], expected);
        }
    }

    // ---------------------------------------------------------------------
    // MatchType
    // ---------------------------------------------------------------------

    #[test]
    fn test_match_type_debug() {
        let cases = [
            (MatchType::Exact, "Exact"),
            (MatchType::Prefix, "Prefix"),
            (MatchType::Wildcard, "Wildcard"),
        ];
        for (variant, label) in cases {
            assert!(format!("{variant:?}").contains(label));
        }
    }

    #[test]
    fn test_match_type_clone() {
        let source = MatchType::Exact;
        let duplicate = source;
        assert!(matches!(duplicate, MatchType::Exact));
    }

    #[test]
    fn test_match_type_copy() {
        let source = MatchType::Prefix;
        let duplicate = source;
        assert!(matches!(duplicate, MatchType::Prefix));
        // `source` is still usable because the type is `Copy`.
        assert!(matches!(source, MatchType::Prefix));
    }

    // ---------------------------------------------------------------------
    // JSON-path to regex translation (exercised through SchemaFilter)
    // ---------------------------------------------------------------------

    #[test]
    fn test_json_path_to_regex_simple() {
        let filter = filter_of(&["name"]);
        assert!(filter.matches("name"));
        assert!(!filter.matches("name2"));
    }

    #[test]
    fn test_json_path_to_regex_dotted() {
        let filter = filter_of(&["user.name"]);
        assert!(filter.matches("user.name"));
        assert!(!filter.matches("user_name"));
    }

    #[test]
    fn test_json_path_to_regex_star() {
        let filter = filter_of(&["*.name"]);
        for accepted in ["user.name", "admin.name"] {
            assert!(filter.matches(accepted));
        }
    }

    #[test]
    fn test_json_path_to_regex_array_wildcard() {
        let filter = filter_of(&["items.[*]"]);
        for accepted in ["items.[0]", "items.[123]"] {
            assert!(filter.matches(accepted));
        }
    }

    #[test]
    fn test_json_path_to_regex_mixed() {
        assert!(filter_of(&["users.[*].*.id"]).matches("users.[0].profile.id"));
    }
}