1use crate::skiptape::error::{Result, SkipTapeError};
8use crate::skiptape::simd_ops::SimdStringOps;
9
/// A set of compiled include/exclude path patterns used to decide which
/// dotted field paths are selected.
#[derive(Debug)]
pub struct CompiledSchema {
    /// Patterns compiled from the include paths; a path is selected only if at
    /// least one of these matches it.
    pub include_patterns: Vec<SchemaPattern>,
    /// Patterns compiled from the exclude paths; these are checked before the
    /// include patterns and always win.
    pub exclude_patterns: Vec<SchemaPattern>,
    /// Depth limit recorded on the schema. The constructors in this file set
    /// it to 10; nothing in this file reads it back.
    pub max_depth: usize,
    /// `SimdStringOps::hash_field_name` of each include path's raw bytes, in
    /// the same order as `include_patterns`.
    pub pattern_hashes: Vec<u64>,
}
22
23impl CompiledSchema {
24 pub fn compile(paths: &[String]) -> Result<Self> {
29 let mut include_patterns = Vec::new();
30 let mut pattern_hashes = Vec::new();
31
32 for path in paths {
33 let pattern = SchemaPattern::compile(path)?;
34 include_patterns.push(pattern);
35
36 let hash = SimdStringOps::hash_field_name(path.as_bytes());
38 pattern_hashes.push(hash);
39 }
40
41 Ok(Self {
42 include_patterns,
43 exclude_patterns: Vec::new(),
44 max_depth: 10, pattern_hashes,
46 })
47 }
48
49 pub fn compile_with_excludes(
54 include_paths: &[String],
55 exclude_paths: &[String],
56 ) -> Result<Self> {
57 let mut include_patterns = Vec::new();
58 let mut exclude_patterns = Vec::new();
59 let mut pattern_hashes = Vec::new();
60
61 for path in include_paths {
62 let pattern = SchemaPattern::compile(path)?;
63 include_patterns.push(pattern);
64 let hash = SimdStringOps::hash_field_name(path.as_bytes());
65 pattern_hashes.push(hash);
66 }
67
68 for path in exclude_paths {
69 let pattern = SchemaPattern::compile(path)?;
70 exclude_patterns.push(pattern);
71 }
72
73 Ok(Self {
74 include_patterns,
75 exclude_patterns,
76 max_depth: 10,
77 pattern_hashes,
78 })
79 }
80
81 #[must_use]
83 pub fn field_paths(&self) -> Vec<String> {
84 self.include_patterns
85 .iter()
86 .map(|pattern| pattern.path.clone())
87 .collect()
88 }
89
90 #[must_use]
92 pub fn matches_path(&self, path: &str) -> bool {
93 for exclude_pattern in &self.exclude_patterns {
95 if exclude_pattern.matches(path) {
96 return false;
97 }
98 }
99
100 let path_hash = SimdStringOps::hash_field_name(path.as_bytes());
102
103 if !self.pattern_hashes.contains(&path_hash) {
105 return false;
106 }
107
108 for pattern in &self.include_patterns {
110 if pattern.matches(path) {
111 return true;
112 }
113 }
114
115 false
116 }
117
118 #[must_use]
120 pub fn is_excluded(&self, path: &str) -> bool {
121 for exclude_pattern in &self.exclude_patterns {
122 if exclude_pattern.matches(path) {
123 return true;
124 }
125 }
126 false
127 }
128
129 #[must_use]
131 pub fn should_include_object(&self, path: &str) -> bool {
132 if self.is_excluded(path) {
134 return false;
135 }
136
137 for pattern in &self.include_patterns {
139 if pattern.could_match_children(path) {
140 return true;
142 }
143 }
144 false
145 }
146}
147
/// One compiled path pattern: the original text plus the data needed to match
/// candidate paths against it.
#[derive(Debug)]
pub struct SchemaPattern {
    /// The pattern exactly as it was supplied.
    pub path: String,
    /// `path` split on `'.'`.
    pub components: Vec<String>,
    /// How `path` is compared against candidate paths.
    pub match_type: MatchType,
    /// Anchored regex generated from `path`; `SchemaPattern::compile` sets it
    /// only for wildcard patterns and leaves it `None` otherwise.
    pub regex: Option<regex::Regex>,
}
160
161impl SchemaPattern {
162 pub fn compile(path: &str) -> Result<Self> {
167 let components: Vec<String> = path
168 .split('.')
169 .map(std::string::ToString::to_string)
170 .collect();
171
172 let match_type = if path.contains('*') {
173 MatchType::Wildcard
174 } else if components.len() > 1 {
175 MatchType::Prefix
176 } else {
177 MatchType::Exact
178 };
179
180 let regex =
181 if matches!(match_type, MatchType::Wildcard) {
182 let regex_pattern = Self::glob_to_regex(path);
184 Some(regex::Regex::new(®ex_pattern).map_err(|e| {
185 SkipTapeError::SchemaError(format!("Invalid regex pattern: {e}"))
186 })?)
187 } else {
188 None
189 };
190
191 Ok(Self {
192 path: path.to_string(),
193 components,
194 match_type,
195 regex,
196 })
197 }
198
199 #[must_use]
201 pub fn matches(&self, path: &str) -> bool {
202 match self.match_type {
203 MatchType::Exact => self.path == path,
204 MatchType::Prefix => path.starts_with(&self.path),
205 MatchType::Wildcard => self
206 .regex
207 .as_ref()
208 .is_some_and(|regex| regex.is_match(path)),
209 }
210 }
211
212 #[must_use]
214 pub fn could_match_children(&self, path: &str) -> bool {
215 match self.match_type {
216 MatchType::Exact => self.path.starts_with(&format!("{path}.")),
217 MatchType::Prefix => {
218 self.path.starts_with(&format!("{path}."))
219 || path.starts_with(&format!("{}.", self.path))
220 }
221 MatchType::Wildcard => {
222 let test_path = format!("{path}.test");
224 self.regex
225 .as_ref()
226 .is_some_and(|regex| regex.is_match(&test_path))
227 }
228 }
229 }
230
231 fn glob_to_regex(pattern: &str) -> String {
233 let mut regex = String::from("^");
234 let mut chars = pattern.chars().peekable();
235
236 while let Some(ch) = chars.next() {
237 match ch {
238 '*' => {
239 if chars.peek() == Some(&'*') {
240 chars.next(); regex.push_str(".*");
243 } else {
244 regex.push_str("[^.]*");
246 }
247 }
248 '.' => regex.push_str("\\."),
249 '?' => regex.push('.'),
250 '[' => regex.push_str("\\["),
251 ']' => regex.push_str("\\]"),
252 '{' => regex.push_str("\\{"),
253 '}' => regex.push_str("\\}"),
254 '(' => regex.push_str("\\("),
255 ')' => regex.push_str("\\)"),
256 '+' => regex.push_str("\\+"),
257 '^' => regex.push_str("\\^"),
258 '$' => regex.push_str("\\$"),
259 '|' => regex.push_str("\\|"),
260 '\\' => regex.push_str("\\\\"),
261 other => regex.push(other),
262 }
263 }
264
265 regex.push('$');
266 regex
267 }
268}
269
/// How a [`SchemaPattern`] compares its pattern text against a candidate path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MatchType {
    /// The candidate must equal the pattern text.
    Exact,
    /// The candidate must begin with the pattern text.
    Prefix,
    /// The candidate is tested against a regex generated from the pattern.
    Wildcard,
}
280
// Unit tests for `CompiledSchema`, `SchemaPattern` and `MatchType`.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- CompiledSchema construction -------------------------------------

    #[test]
    fn test_compiled_schema_compile() {
        // Two plain field names yield two include patterns.
        let schema = CompiledSchema::compile(&["name".to_string(), "age".to_string()]);
        assert!(schema.is_ok());
        let schema = schema.unwrap();
        assert_eq!(schema.include_patterns.len(), 2);
    }

    #[test]
    fn test_compiled_schema_compile_empty() {
        // An empty path list is valid and produces no include patterns.
        let schema = CompiledSchema::compile(&[]);
        assert!(schema.is_ok());
        let schema = schema.unwrap();
        assert!(schema.include_patterns.is_empty());
    }

    #[test]
    fn test_compiled_schema_with_excludes() {
        // Include and exclude lists are compiled into their own vectors.
        let schema =
            CompiledSchema::compile_with_excludes(&["name".to_string()], &["age".to_string()]);
        assert!(schema.is_ok());
        let schema = schema.unwrap();
        assert_eq!(schema.include_patterns.len(), 1);
        assert_eq!(schema.exclude_patterns.len(), 1);
    }

    #[test]
    fn test_compiled_schema_field_paths() {
        // `field_paths` returns the original include pattern strings.
        let schema = CompiledSchema::compile(&["name".to_string(), "age".to_string()]).unwrap();
        let paths = schema.field_paths();
        assert!(paths.contains(&"name".to_string()));
        assert!(paths.contains(&"age".to_string()));
    }

    // ---- CompiledSchema matching -----------------------------------------

    #[test]
    fn test_compiled_schema_matches_path_exact() {
        let schema = CompiledSchema::compile(&["name".to_string()]).unwrap();
        assert!(schema.matches_path("name"));
        assert!(!schema.matches_path("age"));
    }

    #[test]
    fn test_compiled_schema_is_excluded() {
        // Only the exclude list is consulted by `is_excluded`.
        let schema =
            CompiledSchema::compile_with_excludes(&["*".to_string()], &["secret".to_string()])
                .unwrap();
        assert!(schema.is_excluded("secret"));
        assert!(!schema.is_excluded("name"));
    }

    #[test]
    fn test_compiled_schema_should_include_object() {
        // `user` is an ancestor of the included `user.name`.
        let schema = CompiledSchema::compile(&["user.name".to_string()]).unwrap();
        assert!(schema.should_include_object("user"));
    }

    #[test]
    fn test_compiled_schema_should_not_include_excluded() {
        // Exclusion of the object itself wins over an included descendant.
        let schema = CompiledSchema::compile_with_excludes(
            &["user.name".to_string()],
            &["user".to_string()],
        )
        .unwrap();
        assert!(!schema.should_include_object("user"));
    }

    // ---- SchemaPattern classification and matching -----------------------

    #[test]
    fn test_schema_pattern_exact() {
        // A single component without `*` is classified as Exact.
        let pattern = SchemaPattern::compile("name").unwrap();
        assert!(matches!(pattern.match_type, MatchType::Exact));
        assert!(pattern.matches("name"));
        assert!(!pattern.matches("age"));
    }

    #[test]
    fn test_schema_pattern_prefix() {
        // A dotted path without `*` is classified as Prefix and also matches
        // paths nested below it.
        let pattern = SchemaPattern::compile("user.name").unwrap();
        assert!(matches!(pattern.match_type, MatchType::Prefix));
        assert!(pattern.matches("user.name"));
        assert!(pattern.matches("user.name.first"));
    }

    #[test]
    fn test_schema_pattern_wildcard() {
        // Any `*` makes the pattern a Wildcard.
        let pattern = SchemaPattern::compile("user.*").unwrap();
        assert!(matches!(pattern.match_type, MatchType::Wildcard));
        assert!(pattern.matches("user.name"));
        assert!(pattern.matches("user.age"));
        assert!(!pattern.matches("name"));
    }

    #[test]
    fn test_schema_pattern_double_wildcard() {
        // `**` spans `.` separators.
        let pattern = SchemaPattern::compile("user.**").unwrap();
        assert!(pattern.matches("user.profile.name"));
    }

    #[test]
    fn test_schema_pattern_could_match_children() {
        let pattern = SchemaPattern::compile("user.name").unwrap();
        assert!(pattern.could_match_children("user"));
    }

    #[test]
    fn test_schema_pattern_could_match_children_wildcard() {
        let pattern = SchemaPattern::compile("user.*").unwrap();
        assert!(pattern.could_match_children("user"));
    }

    // ---- glob_to_regex escaping ------------------------------------------

    #[test]
    fn test_glob_to_regex_special_chars() {
        // Note: this pattern has no `*`, so it is compiled as Exact and is
        // compared as a plain string; `glob_to_regex` is not reached here.
        let pattern = SchemaPattern::compile("test[0]").unwrap();
        assert!(pattern.matches("test[0]"));
    }

    #[test]
    fn test_glob_to_regex_question_mark() {
        // `?` matches one character; the trailing `*` makes this a Wildcard.
        let pattern = SchemaPattern::compile("test?*").unwrap();
        assert!(pattern.matches("test1abc"));
        assert!(pattern.matches("testAbc"));
    }

    // ---- Derived trait smoke tests ---------------------------------------

    #[test]
    fn test_match_type_debug() {
        let mt = MatchType::Exact;
        let debug = format!("{mt:?}");
        assert!(debug.contains("Exact"));
    }

    #[test]
    fn test_match_type_clone() {
        // The assignment below is a `Copy`; `.clone()` is not called.
        let mt = MatchType::Wildcard;
        let cloned = mt;
        assert!(matches!(cloned, MatchType::Wildcard));
    }

    #[test]
    fn test_schema_pattern_debug() {
        let pattern = SchemaPattern::compile("name").unwrap();
        let debug = format!("{pattern:?}");
        assert!(debug.contains("name"));
    }

    #[test]
    fn test_compiled_schema_debug() {
        let schema = CompiledSchema::compile(&["name".to_string()]).unwrap();
        let debug = format!("{schema:?}");
        assert!(!debug.is_empty());
    }

    // ---- CompiledSchema field defaults -----------------------------------

    #[test]
    fn test_schema_max_depth() {
        let schema = CompiledSchema::compile(&["name".to_string()]).unwrap();
        assert_eq!(schema.max_depth, 10);
    }

    #[test]
    fn test_schema_pattern_hashes() {
        // One hash is stored per include path.
        let schema = CompiledSchema::compile(&["name".to_string()]).unwrap();
        assert_eq!(schema.pattern_hashes.len(), 1);
    }

    // ---- glob_to_regex escaping (wildcard patterns) ----------------------
    // Each pattern below ends in `*`, so it goes through `glob_to_regex`; the
    // metacharacter before it must be treated literally.

    #[test]
    fn test_glob_to_regex_curly_braces() {
        let pattern = SchemaPattern::compile("test{0}*").unwrap();
        assert!(pattern.matches("test{0}"));
    }

    #[test]
    fn test_glob_to_regex_parentheses() {
        let pattern = SchemaPattern::compile("test(1)*").unwrap();
        assert!(pattern.matches("test(1)"));
    }

    #[test]
    fn test_glob_to_regex_plus() {
        let pattern = SchemaPattern::compile("test+1*").unwrap();
        assert!(pattern.matches("test+1"));
    }

    #[test]
    fn test_glob_to_regex_caret() {
        let pattern = SchemaPattern::compile("test^1*").unwrap();
        assert!(pattern.matches("test^1"));
    }

    #[test]
    fn test_glob_to_regex_dollar() {
        let pattern = SchemaPattern::compile("test$1*").unwrap();
        assert!(pattern.matches("test$1"));
    }

    #[test]
    fn test_glob_to_regex_pipe() {
        let pattern = SchemaPattern::compile("test|1*").unwrap();
        assert!(pattern.matches("test|1"));
    }

    #[test]
    fn test_glob_to_regex_backslash() {
        let pattern = SchemaPattern::compile("test\\1*").unwrap();
        assert!(pattern.matches("test\\1"));
    }

    // ---- matches_path with several patterns / exclusions -----------------

    #[test]
    fn test_matches_path_hash_collision() {
        // Despite the name, no hash collision is forced here; this checks that
        // two sibling include paths both match and an unlisted sibling does not.
        let schema =
            CompiledSchema::compile(&["user.name".to_string(), "user.email".to_string()]).unwrap();
        assert!(schema.matches_path("user.name"));
        assert!(schema.matches_path("user.email"));
        assert!(!schema.matches_path("user.age"));
    }

    #[test]
    fn test_matches_path_with_exclusion() {
        let schema = CompiledSchema::compile_with_excludes(
            &["user.name".to_string()],
            &["user.secret".to_string()],
        )
        .unwrap();
        assert!(schema.matches_path("user.name"));
        assert!(schema.is_excluded("user.secret"));
    }

    // ---- could_match_children edge cases ---------------------------------

    #[test]
    fn test_could_match_children_exact_no_match() {
        // An Exact pattern has nothing below an unrelated object.
        let pattern = SchemaPattern::compile("name").unwrap();
        assert!(!pattern.could_match_children("other"));
    }

    #[test]
    fn test_could_match_children_prefix_reverse() {
        // The queried path is below the pattern rather than above it.
        let pattern = SchemaPattern::compile("user.name").unwrap();
        assert!(pattern.could_match_children("user.name.first"));
    }

    #[test]
    fn test_could_match_children_wildcard_no_regex() {
        // Hand-built Wildcard pattern without a regex: nothing can match.
        let pattern = SchemaPattern {
            path: "test.*".to_string(),
            components: vec!["test".to_string(), "*".to_string()],
            match_type: MatchType::Wildcard,
            regex: None,
        };
        assert!(!pattern.could_match_children("test"));
    }

    #[test]
    fn test_matches_wildcard_no_regex() {
        // Hand-built Wildcard pattern without a regex: nothing can match.
        let pattern = SchemaPattern {
            path: "test.*".to_string(),
            components: vec!["test".to_string(), "*".to_string()],
            match_type: MatchType::Wildcard,
            regex: None,
        };
        assert!(!pattern.matches("test.abc"));
    }

    #[test]
    fn test_should_include_object_no_match() {
        // No include pattern lives under `user`.
        let schema = CompiledSchema::compile(&["other.field".to_string()]).unwrap();
        assert!(!schema.should_include_object("user"));
    }

    #[test]
    fn test_exclude_patterns_empty() {
        // `compile` never populates the exclude list.
        let schema = CompiledSchema::compile(&["name".to_string()]).unwrap();
        assert!(schema.exclude_patterns.is_empty());
    }

    #[test]
    fn test_match_type_copy() {
        let mt = MatchType::Prefix;
        let copied = mt;
        assert!(matches!(copied, MatchType::Prefix));
    }

    // ---- Larger scenarios ------------------------------------------------

    #[test]
    fn test_compiled_schema_multiple_patterns() {
        let schema = CompiledSchema::compile(&[
            "user.name".to_string(),
            "user.email".to_string(),
            "address.city".to_string(),
        ])
        .unwrap();

        assert!(schema.matches_path("user.name"));
        assert!(schema.matches_path("address.city"));
        assert!(!schema.matches_path("phone"));
    }

    #[test]
    fn test_compiled_schema_excludes_with_wildcards() {
        // Wildcard exclude patterns go through the regex path as well.
        let schema =
            CompiledSchema::compile_with_excludes(&["**".to_string()], &["secret.*".to_string()])
                .unwrap();

        assert!(schema.is_excluded("secret.key"));
        assert!(!schema.is_excluded("public.key"));
    }

    #[test]
    fn test_schema_pattern_components() {
        // Components are the `.`-separated pieces of the pattern, in order.
        let pattern = SchemaPattern::compile("user.profile.name").unwrap();
        assert_eq!(pattern.components.len(), 3);
        assert_eq!(pattern.components[0], "user");
        assert_eq!(pattern.components[1], "profile");
        assert_eq!(pattern.components[2], "name");
    }

    #[test]
    fn test_could_match_children_wildcard_match() {
        let pattern = SchemaPattern::compile("user.**").unwrap();
        assert!(pattern.could_match_children("user"));
    }

    #[test]
    fn test_could_match_children_no_match() {
        let pattern = SchemaPattern::compile("user.name").unwrap();
        assert!(!pattern.could_match_children("other"));
    }

    #[test]
    fn test_matches_path_no_hash_match() {
        // A path that was never listed as an include path is rejected.
        let schema = CompiledSchema::compile(&["specific.field".to_string()]).unwrap();
        assert!(!schema.matches_path("different.field"));
    }
}