1use regex::Regex;
2use std::collections::HashSet;
3use std::path::Path;
4use thiserror::Error;
5
/// Errors reported while validating glob patterns, gitignore patterns,
/// pattern sets, and filesystem paths.
#[derive(Error, Debug)]
pub enum ValidationError {
    /// A glob pattern failed a length, syntax, or wildcard-sequence check.
    /// `reason` carries the human-readable explanation.
    #[error("Invalid glob pattern '{pattern}': {reason}")]
    InvalidGlobPattern { pattern: String, reason: String },

    /// A gitignore pattern failed a gitignore-specific check.
    #[error("Invalid gitignore pattern '{pattern}': {reason}")]
    InvalidGitignorePattern { pattern: String, reason: String },

    /// The pattern was rejected for complexity (known-bad pattern, excessive
    /// depth score, or too many recursive wildcards).
    #[error("Pattern too complex: {reason}")]
    PatternTooComplex { reason: String },

    /// A set of patterns contains a duplicate or a pattern that is both
    /// included and excluded.
    #[error("Conflicting patterns detected: {conflict}")]
    ConflictingPatterns { conflict: String },

    /// A filesystem path failed validation.
    #[error("Invalid path '{path}': {reason}")]
    InvalidPath { path: String, reason: String },

    /// More patterns were supplied than the configured maximum.
    #[error("Pattern limit exceeded: maximum {max} patterns allowed, got {actual}")]
    PatternLimitExceeded { max: usize, actual: usize },

    /// The pattern was empty while empty patterns are disallowed.
    #[error("Empty pattern not allowed")]
    EmptyPattern,

    /// A regular expression could not be compiled; `source` is the
    /// underlying `regex` error.
    #[error("Regex compilation failed for pattern '{pattern}': {source}")]
    RegexError {
        pattern: String,
        #[source]
        source: regex::Error,
    },

    /// An I/O failure associated with `path`.
    // NOTE(review): no code in this part of the file constructs this variant;
    // confirm where it is produced.
    #[error("IO error while validating path '{path}': {source}")]
    IoError {
        path: String,
        #[source]
        source: std::io::Error,
    },
}
44
/// Result alias used throughout this module: `Ok(T)` or a [`ValidationError`].
pub type ValidationResult<T> = Result<T, ValidationError>;
47
/// Limits and switches that control how strictly `PatternValidator` behaves.
#[derive(Debug, Clone)]
pub struct ValidationConfig {
    /// Largest number of patterns accepted by a single `validate_patterns` call.
    pub max_patterns: usize,
    /// Upper bound on a pattern's length as reported by `str::len` (bytes).
    pub max_pattern_length: usize,
    /// Upper bound on the recursion-depth score computed for a glob pattern.
    pub max_glob_depth: usize,
    /// When `false`, the empty string is rejected as a glob pattern.
    pub allow_empty_patterns: bool,
    /// When `true`, `validate_path` rejects paths that do not exist.
    pub validate_path_existence: bool,
    /// When `true`, `validate_patterns` also looks for duplicates and
    /// include/exclude conflicts.
    pub check_conflicts: bool,
    /// Validation time budget in milliseconds.
    // NOTE(review): nothing visible in this part of the file reads this
    // field; confirm where the budget is enforced.
    pub max_validation_time_ms: u64,
}

impl Default for ValidationConfig {
    /// Returns the stock configuration: generous limits, empty patterns
    /// rejected, conflict checking on, path-existence checking off.
    fn default() -> Self {
        const DEFAULT_MAX_PATTERNS: usize = 1000;
        const DEFAULT_MAX_PATTERN_LENGTH: usize = 2048;
        const DEFAULT_MAX_GLOB_DEPTH: usize = 20;
        const DEFAULT_MAX_VALIDATION_TIME_MS: u64 = 5000;

        ValidationConfig {
            max_patterns: DEFAULT_MAX_PATTERNS,
            max_pattern_length: DEFAULT_MAX_PATTERN_LENGTH,
            max_glob_depth: DEFAULT_MAX_GLOB_DEPTH,
            allow_empty_patterns: false,
            validate_path_existence: false,
            check_conflicts: true,
            max_validation_time_ms: DEFAULT_MAX_VALIDATION_TIME_MS,
        }
    }
}
80
/// Validates glob patterns, gitignore patterns, pattern sets, and paths
/// against a [`ValidationConfig`].
pub struct PatternValidator {
    /// Limits and switches applied by every validation method.
    config: ValidationConfig,
    /// Regex matching a single glob metacharacter (`*`, `?`, `[`, `]`, `{`, `}`).
    // NOTE(review): compiled in `new` but not read by any method visible in
    // this part of the file; confirm it is still needed.
    glob_regex: Regex,
    /// Exact pattern strings that are rejected outright as too complex.
    dangerous_patterns: HashSet<String>,
}
87
88impl PatternValidator {
89 pub fn new(config: ValidationConfig) -> ValidationResult<Self> {
91 let glob_regex = Regex::new(r"[\*\?\[\]{}]").map_err(|e| ValidationError::RegexError {
93 pattern: r"[\*\?\[\]{}]".to_string(),
94 source: e,
95 })?;
96
97 let mut dangerous_patterns = HashSet::new();
99 dangerous_patterns.insert("**/*/**/*/**/*/**/*/**".to_string()); dangerous_patterns.insert("*".repeat(100)); dangerous_patterns.insert("?".repeat(100)); Ok(Self {
104 config,
105 glob_regex,
106 dangerous_patterns,
107 })
108 }
109
110 pub fn default() -> ValidationResult<Self> {
112 Self::new(ValidationConfig::default())
113 }
114
115 pub fn validate_glob_pattern(&self, pattern: &str) -> ValidationResult<()> {
117 if pattern.is_empty() && !self.config.allow_empty_patterns {
119 return Err(ValidationError::EmptyPattern);
120 }
121
122 if pattern.len() > self.config.max_pattern_length {
124 return Err(ValidationError::InvalidGlobPattern {
125 pattern: pattern.to_string(),
126 reason: format!(
127 "Pattern too long: {} characters (max: {})",
128 pattern.len(),
129 self.config.max_pattern_length
130 ),
131 });
132 }
133
134 if self.dangerous_patterns.contains(pattern) {
136 return Err(ValidationError::PatternTooComplex {
137 reason: "Pattern is known to cause performance issues".to_string(),
138 });
139 }
140
141 self.validate_glob_syntax(pattern)?;
143
144 self.validate_glob_depth(pattern)?;
146
147 self.validate_glob_sequences(pattern)?;
149
150 Ok(())
151 }
152
153 fn validate_glob_syntax(&self, pattern: &str) -> ValidationResult<()> {
155 let mut bracket_depth = 0;
156 let mut brace_depth = 0;
157 let mut chars = pattern.chars().peekable();
158
159 while let Some(ch) = chars.next() {
160 match ch {
161 '[' => {
162 bracket_depth += 1;
163 if bracket_depth > 1 {
164 return Err(ValidationError::InvalidGlobPattern {
165 pattern: pattern.to_string(),
166 reason: "Nested character classes not allowed".to_string(),
167 });
168 }
169 if chars.peek() == Some(&']') {
171 return Err(ValidationError::InvalidGlobPattern {
172 pattern: pattern.to_string(),
173 reason: "Empty character class []".to_string(),
174 });
175 }
176 }
177 ']' => {
178 if bracket_depth == 0 {
179 return Err(ValidationError::InvalidGlobPattern {
180 pattern: pattern.to_string(),
181 reason: "Unmatched closing bracket ']'".to_string(),
182 });
183 }
184 bracket_depth -= 1;
185 }
186 '{' => {
187 brace_depth += 1;
188 if brace_depth > 3 {
189 return Err(ValidationError::InvalidGlobPattern {
190 pattern: pattern.to_string(),
191 reason: "Too many nested braces (max 3)".to_string(),
192 });
193 }
194 }
195 '}' => {
196 if brace_depth == 0 {
197 return Err(ValidationError::InvalidGlobPattern {
198 pattern: pattern.to_string(),
199 reason: "Unmatched closing brace '}'".to_string(),
200 });
201 }
202 brace_depth -= 1;
203 }
204 '\\' => {
205 if let Some(next_ch) = chars.next() {
207 if !matches!(
208 next_ch,
209 '*' | '?' | '[' | ']' | '{' | '}' | '\\' | '/' | '!' | '-' | '^'
210 ) {
211 return Err(ValidationError::InvalidGlobPattern {
212 pattern: pattern.to_string(),
213 reason: format!("Invalid escape sequence '\\{}'", next_ch),
214 });
215 }
216 } else {
217 return Err(ValidationError::InvalidGlobPattern {
218 pattern: pattern.to_string(),
219 reason: "Trailing backslash".to_string(),
220 });
221 }
222 }
223 _ => {}
224 }
225 }
226
227 if bracket_depth > 0 {
229 return Err(ValidationError::InvalidGlobPattern {
230 pattern: pattern.to_string(),
231 reason: "Unclosed character class '['".to_string(),
232 });
233 }
234
235 if brace_depth > 0 {
236 return Err(ValidationError::InvalidGlobPattern {
237 pattern: pattern.to_string(),
238 reason: "Unclosed brace group '{'".to_string(),
239 });
240 }
241
242 Ok(())
243 }
244
245 fn validate_glob_depth(&self, pattern: &str) -> ValidationResult<()> {
247 let depth = pattern.matches("**/").count() + pattern.matches("/**/").count();
248 if depth > self.config.max_glob_depth {
249 return Err(ValidationError::PatternTooComplex {
250 reason: format!(
251 "Pattern depth {} exceeds maximum {}",
252 depth, self.config.max_glob_depth
253 ),
254 });
255 }
256 Ok(())
257 }
258
259 fn validate_glob_sequences(&self, pattern: &str) -> ValidationResult<()> {
261 if pattern.contains("****") {
263 return Err(ValidationError::InvalidGlobPattern {
264 pattern: pattern.to_string(),
265 reason: "Too many consecutive wildcards".to_string(),
266 });
267 }
268
269 if pattern.contains("????") {
271 return Err(ValidationError::InvalidGlobPattern {
272 pattern: pattern.to_string(),
273 reason: "Too many consecutive single-character wildcards".to_string(),
274 });
275 }
276
277 if pattern.contains("**/**/**/**") {
279 return Err(ValidationError::PatternTooComplex {
280 reason: "Too many recursive directory wildcards".to_string(),
281 });
282 }
283
284 Ok(())
285 }
286
287 pub fn validate_gitignore_pattern(&self, pattern: &str) -> ValidationResult<()> {
289 let trimmed = pattern.trim();
291 if trimmed.starts_with('#') || trimmed.is_empty() {
292 return Ok(());
293 }
294
295 if trimmed.is_empty() && !self.config.allow_empty_patterns {
297 return Err(ValidationError::EmptyPattern);
298 }
299
300 if pattern.len() > self.config.max_pattern_length {
302 return Err(ValidationError::InvalidGitignorePattern {
303 pattern: pattern.to_string(),
304 reason: format!(
305 "Pattern too long: {} characters (max: {})",
306 pattern.len(),
307 self.config.max_pattern_length
308 ),
309 });
310 }
311
312 self.validate_gitignore_syntax(trimmed)?;
314
315 Ok(())
316 }
317
318 fn validate_gitignore_syntax(&self, pattern: &str) -> ValidationResult<()> {
320 let pattern = if pattern.starts_with('!') {
322 &pattern[1..]
323 } else {
324 pattern
325 };
326
327 let pattern = pattern.trim_end_matches('/');
329
330 self.validate_glob_pattern(pattern)?;
332
333 if pattern.contains("**/**/**/**") {
335 return Err(ValidationError::InvalidGitignorePattern {
336 pattern: pattern.to_string(),
337 reason: "Too many recursive directory patterns".to_string(),
338 });
339 }
340
341 Ok(())
342 }
343
344 pub fn validate_patterns<I, S>(&self, patterns: I) -> ValidationResult<()>
346 where
347 I: IntoIterator<Item = S>,
348 S: AsRef<str>,
349 {
350 let patterns: Vec<_> = patterns.into_iter().collect();
351
352 if patterns.len() > self.config.max_patterns {
354 return Err(ValidationError::PatternLimitExceeded {
355 max: self.config.max_patterns,
356 actual: patterns.len(),
357 });
358 }
359
360 for pattern in &patterns {
362 self.validate_glob_pattern(pattern.as_ref())?;
363 }
364
365 if self.config.check_conflicts {
367 self.check_pattern_conflicts(&patterns)?;
368 }
369
370 Ok(())
371 }
372
373 fn check_pattern_conflicts<S: AsRef<str>>(&self, patterns: &[S]) -> ValidationResult<()> {
375 let mut seen_patterns = HashSet::new();
376 let mut include_patterns = HashSet::new();
377 let mut exclude_patterns = HashSet::new();
378
379 for pattern in patterns {
380 let pattern_str = pattern.as_ref();
381
382 if !seen_patterns.insert(pattern_str.to_string()) {
384 return Err(ValidationError::ConflictingPatterns {
385 conflict: format!("Duplicate pattern: '{}'", pattern_str),
386 });
387 }
388
389 if pattern_str.starts_with('!') {
391 exclude_patterns.insert(&pattern_str[1..]);
392 } else {
393 include_patterns.insert(pattern_str);
394 }
395 }
396
397 for include in &include_patterns {
399 if exclude_patterns.contains(include) {
400 return Err(ValidationError::ConflictingPatterns {
401 conflict: format!("Pattern '{}' is both included and excluded", include),
402 });
403 }
404 }
405
406 Ok(())
407 }
408
409 pub fn validate_path<P: AsRef<Path>>(&self, path: P) -> ValidationResult<()> {
411 let path = path.as_ref();
412 let path_str = path.to_string_lossy();
413
414 #[cfg(windows)]
416 {
417 let invalid_chars = ['<', '>', ':', '"', '|', '?', '*'];
418 if path_str.chars().any(|c| invalid_chars.contains(&c)) {
419 return Err(ValidationError::InvalidPath {
420 path: path_str.to_string(),
421 reason: "Contains invalid characters for Windows".to_string(),
422 });
423 }
424 }
425
426 #[cfg(windows)]
428 const MAX_PATH_LEN: usize = 260;
429 #[cfg(not(windows))]
430 const MAX_PATH_LEN: usize = 4096;
431
432 if path_str.len() > MAX_PATH_LEN {
433 return Err(ValidationError::InvalidPath {
434 path: path_str.to_string(),
435 reason: format!(
436 "Path too long: {} characters (max: {})",
437 path_str.len(),
438 MAX_PATH_LEN
439 ),
440 });
441 }
442
443 if self.config.validate_path_existence && !path.exists() {
445 return Err(ValidationError::InvalidPath {
446 path: path_str.to_string(),
447 reason: "Path does not exist".to_string(),
448 });
449 }
450
451 Ok(())
452 }
453
454 pub fn validate_pattern_performance(&self, pattern: &str) -> ValidationResult<PerformanceRisk> {
456 let mut risk_score = 0;
457 let mut issues = Vec::new();
458
459 let wildcard_count = pattern.matches('*').count();
461 let single_wildcard_count = pattern.matches('?').count();
462
463 if wildcard_count > 10 {
464 risk_score += 3;
465 issues.push("High number of wildcards may impact performance".to_string());
466 }
467
468 if single_wildcard_count > 20 {
469 risk_score += 2;
470 issues.push("High number of single-char wildcards may impact performance".to_string());
471 }
472
473 let recursive_count = pattern.matches("**/").count();
475 if recursive_count > 3 {
476 risk_score += 4;
477 issues.push(
478 "Multiple recursive patterns may cause exponential matching time".to_string(),
479 );
480 }
481
482 let alternation_count = pattern.matches('{').count();
484 if alternation_count > 5 {
485 risk_score += 2;
486 issues.push("Many alternations may increase compilation time".to_string());
487 }
488
489 let char_class_count = pattern.matches('[').count();
491 if char_class_count > 10 {
492 risk_score += 1;
493 issues.push("Many character classes may slow down matching".to_string());
494 }
495
496 let risk_level = match risk_score {
497 0..=2 => PerformanceRiskLevel::Low,
498 3..=5 => PerformanceRiskLevel::Medium,
499 6..=8 => PerformanceRiskLevel::High,
500 _ => PerformanceRiskLevel::Critical,
501 };
502
503 let recommendations = self.generate_performance_recommendations(risk_score, &issues);
504
505 Ok(PerformanceRisk {
506 level: risk_level,
507 score: risk_score,
508 issues,
509 recommendations,
510 })
511 }
512
513 fn generate_performance_recommendations(
515 &self,
516 risk_score: u32,
517 issues: &[String],
518 ) -> Vec<String> {
519 let mut recommendations = Vec::new();
520
521 if risk_score > 5 {
522 recommendations
523 .push("Consider simplifying the pattern to improve performance".to_string());
524 }
525
526 if issues.iter().any(|i| i.contains("recursive")) {
527 recommendations
528 .push("Limit recursive patterns (**/) to essential cases only".to_string());
529 }
530
531 if issues.iter().any(|i| i.contains("wildcards")) {
532 recommendations.push(
533 "Use specific patterns instead of multiple wildcards where possible".to_string(),
534 );
535 }
536
537 if issues.iter().any(|i| i.contains("alternations")) {
538 recommendations.push(
539 "Consider splitting complex alternations into multiple simpler patterns"
540 .to_string(),
541 );
542 }
543
544 recommendations
545 }
546}
547
/// Outcome of a pattern performance assessment.
#[derive(Debug, Clone, PartialEq)]
pub struct PerformanceRisk {
    /// Risk bucket derived from `score`.
    pub level: PerformanceRiskLevel,
    /// Sum of the weights of every heuristic that fired.
    pub score: u32,
    /// One message per heuristic that fired.
    pub issues: Vec<String>,
    /// Suggested remedies derived from `score` and `issues`.
    pub recommendations: Vec<String>,
}
556
/// Coarse performance-risk bucket for a pattern, from least to most severe.
#[derive(Debug, Clone, PartialEq)]
pub enum PerformanceRiskLevel {
    /// Little or no expected impact.
    Low,
    /// Some expected impact.
    Medium,
    /// Significant expected impact.
    High,
    /// Severe expected impact.
    Critical,
}

impl PerformanceRiskLevel {
    /// Returns `true` for `High` and `Critical`.
    pub fn needs_attention(&self) -> bool {
        *self == PerformanceRiskLevel::High || *self == PerformanceRiskLevel::Critical
    }

    /// Returns `true` only for `Critical`.
    pub fn should_reject(&self) -> bool {
        *self == PerformanceRiskLevel::Critical
    }
}
580
/// Produces a tamer version of `pattern`.
///
/// - Runs of `*` are collapsed to at most two.
/// - A backslash and the character following it are copied verbatim, and
///   end any run of `*`.
/// - The result is capped at 1024 bytes. When byte 1024 falls inside a
///   multi-byte character, the cut moves back to the previous character
///   boundary so the output stays valid UTF-8 and the call does not panic.
pub fn sanitize_pattern(pattern: &str) -> String {
    const MAX_SANITIZED_LEN: usize = 1024;
    const MAX_CONSECUTIVE_STARS: usize = 2;

    let mut sanitized = String::with_capacity(pattern.len());
    let mut star_run: usize = 0;
    let mut chars = pattern.chars();

    // `while let` rather than `for`: the escape arm pulls a second
    // character from the same iterator.
    while let Some(ch) = chars.next() {
        match ch {
            '*' => {
                star_run += 1;
                if star_run <= MAX_CONSECUTIVE_STARS {
                    sanitized.push(ch);
                }
            }
            '\\' => {
                sanitized.push(ch);
                if let Some(escaped) = chars.next() {
                    sanitized.push(escaped);
                }
                star_run = 0;
            }
            _ => {
                star_run = 0;
                sanitized.push(ch);
            }
        }
    }

    if sanitized.len() > MAX_SANITIZED_LEN {
        // `String::truncate` panics off a char boundary, so back up first.
        // Index 0 is always a boundary, so the loop terminates.
        let mut cut = MAX_SANITIZED_LEN;
        while !sanitized.is_char_boundary(cut) {
            cut -= 1;
        }
        sanitized.truncate(cut);
    }

    sanitized
}
621
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a validator built from the default configuration.
    fn create_validator() -> PatternValidator {
        PatternValidator::default().unwrap()
    }

    /// Well-formed glob patterns are accepted.
    #[test]
    fn test_valid_glob_patterns() {
        let validator = create_validator();
        let accepted = [
            "*.rs",
            "src/**/*.rs",
            "test/[a-z]*.py",
            "{*.js,*.ts}",
            "file?.txt",
            "src/**/lib.rs",
        ];

        for pattern in accepted.iter().copied() {
            let outcome = validator.validate_glob_pattern(pattern);
            assert!(outcome.is_ok(), "Pattern should be valid: {}", pattern);
        }
    }

    /// Malformed or excessive glob patterns are rejected.
    #[test]
    fn test_invalid_glob_patterns() {
        let validator = create_validator();
        let rejected = ["[", "}", "\\", "[]", "****", "????"];

        for pattern in rejected.iter().copied() {
            let outcome = validator.validate_glob_pattern(pattern);
            assert!(outcome.is_err(), "Pattern should be invalid: {}", pattern);
        }
    }

    /// Ordinary gitignore lines, comments, and blank lines are accepted.
    #[test]
    fn test_valid_gitignore_patterns() {
        let validator = create_validator();
        let accepted = [
            "*.log",
            "!important.log",
            "temp/",
            "/absolute/path",
            "# This is a comment",
            "",
            "   ",
        ];

        for line in accepted.iter().copied() {
            let outcome = validator.validate_gitignore_pattern(line);
            assert!(
                outcome.is_ok(),
                "Gitignore pattern should be valid: {}",
                line
            );
        }
    }

    /// A pattern that is both included and excluded is a conflict.
    #[test]
    fn test_pattern_conflicts() {
        let validator = create_validator();
        let conflicting = ["*.rs", "!*.rs"];

        let outcome = validator.validate_patterns(conflicting.iter());
        assert!(outcome.is_err());
    }

    /// Repeating a pattern is a conflict.
    #[test]
    fn test_duplicate_patterns() {
        let validator = create_validator();
        let duplicated = ["*.rs", "*.py", "*.rs"];

        let outcome = validator.validate_patterns(duplicated.iter());
        assert!(outcome.is_err());
    }

    /// Supplying more patterns than `max_patterns` is rejected.
    #[test]
    fn test_pattern_limits() {
        let tight = ValidationConfig {
            max_patterns: 2,
            ..Default::default()
        };
        let validator = PatternValidator::new(tight).unwrap();
        let three = ["*.rs", "*.py", "*.js"];

        let outcome = validator.validate_patterns(three.iter());
        assert!(outcome.is_err());
    }

    /// The empty glob is governed by `allow_empty_patterns`.
    #[test]
    fn test_empty_patterns() {
        let strict = PatternValidator::new(ValidationConfig {
            allow_empty_patterns: false,
            ..Default::default()
        })
        .unwrap();
        assert!(strict.validate_glob_pattern("").is_err());

        let lenient = PatternValidator::new(ValidationConfig {
            allow_empty_patterns: true,
            ..Default::default()
        })
        .unwrap();
        assert!(lenient.validate_glob_pattern("").is_ok());
    }

    /// Simple patterns score low; heavily recursive ones need attention.
    #[test]
    fn test_performance_validation() {
        let validator = create_validator();

        let simple = validator.validate_pattern_performance("*.rs").unwrap();
        assert_eq!(simple.level, PerformanceRiskLevel::Low);

        let heavy = validator
            .validate_pattern_performance("**/**/**/**/**/**/*****.rs")
            .unwrap();
        assert!(matches!(
            heavy.level,
            PerformanceRiskLevel::High | PerformanceRiskLevel::Critical
        ));
        assert!(heavy.level.needs_attention());
    }

    /// Ordinary relative paths are accepted.
    #[test]
    fn test_path_validation() {
        let validator = create_validator();

        for candidate in ["src/main.rs", "./relative/path", "."].iter().copied() {
            assert!(validator.validate_path(candidate).is_ok());
        }
    }

    /// Star runs are collapsed and overlong output is capped.
    #[test]
    fn test_pattern_sanitization() {
        assert_eq!(sanitize_pattern("****"), "**");
        assert_eq!(sanitize_pattern("a****b"), "a**b");
        assert_eq!(sanitize_pattern("normal.rs"), "normal.rs");

        let oversized = "a".repeat(2000);
        let capped = sanitize_pattern(&oversized);
        assert!(capped.len() <= 1024);
    }

    /// Recognised escapes are accepted; unknown ones are rejected.
    #[test]
    fn test_escape_sequences() {
        let validator = create_validator();

        assert!(validator.validate_glob_pattern(r"\*literal\*").is_ok());
        assert!(validator.validate_glob_pattern(r"file\?.txt").is_ok());
        assert!(validator.validate_glob_pattern(r"\[not a class\]").is_ok());

        assert!(validator.validate_glob_pattern(r"\z").is_err());
    }

    /// Single-level groups are fine; runaway nesting is rejected.
    #[test]
    fn test_nested_patterns() {
        let validator = create_validator();

        assert!(validator.validate_glob_pattern("src/{lib,main}.rs").is_ok());
        assert!(validator.validate_glob_pattern("test/[a-z]*.py").is_ok());

        assert!(validator.validate_glob_pattern("{{{{{{{{").is_err());
        assert!(validator.validate_glob_pattern("[[[[[[").is_err());
    }
}