1use std::path::Path;
2use std::collections::HashSet;
3use thiserror::Error;
4use regex::Regex;
5
6#[derive(Error, Debug)]
8pub enum ValidationError {
9 #[error("Invalid glob pattern '{pattern}': {reason}")]
10 InvalidGlobPattern {
11 pattern: String,
12 reason: String,
13 },
14
15 #[error("Invalid gitignore pattern '{pattern}': {reason}")]
16 InvalidGitignorePattern {
17 pattern: String,
18 reason: String,
19 },
20
21 #[error("Pattern too complex: {reason}")]
22 PatternTooComplex {
23 reason: String,
24 },
25
26 #[error("Conflicting patterns detected: {conflict}")]
27 ConflictingPatterns {
28 conflict: String,
29 },
30
31 #[error("Invalid path '{path}': {reason}")]
32 InvalidPath {
33 path: String,
34 reason: String,
35 },
36
37 #[error("Pattern limit exceeded: maximum {max} patterns allowed, got {actual}")]
38 PatternLimitExceeded {
39 max: usize,
40 actual: usize,
41 },
42
43 #[error("Empty pattern not allowed")]
44 EmptyPattern,
45
46 #[error("Regex compilation failed for pattern '{pattern}': {source}")]
47 RegexError {
48 pattern: String,
49 #[source]
50 source: regex::Error,
51 },
52
53 #[error("IO error while validating path '{path}': {source}")]
54 IoError {
55 path: String,
56 #[source]
57 source: std::io::Error,
58 },
59}
60
61pub type ValidationResult<T> = Result<T, ValidationError>;
63
/// Tunable limits and feature switches for pattern and path validation.
#[derive(Clone, Debug)]
pub struct ValidationConfig {
    /// Largest number of patterns accepted in a single pattern set.
    pub max_patterns: usize,
    /// Longest accepted pattern, compared against `str::len` (a byte count).
    pub max_pattern_length: usize,
    /// Upper bound for the number of recursive (`**/`) segments in one pattern.
    pub max_glob_depth: usize,
    /// Whether an empty glob pattern is accepted instead of being an error.
    pub allow_empty_patterns: bool,
    /// Whether path validation also requires the path to exist on disk.
    pub validate_path_existence: bool,
    /// Whether pattern sets are checked for duplicates and include/exclude clashes.
    pub check_conflicts: bool,
    /// Time budget in milliseconds.
    /// NOTE(review): no code in this file reads this field; confirm where it is enforced.
    pub max_validation_time_ms: u64,
}

impl Default for ValidationConfig {
    /// Returns the stock configuration: conflict checking on, empty patterns
    /// and path-existence checks off.
    fn default() -> Self {
        ValidationConfig {
            // Size limits.
            max_patterns: 1000,
            max_pattern_length: 2048,
            max_glob_depth: 20,
            max_validation_time_ms: 5000,
            // Feature switches.
            allow_empty_patterns: false,
            validate_path_existence: false,
            check_conflicts: true,
        }
    }
}
96
97pub struct PatternValidator {
99 config: ValidationConfig,
100 glob_regex: Regex,
101 dangerous_patterns: HashSet<String>,
102}
103
104impl PatternValidator {
105 pub fn new(config: ValidationConfig) -> ValidationResult<Self> {
107 let glob_regex = Regex::new(r"[\*\?\[\]{}]")
109 .map_err(|e| ValidationError::RegexError {
110 pattern: r"[\*\?\[\]{}]".to_string(),
111 source: e,
112 })?;
113
114 let mut dangerous_patterns = HashSet::new();
116 dangerous_patterns.insert("**/*/**/*/**/*/**/*/**".to_string()); dangerous_patterns.insert("*".repeat(100)); dangerous_patterns.insert("?".repeat(100)); Ok(Self {
121 config,
122 glob_regex,
123 dangerous_patterns,
124 })
125 }
126
127 pub fn default() -> ValidationResult<Self> {
129 Self::new(ValidationConfig::default())
130 }
131
132 pub fn validate_glob_pattern(&self, pattern: &str) -> ValidationResult<()> {
134 if pattern.is_empty() && !self.config.allow_empty_patterns {
136 return Err(ValidationError::EmptyPattern);
137 }
138
139 if pattern.len() > self.config.max_pattern_length {
141 return Err(ValidationError::InvalidGlobPattern {
142 pattern: pattern.to_string(),
143 reason: format!(
144 "Pattern too long: {} characters (max: {})",
145 pattern.len(),
146 self.config.max_pattern_length
147 ),
148 });
149 }
150
151 if self.dangerous_patterns.contains(pattern) {
153 return Err(ValidationError::PatternTooComplex {
154 reason: "Pattern is known to cause performance issues".to_string(),
155 });
156 }
157
158 self.validate_glob_syntax(pattern)?;
160
161 self.validate_glob_depth(pattern)?;
163
164 self.validate_glob_sequences(pattern)?;
166
167 Ok(())
168 }
169
170 fn validate_glob_syntax(&self, pattern: &str) -> ValidationResult<()> {
172 let mut bracket_depth = 0;
173 let mut brace_depth = 0;
174 let mut chars = pattern.chars().peekable();
175
176 while let Some(ch) = chars.next() {
177 match ch {
178 '[' => {
179 bracket_depth += 1;
180 if bracket_depth > 1 {
181 return Err(ValidationError::InvalidGlobPattern {
182 pattern: pattern.to_string(),
183 reason: "Nested character classes not allowed".to_string(),
184 });
185 }
186 if chars.peek() == Some(&']') {
188 return Err(ValidationError::InvalidGlobPattern {
189 pattern: pattern.to_string(),
190 reason: "Empty character class []".to_string(),
191 });
192 }
193 }
194 ']' => {
195 if bracket_depth == 0 {
196 return Err(ValidationError::InvalidGlobPattern {
197 pattern: pattern.to_string(),
198 reason: "Unmatched closing bracket ']'".to_string(),
199 });
200 }
201 bracket_depth -= 1;
202 }
203 '{' => {
204 brace_depth += 1;
205 if brace_depth > 3 {
206 return Err(ValidationError::InvalidGlobPattern {
207 pattern: pattern.to_string(),
208 reason: "Too many nested braces (max 3)".to_string(),
209 });
210 }
211 }
212 '}' => {
213 if brace_depth == 0 {
214 return Err(ValidationError::InvalidGlobPattern {
215 pattern: pattern.to_string(),
216 reason: "Unmatched closing brace '}'".to_string(),
217 });
218 }
219 brace_depth -= 1;
220 }
221 '\\' => {
222 if let Some(next_ch) = chars.next() {
224 if !matches!(next_ch, '*' | '?' | '[' | ']' | '{' | '}' | '\\' | '/' | '!' | '-' | '^') {
225 return Err(ValidationError::InvalidGlobPattern {
226 pattern: pattern.to_string(),
227 reason: format!("Invalid escape sequence '\\{}'", next_ch),
228 });
229 }
230 } else {
231 return Err(ValidationError::InvalidGlobPattern {
232 pattern: pattern.to_string(),
233 reason: "Trailing backslash".to_string(),
234 });
235 }
236 }
237 _ => {}
238 }
239 }
240
241 if bracket_depth > 0 {
243 return Err(ValidationError::InvalidGlobPattern {
244 pattern: pattern.to_string(),
245 reason: "Unclosed character class '['".to_string(),
246 });
247 }
248
249 if brace_depth > 0 {
250 return Err(ValidationError::InvalidGlobPattern {
251 pattern: pattern.to_string(),
252 reason: "Unclosed brace group '{'".to_string(),
253 });
254 }
255
256 Ok(())
257 }
258
259 fn validate_glob_depth(&self, pattern: &str) -> ValidationResult<()> {
261 let depth = pattern.matches("**/").count() + pattern.matches("/**/").count();
262 if depth > self.config.max_glob_depth {
263 return Err(ValidationError::PatternTooComplex {
264 reason: format!(
265 "Pattern depth {} exceeds maximum {}",
266 depth,
267 self.config.max_glob_depth
268 ),
269 });
270 }
271 Ok(())
272 }
273
274 fn validate_glob_sequences(&self, pattern: &str) -> ValidationResult<()> {
276 if pattern.contains("****") {
278 return Err(ValidationError::InvalidGlobPattern {
279 pattern: pattern.to_string(),
280 reason: "Too many consecutive wildcards".to_string(),
281 });
282 }
283
284 if pattern.contains("????") {
286 return Err(ValidationError::InvalidGlobPattern {
287 pattern: pattern.to_string(),
288 reason: "Too many consecutive single-character wildcards".to_string(),
289 });
290 }
291
292 if pattern.contains("**/**/**/**") {
294 return Err(ValidationError::PatternTooComplex {
295 reason: "Too many recursive directory wildcards".to_string(),
296 });
297 }
298
299 Ok(())
300 }
301
302 pub fn validate_gitignore_pattern(&self, pattern: &str) -> ValidationResult<()> {
304 let trimmed = pattern.trim();
306 if trimmed.starts_with('#') || trimmed.is_empty() {
307 return Ok(());
308 }
309
310 if trimmed.is_empty() && !self.config.allow_empty_patterns {
312 return Err(ValidationError::EmptyPattern);
313 }
314
315 if pattern.len() > self.config.max_pattern_length {
317 return Err(ValidationError::InvalidGitignorePattern {
318 pattern: pattern.to_string(),
319 reason: format!(
320 "Pattern too long: {} characters (max: {})",
321 pattern.len(),
322 self.config.max_pattern_length
323 ),
324 });
325 }
326
327 self.validate_gitignore_syntax(trimmed)?;
329
330 Ok(())
331 }
332
333 fn validate_gitignore_syntax(&self, pattern: &str) -> ValidationResult<()> {
335 let pattern = if pattern.starts_with('!') {
337 &pattern[1..]
338 } else {
339 pattern
340 };
341
342 let pattern = pattern.trim_end_matches('/');
344
345 self.validate_glob_pattern(pattern)?;
347
348 if pattern.contains("**/**/**/**") {
350 return Err(ValidationError::InvalidGitignorePattern {
351 pattern: pattern.to_string(),
352 reason: "Too many recursive directory patterns".to_string(),
353 });
354 }
355
356 Ok(())
357 }
358
359 pub fn validate_patterns<I, S>(&self, patterns: I) -> ValidationResult<()>
361 where
362 I: IntoIterator<Item = S>,
363 S: AsRef<str>,
364 {
365 let patterns: Vec<_> = patterns.into_iter().collect();
366
367 if patterns.len() > self.config.max_patterns {
369 return Err(ValidationError::PatternLimitExceeded {
370 max: self.config.max_patterns,
371 actual: patterns.len(),
372 });
373 }
374
375 for pattern in &patterns {
377 self.validate_glob_pattern(pattern.as_ref())?;
378 }
379
380 if self.config.check_conflicts {
382 self.check_pattern_conflicts(&patterns)?;
383 }
384
385 Ok(())
386 }
387
388 fn check_pattern_conflicts<S: AsRef<str>>(&self, patterns: &[S]) -> ValidationResult<()> {
390 let mut seen_patterns = HashSet::new();
391 let mut include_patterns = HashSet::new();
392 let mut exclude_patterns = HashSet::new();
393
394 for pattern in patterns {
395 let pattern_str = pattern.as_ref();
396
397 if !seen_patterns.insert(pattern_str.to_string()) {
399 return Err(ValidationError::ConflictingPatterns {
400 conflict: format!("Duplicate pattern: '{}'", pattern_str),
401 });
402 }
403
404 if pattern_str.starts_with('!') {
406 exclude_patterns.insert(&pattern_str[1..]);
407 } else {
408 include_patterns.insert(pattern_str);
409 }
410 }
411
412 for include in &include_patterns {
414 if exclude_patterns.contains(include) {
415 return Err(ValidationError::ConflictingPatterns {
416 conflict: format!("Pattern '{}' is both included and excluded", include),
417 });
418 }
419 }
420
421 Ok(())
422 }
423
424 pub fn validate_path<P: AsRef<Path>>(&self, path: P) -> ValidationResult<()> {
426 let path = path.as_ref();
427 let path_str = path.to_string_lossy();
428
429 #[cfg(windows)]
431 {
432 let invalid_chars = ['<', '>', ':', '"', '|', '?', '*'];
433 if path_str.chars().any(|c| invalid_chars.contains(&c)) {
434 return Err(ValidationError::InvalidPath {
435 path: path_str.to_string(),
436 reason: "Contains invalid characters for Windows".to_string(),
437 });
438 }
439 }
440
441 #[cfg(windows)]
443 const MAX_PATH_LEN: usize = 260;
444 #[cfg(not(windows))]
445 const MAX_PATH_LEN: usize = 4096;
446
447 if path_str.len() > MAX_PATH_LEN {
448 return Err(ValidationError::InvalidPath {
449 path: path_str.to_string(),
450 reason: format!("Path too long: {} characters (max: {})", path_str.len(), MAX_PATH_LEN),
451 });
452 }
453
454 if self.config.validate_path_existence && !path.exists() {
456 return Err(ValidationError::InvalidPath {
457 path: path_str.to_string(),
458 reason: "Path does not exist".to_string(),
459 });
460 }
461
462 Ok(())
463 }
464
465 pub fn validate_pattern_performance(&self, pattern: &str) -> ValidationResult<PerformanceRisk> {
467 let mut risk_score = 0;
468 let mut issues = Vec::new();
469
470 let wildcard_count = pattern.matches('*').count();
472 let single_wildcard_count = pattern.matches('?').count();
473
474 if wildcard_count > 10 {
475 risk_score += 3;
476 issues.push("High number of wildcards may impact performance".to_string());
477 }
478
479 if single_wildcard_count > 20 {
480 risk_score += 2;
481 issues.push("High number of single-char wildcards may impact performance".to_string());
482 }
483
484 let recursive_count = pattern.matches("**/").count();
486 if recursive_count > 3 {
487 risk_score += 4;
488 issues.push("Multiple recursive patterns may cause exponential matching time".to_string());
489 }
490
491 let alternation_count = pattern.matches('{').count();
493 if alternation_count > 5 {
494 risk_score += 2;
495 issues.push("Many alternations may increase compilation time".to_string());
496 }
497
498 let char_class_count = pattern.matches('[').count();
500 if char_class_count > 10 {
501 risk_score += 1;
502 issues.push("Many character classes may slow down matching".to_string());
503 }
504
505 let risk_level = match risk_score {
506 0..=2 => PerformanceRiskLevel::Low,
507 3..=5 => PerformanceRiskLevel::Medium,
508 6..=8 => PerformanceRiskLevel::High,
509 _ => PerformanceRiskLevel::Critical,
510 };
511
512 let recommendations = self.generate_performance_recommendations(risk_score, &issues);
513
514 Ok(PerformanceRisk {
515 level: risk_level,
516 score: risk_score,
517 issues,
518 recommendations,
519 })
520 }
521
522 fn generate_performance_recommendations(&self, risk_score: u32, issues: &[String]) -> Vec<String> {
524 let mut recommendations = Vec::new();
525
526 if risk_score > 5 {
527 recommendations.push("Consider simplifying the pattern to improve performance".to_string());
528 }
529
530 if issues.iter().any(|i| i.contains("recursive")) {
531 recommendations.push("Limit recursive patterns (**/) to essential cases only".to_string());
532 }
533
534 if issues.iter().any(|i| i.contains("wildcards")) {
535 recommendations.push("Use specific patterns instead of multiple wildcards where possible".to_string());
536 }
537
538 if issues.iter().any(|i| i.contains("alternations")) {
539 recommendations.push("Consider splitting complex alternations into multiple simpler patterns".to_string());
540 }
541
542 recommendations
543 }
544}
545
546#[derive(Debug, Clone, PartialEq)]
548pub struct PerformanceRisk {
549 pub level: PerformanceRiskLevel,
550 pub score: u32,
551 pub issues: Vec<String>,
552 pub recommendations: Vec<String>,
553}
554
/// Coarse severity classification of a pattern's performance risk.
#[derive(Clone, Debug, PartialEq)]
pub enum PerformanceRiskLevel {
    Low,
    Medium,
    High,
    Critical,
}

impl PerformanceRiskLevel {
    /// Returns `true` for `High` and `Critical`.
    pub fn needs_attention(&self) -> bool {
        match self {
            PerformanceRiskLevel::High | PerformanceRiskLevel::Critical => true,
            PerformanceRiskLevel::Low | PerformanceRiskLevel::Medium => false,
        }
    }

    /// Returns `true` only for `Critical`.
    pub fn should_reject(&self) -> bool {
        *self == PerformanceRiskLevel::Critical
    }
}
575
/// Produces a tamer copy of `pattern`.
///
/// * Runs of more than two consecutive `*` are collapsed to `**`.
/// * A backslash and the character it escapes are copied verbatim as a pair;
///   a lone trailing backslash is copied as-is.
/// * The result is limited to 1024 bytes. The cut is always made between
///   whole units (a character, or a backslash plus its escaped character), so
///   it never lands inside a multi-byte UTF-8 character and never leaves a
///   dangling backslash that the input did not already have.
pub fn sanitize_pattern(pattern: &str) -> String {
    const MAX_SANITIZED_LEN: usize = 1024;

    let mut sanitized = String::with_capacity(pattern.len().min(MAX_SANITIZED_LEN));
    let mut star_run: usize = 0;
    let mut chars = pattern.chars();

    while let Some(ch) = chars.next() {
        // `escaped` is the character consumed together with a backslash.
        let escaped = match ch {
            '*' => {
                star_run += 1;
                if star_run > 2 {
                    // Third and later '*' of a run are dropped.
                    continue;
                }
                None
            }
            '\\' => {
                star_run = 0;
                chars.next()
            }
            _ => {
                star_run = 0;
                None
            }
        };

        // Enforce the limit while building. Truncating afterwards with
        // `String::truncate` panics when the cut is not on a char boundary.
        let needed = ch.len_utf8() + escaped.map_or(0, |c| c.len_utf8());
        if sanitized.len() + needed > MAX_SANITIZED_LEN {
            break;
        }

        sanitized.push(ch);
        if let Some(c) = escaped {
            sanitized.push(c);
        }
    }

    sanitized
}
616
#[cfg(test)]
mod tests {
    use super::*;

    /// Validator built from the default configuration.
    fn create_validator() -> PatternValidator {
        PatternValidator::default().unwrap()
    }

    /// Validator whose configuration differs from the default only in
    /// `allow_empty_patterns`.
    fn validator_with_empty_allowed(allow: bool) -> PatternValidator {
        let config = ValidationConfig {
            allow_empty_patterns: allow,
            ..Default::default()
        };
        PatternValidator::new(config).unwrap()
    }

    #[test]
    fn test_valid_glob_patterns() {
        let validator = create_validator();
        let accepted = [
            "*.rs",
            "src/**/*.rs",
            "test/[a-z]*.py",
            "{*.js,*.ts}",
            "file?.txt",
            "src/**/lib.rs",
        ];

        for pattern in accepted.iter() {
            let outcome = validator.validate_glob_pattern(pattern);
            assert!(outcome.is_ok(), "Pattern should be valid: {}", pattern);
        }
    }

    #[test]
    fn test_invalid_glob_patterns() {
        let validator = create_validator();
        // Unclosed class, stray brace, trailing backslash, empty class,
        // and over-long wildcard runs.
        let rejected = ["[", "}", "\\", "[]", "****", "????"];

        for pattern in rejected.iter() {
            let outcome = validator.validate_glob_pattern(pattern);
            assert!(outcome.is_err(), "Pattern should be invalid: {}", pattern);
        }
    }

    #[test]
    fn test_valid_gitignore_patterns() {
        let validator = create_validator();
        let accepted = [
            "*.log",
            "!important.log",
            "temp/",
            "/absolute/path",
            "# This is a comment",
            "",
            " ",
        ];

        for pattern in accepted.iter() {
            let outcome = validator.validate_gitignore_pattern(pattern);
            assert!(outcome.is_ok(), "Gitignore pattern should be valid: {}", pattern);
        }
    }

    #[test]
    fn test_pattern_conflicts() {
        // The same glob both included and excluded.
        let clashing = ["*.rs", "!*.rs"];
        let outcome = create_validator().validate_patterns(clashing.iter());
        assert!(outcome.is_err());
    }

    #[test]
    fn test_duplicate_patterns() {
        let repeated = ["*.rs", "*.py", "*.rs"];
        let outcome = create_validator().validate_patterns(repeated.iter());
        assert!(outcome.is_err());
    }

    #[test]
    fn test_pattern_limits() {
        let validator = PatternValidator::new(ValidationConfig {
            max_patterns: 2,
            ..Default::default()
        })
        .unwrap();

        let three_patterns = ["*.rs", "*.py", "*.js"];
        assert!(validator.validate_patterns(three_patterns.iter()).is_err());
    }

    #[test]
    fn test_empty_patterns() {
        let strict = validator_with_empty_allowed(false);
        assert!(strict.validate_glob_pattern("").is_err());

        let lenient = validator_with_empty_allowed(true);
        assert!(lenient.validate_glob_pattern("").is_ok());
    }

    #[test]
    fn test_performance_validation() {
        let validator = create_validator();

        let cheap = validator.validate_pattern_performance("*.rs").unwrap();
        assert_eq!(cheap.level, PerformanceRiskLevel::Low);

        let costly = validator
            .validate_pattern_performance("**/**/**/**/**/**/*****.rs")
            .unwrap();
        assert!(matches!(
            costly.level,
            PerformanceRiskLevel::High | PerformanceRiskLevel::Critical
        ));
        assert!(costly.level.needs_attention());
    }

    #[test]
    fn test_path_validation() {
        let validator = create_validator();

        for path in ["src/main.rs", "./relative/path", "."].iter() {
            assert!(validator.validate_path(path).is_ok());
        }
    }

    #[test]
    fn test_pattern_sanitization() {
        let cases = [
            ("****", "**"),
            ("a****b", "a**b"),
            ("normal.rs", "normal.rs"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(sanitize_pattern(input), expected);
        }

        let oversized = "a".repeat(2000);
        assert!(sanitize_pattern(&oversized).len() <= 1024);
    }

    #[test]
    fn test_escape_sequences() {
        let validator = create_validator();

        for pattern in [r"\*literal\*", r"file\?.txt", r"\[not a class\]"].iter() {
            assert!(validator.validate_glob_pattern(pattern).is_ok());
        }

        // 'z' is not an escapable character.
        assert!(validator.validate_glob_pattern(r"\z").is_err());
    }

    #[test]
    fn test_nested_patterns() {
        let validator = create_validator();

        for pattern in ["src/{lib,main}.rs", "test/[a-z]*.py"].iter() {
            assert!(validator.validate_glob_pattern(pattern).is_ok());
        }

        for pattern in ["{{{{{{{{", "[[[[[["].iter() {
            assert!(validator.validate_glob_pattern(pattern).is_err());
        }
    }
}