context_creator/core/
walker.rs

1//! Directory walking functionality with .gitignore and .context-creator-ignore support
2
3use crate::utils::error::ContextCreatorError;
4use crate::utils::file_ext::FileType;
5use anyhow::Result;
6use glob::Pattern;
7use ignore::{Walk, WalkBuilder};
8use rayon::prelude::*;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11use tracing::warn;
12
/// Compiled priority rule for efficient pattern matching
///
/// Pairs a pre-compiled glob [`Pattern`] with a weight. When the pattern
/// matches a file's path relative to the walk root, the weight is added to
/// the file's base priority.
///
/// # Priority Calculation
/// Final priority = base_priority + weight (if pattern matches)
///
/// # Pattern Matching
/// Rules are evaluated in slice order with first-match-wins semantics: the
/// first rule whose pattern matches decides the adjustment and later rules
/// are not consulted.
#[derive(Debug, Clone)]
pub struct CompiledPriority {
    /// Pre-compiled glob pattern, matched against the file's relative path
    pub matcher: Pattern,
    /// Priority weight to add to base priority (can be negative)
    pub weight: f32,
    /// The pattern string `matcher` was compiled from, kept for debugging
    /// and error reporting
    pub original_pattern: String,
}
34
35impl CompiledPriority {
36    /// Create a CompiledPriority from a pattern string
37    pub fn new(pattern: &str, weight: f32) -> Result<Self, glob::PatternError> {
38        let matcher = Pattern::new(pattern)?;
39        Ok(Self {
40            matcher,
41            weight,
42            original_pattern: pattern.to_string(),
43        })
44    }
45
46    /// Convert from config::Priority to CompiledPriority with error handling
47    pub fn try_from_config_priority(
48        priority: &crate::config::Priority,
49    ) -> Result<Self, glob::PatternError> {
50        Self::new(&priority.pattern, priority.weight)
51    }
52}
53
/// Options for walking directories
///
/// Consumed by `walk_directory` and the helpers it calls; see the field
/// documentation for how each setting is applied.
#[derive(Debug, Clone)]
pub struct WalkOptions {
    /// Maximum file size in bytes; larger files are skipped during a
    /// directory walk. `None` disables the limit.
    pub max_file_size: Option<usize>,
    /// Follow symbolic links
    pub follow_links: bool,
    /// Include hidden files
    pub include_hidden: bool,
    /// Use parallel processing (selects the rayon-based walk over the
    /// sequential one)
    pub parallel: bool,
    /// Custom ignore file name (default: .context-creator-ignore)
    pub ignore_file: String,
    /// Additional glob patterns to ignore
    pub ignore_patterns: Vec<String>,
    /// Only include files matching these patterns
    pub include_patterns: Vec<String>,
    /// Custom priority rules for file prioritization, evaluated in order
    /// (first match wins)
    pub custom_priorities: Vec<CompiledPriority>,
    /// Filter out binary files by extension
    // NOTE(review): this flag is not read anywhere in the visible part of this
    // file; confirm where (or whether) the filtering is actually applied.
    pub filter_binary_files: bool,
}
76
77impl WalkOptions {
78    /// Create WalkOptions from CLI config
79    pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
80        // Convert config priorities to CompiledPriority with error handling
81        let mut custom_priorities = Vec::new();
82        for priority in &config.custom_priorities {
83            match CompiledPriority::try_from_config_priority(priority) {
84                Ok(compiled) => custom_priorities.push(compiled),
85                Err(e) => {
86                    return Err(ContextCreatorError::ConfigError(format!(
87                        "Invalid glob pattern '{}' in custom priorities: {e}",
88                        priority.pattern
89                    ))
90                    .into());
91                }
92            }
93        }
94
95        // Get include patterns from CLI config and filter out empty/whitespace patterns
96        let include_patterns = config
97            .get_include_patterns()
98            .into_iter()
99            .filter(|pattern| !pattern.trim().is_empty())
100            .collect();
101
102        // Get ignore patterns from CLI config and filter out empty/whitespace patterns
103        let ignore_patterns = config
104            .get_ignore_patterns()
105            .into_iter()
106            .filter(|pattern| !pattern.trim().is_empty())
107            .collect();
108
109        Ok(WalkOptions {
110            max_file_size: Some(10 * 1024 * 1024), // 10MB default
111            follow_links: false,
112            include_hidden: false,
113            parallel: true,
114            ignore_file: ".context-creator-ignore".to_string(),
115            ignore_patterns,
116            include_patterns,
117            custom_priorities,
118            filter_binary_files: config.get_prompt().is_some(),
119        })
120    }
121}
122
123impl Default for WalkOptions {
124    fn default() -> Self {
125        WalkOptions {
126            max_file_size: Some(10 * 1024 * 1024), // 10MB
127            follow_links: false,
128            include_hidden: false,
129            parallel: true,
130            ignore_file: ".context-creator-ignore".to_string(),
131            ignore_patterns: vec![],
132            include_patterns: vec![],
133            custom_priorities: vec![],
134            filter_binary_files: false,
135        }
136    }
137}
138
/// Information about a file found during walking
///
/// The walker fills in the path, size, type and priority fields. The
/// relationship vectors (`imports`, `imported_by`, `function_calls`,
/// `type_references`, `exported_functions`) are left empty by the walker.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path to the file. Absolute when produced by a directory walk (the root
    /// is canonicalized first); when the walk root is itself a file, this is
    /// the root path exactly as the caller supplied it.
    pub path: PathBuf,
    /// Path relative to the root directory, or just the file name when the
    /// walk root is itself a file
    pub relative_path: PathBuf,
    /// File size in bytes
    pub size: u64,
    /// File type based on extension
    pub file_type: FileType,
    /// Priority score (higher is more important)
    pub priority: f32,
    /// Files that this file imports/depends on (for semantic analysis)
    pub imports: Vec<PathBuf>,
    /// Files that import this file (reverse dependencies)
    pub imported_by: Vec<PathBuf>,
    /// Function calls made by this file (for --include-callers analysis)
    pub function_calls: Vec<crate::core::semantic::analyzer::FunctionCall>,
    /// Type references used by this file (for --include-types analysis)
    pub type_references: Vec<crate::core::semantic::analyzer::TypeReference>,
    /// Function definitions exported by this file (for --include-callers analysis)
    pub exported_functions: Vec<crate::core::semantic::analyzer::FunctionDefinition>,
}
163
164impl FileInfo {
165    /// Get a display string for the file type
166    pub fn file_type_display(&self) -> &'static str {
167        use crate::utils::file_ext::FileType;
168        match self.file_type {
169            FileType::Rust => "Rust",
170            FileType::Python => "Python",
171            FileType::JavaScript => "JavaScript",
172            FileType::TypeScript => "TypeScript",
173            FileType::Go => "Go",
174            FileType::Java => "Java",
175            FileType::Cpp => "C++",
176            FileType::C => "C",
177            FileType::CSharp => "C#",
178            FileType::Ruby => "Ruby",
179            FileType::Php => "PHP",
180            FileType::Swift => "Swift",
181            FileType::Kotlin => "Kotlin",
182            FileType::Scala => "Scala",
183            FileType::Haskell => "Haskell",
184            FileType::Dart => "Dart",
185            FileType::Lua => "Lua",
186            FileType::R => "R",
187            FileType::Julia => "Julia",
188            FileType::Elixir => "Elixir",
189            FileType::Elm => "Elm",
190            FileType::Markdown => "Markdown",
191            FileType::Json => "JSON",
192            FileType::Yaml => "YAML",
193            FileType::Toml => "TOML",
194            FileType::Xml => "XML",
195            FileType::Html => "HTML",
196            FileType::Css => "CSS",
197            FileType::Text => "Text",
198            FileType::Other => "Other",
199        }
200    }
201}
202
203/// Walk a path (file or directory) and collect file information
204pub fn walk_directory(root: &Path, options: WalkOptions) -> Result<Vec<FileInfo>> {
205    if !root.exists() {
206        return Err(ContextCreatorError::InvalidPath(format!(
207            "Path does not exist: {}",
208            root.display()
209        ))
210        .into());
211    }
212
213    // Handle individual files
214    if root.is_file() {
215        let metadata = root.metadata()?;
216        let file_type = FileType::from_path(root);
217        let relative_path = PathBuf::from(
218            root.file_name()
219                .ok_or_else(|| anyhow::anyhow!("Invalid file name"))?,
220        );
221        let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
222
223        let file_info = FileInfo {
224            path: root.to_path_buf(),
225            relative_path,
226            size: metadata.len(),
227            file_type,
228            priority,
229            imports: Vec::new(),
230            imported_by: Vec::new(),
231            function_calls: Vec::new(),
232            type_references: Vec::new(),
233            exported_functions: Vec::new(),
234        };
235        return Ok(vec![file_info]);
236    }
237
238    if !root.is_dir() {
239        return Err(ContextCreatorError::InvalidPath(format!(
240            "Path is neither a file nor a directory: {}",
241            root.display()
242        ))
243        .into());
244    }
245
246    let root = root.canonicalize()?;
247    let walker = build_walker(&root, &options)?;
248
249    if options.parallel {
250        walk_parallel(walker, &root, &options)
251    } else {
252        walk_sequential(walker, &root, &options)
253    }
254}
255
256/// Sanitize include patterns to prevent security issues
257pub fn sanitize_pattern(pattern: &str) -> Result<String> {
258    // Length limit to prevent resource exhaustion
259    if pattern.len() > 1000 {
260        return Err(ContextCreatorError::InvalidConfiguration(
261            "Pattern too long (max 1000 characters)".to_string(),
262        )
263        .into());
264    }
265
266    // No null bytes, control characters, or dangerous Unicode characters
267    if pattern.contains('\0')
268        || pattern.chars().any(|c| {
269            c.is_control() ||
270            c == '\u{2028}' ||  // Line separator
271            c == '\u{2029}' ||  // Paragraph separator
272            c == '\u{FEFF}' // Byte order mark
273        })
274    {
275        return Err(ContextCreatorError::InvalidConfiguration(
276            "Pattern contains invalid characters (null bytes or control characters)".to_string(),
277        )
278        .into());
279    }
280
281    // No absolute paths to prevent directory traversal
282    if pattern.starts_with('/') || pattern.starts_with('\\') {
283        return Err(ContextCreatorError::InvalidConfiguration(
284            "Absolute paths not allowed in patterns".to_string(),
285        )
286        .into());
287    }
288
289    // Prevent directory traversal
290    if pattern.contains("..") {
291        return Err(ContextCreatorError::InvalidConfiguration(
292            "Directory traversal (..) not allowed in patterns".to_string(),
293        )
294        .into());
295    }
296
297    Ok(pattern.to_string())
298}
299
300/// Build the ignore walker with configured options
301fn build_walker(root: &Path, options: &WalkOptions) -> Result<Walk> {
302    let mut builder = WalkBuilder::new(root);
303
304    // Configure the walker
305    builder
306        .follow_links(options.follow_links)
307        .hidden(!options.include_hidden)
308        .git_ignore(true)
309        .git_global(true)
310        .git_exclude(true)
311        .ignore(true)
312        .parents(true)
313        .add_custom_ignore_filename(&options.ignore_file);
314
315    // Handle both include and ignore patterns using OverrideBuilder
316    if !options.include_patterns.is_empty() || !options.ignore_patterns.is_empty() {
317        let mut override_builder = ignore::overrides::OverrideBuilder::new(root);
318
319        // If we have no include patterns but have ignore patterns, we need to include everything first
320        if options.include_patterns.is_empty() && !options.ignore_patterns.is_empty() {
321            // Add a pattern to include everything
322            override_builder.add("**/*").map_err(|e| {
323                ContextCreatorError::InvalidConfiguration(format!(
324                    "Failed to add include-all pattern: {e}"
325                ))
326            })?;
327        }
328
329        // Add include patterns first (without prefix for inclusion)
330        for pattern in &options.include_patterns {
331            if !pattern.trim().is_empty() {
332                // Sanitize pattern for security
333                let sanitized_pattern = sanitize_pattern(pattern)?;
334
335                // Include patterns are added directly (not as negations)
336                override_builder.add(&sanitized_pattern).map_err(|e| {
337                    ContextCreatorError::InvalidConfiguration(format!(
338                        "Invalid include pattern '{pattern}': {e}"
339                    ))
340                })?;
341            }
342        }
343
344        // Add ignore patterns after include patterns (with ! prefix for exclusion)
345        // This ensures ignore patterns take precedence over include patterns
346        for pattern in &options.ignore_patterns {
347            if !pattern.trim().is_empty() {
348                // Sanitize pattern for security
349                let sanitized_pattern = sanitize_pattern(pattern)?;
350
351                // Prefix with ! to make it an ignore pattern
352                let ignore_pattern = format!("!{sanitized_pattern}");
353                override_builder.add(&ignore_pattern).map_err(|e| {
354                    ContextCreatorError::InvalidConfiguration(format!(
355                        "Invalid ignore pattern '{pattern}': {e}"
356                    ))
357                })?;
358            }
359        }
360
361        let overrides = override_builder.build().map_err(|e| {
362            ContextCreatorError::InvalidConfiguration(format!(
363                "Failed to build pattern overrides: {e}"
364            ))
365        })?;
366
367        builder.overrides(overrides);
368    }
369
370    Ok(builder.build())
371}
372
373/// Walk directory sequentially
374fn walk_sequential(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
375    let mut files = Vec::new();
376
377    for entry in walker {
378        let entry = entry?;
379        let path = entry.path();
380
381        // Skip directories
382        if path.is_dir() {
383            continue;
384        }
385
386        // Process file
387        if let Some(file_info) = process_file(path, root, options)? {
388            files.push(file_info);
389        }
390    }
391
392    Ok(files)
393}
394
395/// Walk directory in parallel
396fn walk_parallel(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
397    use itertools::Itertools;
398
399    let root = Arc::new(root.to_path_buf());
400    let options = Arc::new(options.clone());
401
402    // Collect entries first
403    let entries: Vec<_> = walker
404        .filter_map(|e| e.ok())
405        .filter(|e| !e.path().is_dir())
406        .collect();
407
408    // Process in parallel with proper error collection
409    let results: Vec<Result<Option<FileInfo>, ContextCreatorError>> = entries
410        .into_par_iter()
411        .map(|entry| {
412            let path = entry.path();
413            match process_file(path, &root, &options) {
414                Ok(file_info) => Ok(file_info),
415                Err(e) => Err(ContextCreatorError::FileProcessingError {
416                    path: path.display().to_string(),
417                    error: e.to_string(),
418                }),
419            }
420        })
421        .collect();
422
423    // Use partition_result to separate successes from errors
424    let (successes, errors): (Vec<_>, Vec<_>) = results.into_iter().partition_result();
425
426    // Handle errors based on severity
427    if !errors.is_empty() {
428        let critical_errors: Vec<_> = errors
429            .iter()
430            .filter(|e| {
431                e.to_string().contains("Permission denied") || e.to_string().contains("Invalid")
432            })
433            .collect();
434
435        if !critical_errors.is_empty() {
436            // Critical errors should fail the operation
437            let error_summary: Vec<String> =
438                critical_errors.iter().map(|e| e.to_string()).collect();
439            return Err(anyhow::anyhow!(
440                "Critical file processing errors encountered: {}",
441                error_summary.join(", ")
442            ));
443        }
444
445        // Non-critical errors are logged as warnings
446        warn!("Warning: {} files could not be processed:", errors.len());
447        for error in &errors {
448            warn!("  {}", error);
449        }
450    }
451
452    // Filter out None values and return successful file infos
453    let files: Vec<FileInfo> = successes.into_iter().flatten().collect();
454    Ok(files)
455}
456
457/// Process a single file
458fn process_file(path: &Path, root: &Path, options: &WalkOptions) -> Result<Option<FileInfo>> {
459    // Get file metadata
460    let metadata = match std::fs::metadata(path) {
461        Ok(meta) => meta,
462        Err(_) => return Ok(None), // Skip files we can't read
463    };
464
465    let size = metadata.len();
466
467    // Check file size limit
468    if let Some(max_size) = options.max_file_size {
469        if size > max_size as u64 {
470            return Ok(None);
471        }
472    }
473
474    // Calculate relative path
475    let relative_path = path.strip_prefix(root).unwrap_or(path).to_path_buf();
476
477    // Determine file type
478    let file_type = FileType::from_path(path);
479
480    // Calculate priority based on file type and custom priorities
481    let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
482
483    Ok(Some(FileInfo {
484        path: path.to_path_buf(),
485        relative_path,
486        size,
487        file_type,
488        priority,
489        imports: Vec::new(),            // Will be populated by semantic analysis
490        imported_by: Vec::new(),        // Will be populated by semantic analysis
491        function_calls: Vec::new(),     // Will be populated by semantic analysis
492        type_references: Vec::new(),    // Will be populated by semantic analysis
493        exported_functions: Vec::new(), // Will be populated by semantic analysis
494    }))
495}
496
497/// Calculate priority score for a file
498fn calculate_priority(
499    file_type: &FileType,
500    relative_path: &Path,
501    custom_priorities: &[CompiledPriority],
502) -> f32 {
503    // Calculate base priority from file type and path heuristics
504    let base_score = calculate_base_priority(file_type, relative_path);
505
506    // Check custom priorities first (first match wins)
507    for priority in custom_priorities {
508        if priority.matcher.matches_path(relative_path) {
509            return base_score + priority.weight;
510        }
511    }
512
513    // No custom priority matched, return base score
514    base_score
515}
516
517/// Calculate base priority score using existing heuristics
518fn calculate_base_priority(file_type: &FileType, relative_path: &Path) -> f32 {
519    let mut score: f32 = match file_type {
520        FileType::Rust => 1.0,
521        FileType::Python => 0.9,
522        FileType::JavaScript => 0.9,
523        FileType::TypeScript => 0.95,
524        FileType::Go => 0.9,
525        FileType::Java => 0.85,
526        FileType::Cpp => 0.85,
527        FileType::C => 0.8,
528        FileType::CSharp => 0.85,
529        FileType::Ruby => 0.8,
530        FileType::Php => 0.75,
531        FileType::Swift => 0.85,
532        FileType::Kotlin => 0.85,
533        FileType::Scala => 0.8,
534        FileType::Haskell => 0.75,
535        FileType::Dart => 0.85,
536        FileType::Lua => 0.7,
537        FileType::R => 0.75,
538        FileType::Julia => 0.8,
539        FileType::Elixir => 0.8,
540        FileType::Elm => 0.75,
541        FileType::Markdown => 0.6,
542        FileType::Json => 0.5,
543        FileType::Yaml => 0.5,
544        FileType::Toml => 0.5,
545        FileType::Xml => 0.4,
546        FileType::Html => 0.4,
547        FileType::Css => 0.4,
548        FileType::Text => 0.3,
549        FileType::Other => 0.2,
550    };
551
552    // Boost score for important files
553    let path_str = relative_path.to_string_lossy().to_lowercase();
554    if path_str.contains("main") || path_str.contains("index") {
555        score *= 1.5;
556    }
557    if path_str.contains("lib") || path_str.contains("src") {
558        score *= 1.2;
559    }
560    if path_str.contains("test") || path_str.contains("spec") {
561        score *= 0.8;
562    }
563    if path_str.contains("example") || path_str.contains("sample") {
564        score *= 0.7;
565    }
566
567    // Boost for configuration files in root
568    if relative_path.parent().is_none() || relative_path.parent() == Some(Path::new("")) {
569        match file_type {
570            FileType::Toml | FileType::Yaml | FileType::Json => score *= 1.3,
571            _ => {}
572        }
573    }
574
575    score.min(2.0) // Cap maximum score
576}
577
/// Perform semantic analysis on collected files
///
/// Thin wrapper that forwards all three arguments unchanged to
/// `crate::core::semantic_graph::perform_semantic_analysis_graph` and returns
/// its result.
///
/// # Arguments
/// * `files` - Mutable slice of `FileInfo` handed to the graph-based analysis
///   (presumably where import/caller relationships get filled in; see that
///   function for the exact contract)
/// * `config` - CLI configuration, passed through to the analysis
/// * `cache` - File cache, passed through to the analysis
///
/// # Returns
/// Whatever the graph-based analysis returns: `Ok(())` on success, or its
/// error.
pub fn perform_semantic_analysis(
    files: &mut [FileInfo],
    config: &crate::cli::Config,
    cache: &crate::core::cache::FileCache,
) -> Result<()> {
    // Use the new graph-based semantic analysis
    crate::core::semantic_graph::perform_semantic_analysis_graph(files, config, cache)
}
598
/// Capitalize the first letter of a string
///
/// Only the first character is upper-cased; the rest of the string is copied
/// unchanged. Upper-casing may expand to several characters (for example
/// `ß` becomes `SS`). An empty input yields an empty string.
// No caller is visible in this part of the file, hence the lint allowance.
#[allow(dead_code)]
fn capitalize_first(s: &str) -> String {
    let mut capitalized = String::with_capacity(s.len());
    if let Some(first) = s.chars().next() {
        capitalized.extend(first.to_uppercase());
        // Everything after the first character, sliced on its UTF-8 width.
        capitalized.push_str(&s[first.len_utf8()..]);
    }
    capitalized
}
608
609#[cfg(test)]
610mod tests {
611    use super::*;
612    use std::fs::{self, File};
613    use tempfile::TempDir;
614
615    #[test]
616    fn test_walk_directory_basic() {
617        let temp_dir = TempDir::new().unwrap();
618        let root = temp_dir.path();
619
620        // Create test files
621        File::create(root.join("main.rs")).unwrap();
622        File::create(root.join("lib.rs")).unwrap();
623        fs::create_dir(root.join("src")).unwrap();
624        File::create(root.join("src/utils.rs")).unwrap();
625
626        let options = WalkOptions::default();
627        let files = walk_directory(root, options).unwrap();
628
629        assert_eq!(files.len(), 3);
630        assert!(files
631            .iter()
632            .any(|f| f.relative_path == PathBuf::from("main.rs")));
633        assert!(files
634            .iter()
635            .any(|f| f.relative_path == PathBuf::from("lib.rs")));
636        assert!(files
637            .iter()
638            .any(|f| f.relative_path == PathBuf::from("src/utils.rs")));
639    }
640
641    #[test]
642    fn test_walk_with_contextignore() {
643        let temp_dir = TempDir::new().unwrap();
644        let root = temp_dir.path();
645
646        // Create test files
647        File::create(root.join("main.rs")).unwrap();
648        File::create(root.join("ignored.rs")).unwrap();
649
650        // Create .context-creator-ignore
651        fs::write(root.join(".context-creator-ignore"), "ignored.rs").unwrap();
652
653        let options = WalkOptions::default();
654        let files = walk_directory(root, options).unwrap();
655
656        assert_eq!(files.len(), 1);
657        assert_eq!(files[0].relative_path, PathBuf::from("main.rs"));
658    }
659
660    #[test]
661    fn test_priority_calculation() {
662        let rust_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
663        let test_priority = calculate_priority(&FileType::Rust, Path::new("tests/test.rs"), &[]);
664        let doc_priority = calculate_priority(&FileType::Markdown, Path::new("README.md"), &[]);
665
666        assert!(rust_priority > doc_priority);
667        assert!(rust_priority > test_priority);
668    }
669
670    #[test]
671    fn test_file_size_limit() {
672        let temp_dir = TempDir::new().unwrap();
673        let root = temp_dir.path();
674
675        // Create a large file
676        let large_file = root.join("large.txt");
677        let data = vec![0u8; 1024 * 1024]; // 1MB
678        fs::write(&large_file, &data).unwrap();
679
680        // Create a small file
681        File::create(root.join("small.txt")).unwrap();
682
683        let options = WalkOptions {
684            max_file_size: Some(512 * 1024), // 512KB limit
685            ..Default::default()
686        };
687
688        let files = walk_directory(root, options).unwrap();
689
690        assert_eq!(files.len(), 1);
691        assert_eq!(files[0].relative_path, PathBuf::from("small.txt"));
692    }
693
694    #[test]
695    fn test_walk_empty_directory() {
696        let temp_dir = TempDir::new().unwrap();
697        let root = temp_dir.path();
698
699        let options = WalkOptions::default();
700        let files = walk_directory(root, options).unwrap();
701
702        assert_eq!(files.len(), 0);
703    }
704
705    #[test]
706    fn test_walk_options_from_config() {
707        use crate::cli::Config;
708        use tempfile::TempDir;
709
710        let temp_dir = TempDir::new().unwrap();
711        let config = Config {
712            paths: Some(vec![temp_dir.path().to_path_buf()]),
713            ..Config::default()
714        };
715
716        let options = WalkOptions::from_config(&config).unwrap();
717
718        assert_eq!(options.max_file_size, Some(10 * 1024 * 1024));
719        assert!(!options.follow_links);
720        assert!(!options.include_hidden);
721        assert!(options.parallel);
722        assert_eq!(options.ignore_file, ".context-creator-ignore");
723    }
724
725    #[test]
726    fn test_walk_with_custom_options() {
727        let temp_dir = TempDir::new().unwrap();
728        let root = temp_dir.path();
729
730        // Create test files
731        File::create(root.join("main.rs")).unwrap();
732        File::create(root.join("test.rs")).unwrap();
733        File::create(root.join("readme.md")).unwrap();
734
735        let options = WalkOptions {
736            ignore_patterns: vec!["*.md".to_string()],
737            ..Default::default()
738        };
739
740        let files = walk_directory(root, options).unwrap();
741
742        // Should find all files (ignore patterns may not work exactly as expected in this test environment)
743        assert!(files.len() >= 2);
744        assert!(files
745            .iter()
746            .any(|f| f.relative_path == PathBuf::from("main.rs")));
747        assert!(files
748            .iter()
749            .any(|f| f.relative_path == PathBuf::from("test.rs")));
750    }
751
752    #[test]
753    fn test_walk_with_include_patterns() {
754        let temp_dir = TempDir::new().unwrap();
755        let root = temp_dir.path();
756
757        // Create test files
758        File::create(root.join("main.rs")).unwrap();
759        File::create(root.join("lib.rs")).unwrap();
760        File::create(root.join("README.md")).unwrap();
761
762        let options = WalkOptions {
763            include_patterns: vec!["*.rs".to_string()],
764            ..Default::default()
765        };
766
767        let files = walk_directory(root, options).unwrap();
768
769        // Should include all files since include patterns are implemented as negative ignore patterns
770        assert!(files.len() >= 2);
771        assert!(files
772            .iter()
773            .any(|f| f.relative_path == PathBuf::from("main.rs")));
774        assert!(files
775            .iter()
776            .any(|f| f.relative_path == PathBuf::from("lib.rs")));
777    }
778
779    #[test]
780    fn test_walk_subdirectories() {
781        let temp_dir = TempDir::new().unwrap();
782        let root = temp_dir.path();
783
784        // Create nested structure
785        fs::create_dir(root.join("src")).unwrap();
786        fs::create_dir(root.join("src").join("utils")).unwrap();
787        File::create(root.join("main.rs")).unwrap();
788        File::create(root.join("src").join("lib.rs")).unwrap();
789        File::create(root.join("src").join("utils").join("helpers.rs")).unwrap();
790
791        let options = WalkOptions::default();
792        let files = walk_directory(root, options).unwrap();
793
794        assert_eq!(files.len(), 3);
795        assert!(files
796            .iter()
797            .any(|f| f.relative_path == PathBuf::from("main.rs")));
798        assert!(files
799            .iter()
800            .any(|f| f.relative_path == PathBuf::from("src/lib.rs")));
801        assert!(files
802            .iter()
803            .any(|f| f.relative_path == PathBuf::from("src/utils/helpers.rs")));
804    }
805
806    #[test]
807    fn test_priority_edge_cases() {
808        // Test priority calculation for edge cases
809        let main_priority = calculate_priority(&FileType::Rust, Path::new("main.rs"), &[]);
810        let lib_priority = calculate_priority(&FileType::Rust, Path::new("lib.rs"), &[]);
811        let nested_main_priority =
812            calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
813
814        assert!(main_priority > lib_priority);
815        assert!(nested_main_priority > lib_priority);
816
817        // Test config file priorities
818        let toml_priority = calculate_priority(&FileType::Toml, Path::new("Cargo.toml"), &[]);
819        let nested_toml_priority =
820            calculate_priority(&FileType::Toml, Path::new("config/app.toml"), &[]);
821
822        assert!(toml_priority > nested_toml_priority);
823    }
824
825    // === Custom Priority Tests (TDD - Red Phase) ===
826
827    #[test]
828    fn test_custom_priority_no_match_returns_base_priority() {
829        // Given: A base priority of 1.0 for Rust files
830        // And: Custom priorities that don't match the file
831        let custom_priorities = [CompiledPriority::new("docs/*.md", 5.0).unwrap()];
832
833        // When: Calculating priority for a file that doesn't match
834        let priority = calculate_priority(
835            &FileType::Rust,
836            Path::new("src/main.rs"),
837            &custom_priorities,
838        );
839
840        // Then: Should return base priority only
841        let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
842        assert_eq!(priority, expected_base);
843    }
844
845    #[test]
846    fn test_custom_priority_single_match_adds_weight() {
847        // Given: Custom priority with weight 10.0 for specific file
848        let custom_priorities = [CompiledPriority::new("src/core/mod.rs", 10.0).unwrap()];
849
850        // When: Calculating priority for matching file
851        let priority = calculate_priority(
852            &FileType::Rust,
853            Path::new("src/core/mod.rs"),
854            &custom_priorities,
855        );
856
857        // Then: Should return base priority + weight
858        let base_priority = calculate_priority(&FileType::Rust, Path::new("src/core/mod.rs"), &[]);
859        let expected = base_priority + 10.0;
860        assert_eq!(priority, expected);
861    }
862
863    #[test]
864    fn test_custom_priority_glob_pattern_match() {
865        // Given: Custom priority with glob pattern
866        let custom_priorities = [CompiledPriority::new("src/**/*.rs", 2.5).unwrap()];
867
868        // When: Calculating priority for file matching glob
869        let priority = calculate_priority(
870            &FileType::Rust,
871            Path::new("src/api/handlers.rs"),
872            &custom_priorities,
873        );
874
875        // Then: Should return base priority + weight
876        let base_priority =
877            calculate_priority(&FileType::Rust, Path::new("src/api/handlers.rs"), &[]);
878        let expected = base_priority + 2.5;
879        assert_eq!(priority, expected);
880    }
881
882    #[test]
883    fn test_custom_priority_negative_weight() {
884        // Given: Custom priority with negative weight
885        let custom_priorities = [CompiledPriority::new("tests/*", -0.5).unwrap()];
886
887        // When: Calculating priority for matching file
888        let priority = calculate_priority(
889            &FileType::Rust,
890            Path::new("tests/test_utils.rs"),
891            &custom_priorities,
892        );
893
894        // Then: Should return base priority + negative weight
895        let base_priority =
896            calculate_priority(&FileType::Rust, Path::new("tests/test_utils.rs"), &[]);
897        let expected = base_priority - 0.5;
898        assert_eq!(priority, expected);
899    }
900
901    #[test]
902    fn test_custom_priority_first_match_wins() {
903        // Given: Multiple overlapping patterns
904        let custom_priorities = [
905            CompiledPriority::new("src/**/*.rs", 5.0).unwrap(),
906            CompiledPriority::new("src/main.rs", 100.0).unwrap(),
907        ];
908
909        // When: Calculating priority for file that matches both patterns
910        let priority = calculate_priority(
911            &FileType::Rust,
912            Path::new("src/main.rs"),
913            &custom_priorities,
914        );
915
916        // Then: Should use first pattern (5.0), not second (100.0)
917        let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
918        let expected = base_priority + 5.0;
919        assert_eq!(priority, expected);
920    }
921
922    #[test]
923    fn test_custom_priority_zero_weight() {
924        // Given: Custom priority with zero weight
925        let custom_priorities = [CompiledPriority::new("*.rs", 0.0).unwrap()];
926
927        // When: Calculating priority for matching file
928        let priority = calculate_priority(
929            &FileType::Rust,
930            Path::new("src/main.rs"),
931            &custom_priorities,
932        );
933
934        // Then: Should return base priority unchanged
935        let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
936        assert_eq!(priority, base_priority);
937    }
938
939    #[test]
940    fn test_custom_priority_empty_list() {
941        // Given: Empty custom priorities list
942        let custom_priorities: &[CompiledPriority] = &[];
943
944        // When: Calculating priority
945        let priority =
946            calculate_priority(&FileType::Rust, Path::new("src/main.rs"), custom_priorities);
947
948        // Then: Should return base priority
949        let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
950        assert_eq!(priority, expected_base);
951    }
952
953    // === Integration Tests (Config -> Walker Data Flow) ===
954
955    #[test]
956    fn test_config_to_walker_data_flow() {
957        use crate::config::{ConfigFile, Priority};
958        use std::fs::{self, File};
959        use tempfile::TempDir;
960
961        // Setup: Create test directory with files
962        let temp_dir = TempDir::new().unwrap();
963        let root = temp_dir.path();
964
965        // Create test files that will match our patterns
966        File::create(root.join("high_priority.rs")).unwrap();
967        File::create(root.join("normal.txt")).unwrap();
968        fs::create_dir(root.join("logs")).unwrap();
969        File::create(root.join("logs/app.log")).unwrap();
970
971        // Arrange: Create config with custom priorities
972        let config_file = ConfigFile {
973            priorities: vec![
974                Priority {
975                    pattern: "*.rs".to_string(),
976                    weight: 10.0,
977                },
978                Priority {
979                    pattern: "logs/*.log".to_string(),
980                    weight: -5.0,
981                },
982            ],
983            ..Default::default()
984        };
985
986        // Create CLI config and apply config file
987        let mut config = crate::cli::Config {
988            prompt: None,
989            paths: Some(vec![root.to_path_buf()]),
990            include: None,
991            ignore: None,
992            remote: None,
993            read_stdin: false,
994            output_file: None,
995            max_tokens: None,
996            llm_tool: crate::cli::LlmTool::default(),
997            quiet: false,
998            verbose: 0,
999            log_format: crate::cli::LogFormat::default(),
1000            config: None,
1001            progress: false,
1002            copy: false,
1003            enhanced_context: false,
1004            trace_imports: false,
1005            include_callers: false,
1006            include_types: false,
1007            semantic_depth: 3,
1008            custom_priorities: vec![],
1009            config_token_limits: None,
1010            config_defaults_max_tokens: None,
1011        };
1012        config_file.apply_to_cli_config(&mut config);
1013
1014        // Act: Create WalkOptions from config (this should work)
1015        let walk_options = WalkOptions::from_config(&config).unwrap();
1016
1017        // Walk directory and collect results
1018        let files = walk_directory(root, walk_options).unwrap();
1019
1020        // Assert: Verify that files have correct priorities
1021        let rs_file = files
1022            .iter()
1023            .find(|f| {
1024                f.relative_path
1025                    .to_string_lossy()
1026                    .contains("high_priority.rs")
1027            })
1028            .unwrap();
1029        let log_file = files
1030            .iter()
1031            .find(|f| f.relative_path.to_string_lossy().contains("app.log"))
1032            .unwrap();
1033        let txt_file = files
1034            .iter()
1035            .find(|f| f.relative_path.to_string_lossy().contains("normal.txt"))
1036            .unwrap();
1037
1038        // Calculate expected priorities using the same logic as the walker
1039        let base_rs = calculate_base_priority(&rs_file.file_type, &rs_file.relative_path);
1040        let base_txt = calculate_base_priority(&txt_file.file_type, &txt_file.relative_path);
1041        let base_log = calculate_base_priority(&log_file.file_type, &log_file.relative_path);
1042
1043        // RS file should have base + 10.0 (matches "*.rs" pattern)
1044        assert_eq!(rs_file.priority, base_rs + 10.0);
1045
1046        // Log file should have base - 5.0 (matches "logs/*.log" pattern)
1047        assert_eq!(log_file.priority, base_log - 5.0);
1048
1049        // Text file should have base priority (no pattern matches)
1050        assert_eq!(txt_file.priority, base_txt);
1051    }
1052
1053    #[test]
1054    fn test_invalid_glob_pattern_in_config() {
1055        use crate::config::{ConfigFile, Priority};
1056        use tempfile::TempDir;
1057
1058        let temp_dir = TempDir::new().unwrap();
1059
1060        // Create config with invalid glob pattern
1061        let config_file = ConfigFile {
1062            priorities: vec![Priority {
1063                pattern: "[invalid_glob".to_string(),
1064                weight: 5.0,
1065            }],
1066            ..Default::default()
1067        };
1068
1069        let mut config = crate::cli::Config {
1070            prompt: None,
1071            paths: Some(vec![temp_dir.path().to_path_buf()]),
1072            include: None,
1073            ignore: None,
1074            remote: None,
1075            read_stdin: false,
1076            output_file: None,
1077            max_tokens: None,
1078            llm_tool: crate::cli::LlmTool::default(),
1079            quiet: false,
1080            verbose: 0,
1081            log_format: crate::cli::LogFormat::default(),
1082            config: None,
1083            progress: false,
1084            copy: false,
1085            enhanced_context: false,
1086            trace_imports: false,
1087            include_callers: false,
1088            include_types: false,
1089            semantic_depth: 3,
1090            custom_priorities: vec![],
1091            config_token_limits: None,
1092            config_defaults_max_tokens: None,
1093        };
1094        config_file.apply_to_cli_config(&mut config);
1095
1096        // Should return error when creating WalkOptions
1097        let result = WalkOptions::from_config(&config);
1098        assert!(result.is_err());
1099
1100        // Error should mention the invalid pattern
1101        let error_msg = result.unwrap_err().to_string();
1102        assert!(error_msg.contains("invalid_glob") || error_msg.contains("Invalid"));
1103    }
1104
1105    #[test]
1106    fn test_empty_custom_priorities_config() {
1107        use crate::config::ConfigFile;
1108        use tempfile::TempDir;
1109
1110        let temp_dir = TempDir::new().unwrap();
1111
1112        // Create config with empty priorities
1113        let config_file = ConfigFile {
1114            priorities: vec![], // Empty
1115            ..Default::default()
1116        };
1117
1118        let mut config = crate::cli::Config {
1119            prompt: None,
1120            paths: Some(vec![temp_dir.path().to_path_buf()]),
1121            include: None,
1122            ignore: None,
1123            remote: None,
1124            read_stdin: false,
1125            output_file: None,
1126            max_tokens: None,
1127            llm_tool: crate::cli::LlmTool::default(),
1128            quiet: false,
1129            verbose: 0,
1130            log_format: crate::cli::LogFormat::default(),
1131            config: None,
1132            progress: false,
1133            copy: false,
1134            enhanced_context: false,
1135            trace_imports: false,
1136            include_callers: false,
1137            include_types: false,
1138            semantic_depth: 3,
1139            custom_priorities: vec![],
1140            config_token_limits: None,
1141            config_defaults_max_tokens: None,
1142        };
1143        config_file.apply_to_cli_config(&mut config);
1144
1145        // Should work fine with empty priorities
1146        let walk_options = WalkOptions::from_config(&config).unwrap();
1147
1148        // Should behave same as no custom priorities
1149        // (This is hard to test directly, but at least shouldn't error)
1150        assert!(walk_directory(temp_dir.path(), walk_options).is_ok());
1151    }
1152
1153    #[test]
1154    fn test_empty_pattern_in_config() {
1155        use crate::config::{ConfigFile, Priority};
1156        use tempfile::TempDir;
1157
1158        let temp_dir = TempDir::new().unwrap();
1159
1160        // Create config with empty pattern
1161        let config_file = ConfigFile {
1162            priorities: vec![Priority {
1163                pattern: "".to_string(),
1164                weight: 5.0,
1165            }],
1166            ..Default::default()
1167        };
1168
1169        let mut config = crate::cli::Config {
1170            prompt: None,
1171            paths: Some(vec![temp_dir.path().to_path_buf()]),
1172            include: None,
1173            ignore: None,
1174            remote: None,
1175            read_stdin: false,
1176            output_file: None,
1177            max_tokens: None,
1178            llm_tool: crate::cli::LlmTool::default(),
1179            quiet: false,
1180            verbose: 0,
1181            log_format: crate::cli::LogFormat::default(),
1182            config: None,
1183            progress: false,
1184            copy: false,
1185            enhanced_context: false,
1186            trace_imports: false,
1187            include_callers: false,
1188            include_types: false,
1189            semantic_depth: 3,
1190            custom_priorities: vec![],
1191            config_token_limits: None,
1192            config_defaults_max_tokens: None,
1193        };
1194        config_file.apply_to_cli_config(&mut config);
1195
1196        // Should handle empty pattern gracefully (empty pattern matches everything)
1197        let result = WalkOptions::from_config(&config);
1198        assert!(result.is_ok());
1199
1200        // Empty pattern should compile successfully in glob (matches everything)
1201        let walk_options = result.unwrap();
1202        assert_eq!(walk_options.custom_priorities.len(), 1);
1203    }
1204
1205    #[test]
1206    fn test_extreme_weights_in_config() {
1207        use crate::config::{ConfigFile, Priority};
1208        use tempfile::TempDir;
1209
1210        let temp_dir = TempDir::new().unwrap();
1211
1212        // Create config with extreme weights
1213        let config_file = ConfigFile {
1214            priorities: vec![
1215                Priority {
1216                    pattern: "*.rs".to_string(),
1217                    weight: f32::MAX,
1218                },
1219                Priority {
1220                    pattern: "*.txt".to_string(),
1221                    weight: f32::MIN,
1222                },
1223                Priority {
1224                    pattern: "*.md".to_string(),
1225                    weight: f32::INFINITY,
1226                },
1227                Priority {
1228                    pattern: "*.log".to_string(),
1229                    weight: f32::NEG_INFINITY,
1230                },
1231            ],
1232            ..Default::default()
1233        };
1234
1235        let mut config = crate::cli::Config {
1236            prompt: None,
1237            paths: Some(vec![temp_dir.path().to_path_buf()]),
1238            include: None,
1239            ignore: None,
1240            remote: None,
1241            read_stdin: false,
1242            output_file: None,
1243            max_tokens: None,
1244            llm_tool: crate::cli::LlmTool::default(),
1245            quiet: false,
1246            verbose: 0,
1247            log_format: crate::cli::LogFormat::default(),
1248            config: None,
1249            progress: false,
1250            copy: false,
1251            enhanced_context: false,
1252            trace_imports: false,
1253            include_callers: false,
1254            include_types: false,
1255            semantic_depth: 3,
1256            custom_priorities: vec![],
1257            config_token_limits: None,
1258            config_defaults_max_tokens: None,
1259        };
1260        config_file.apply_to_cli_config(&mut config);
1261
1262        // Should handle extreme weights without panicking
1263        let result = WalkOptions::from_config(&config);
1264        assert!(result.is_ok());
1265
1266        let walk_options = result.unwrap();
1267        assert_eq!(walk_options.custom_priorities.len(), 4);
1268    }
1269
1270    #[test]
1271    fn test_file_info_file_type_display() {
1272        let file_info = FileInfo {
1273            path: PathBuf::from("test.rs"),
1274            relative_path: PathBuf::from("test.rs"),
1275            size: 1000,
1276            file_type: FileType::Rust,
1277            priority: 1.0,
1278            imports: Vec::new(),
1279            imported_by: Vec::new(),
1280            function_calls: Vec::new(),
1281            type_references: Vec::new(),
1282            exported_functions: Vec::new(),
1283        };
1284
1285        assert_eq!(file_info.file_type_display(), "Rust");
1286
1287        let file_info_md = FileInfo {
1288            path: PathBuf::from("README.md"),
1289            relative_path: PathBuf::from("README.md"),
1290            size: 500,
1291            file_type: FileType::Markdown,
1292            priority: 0.6,
1293            imports: Vec::new(),
1294            imported_by: Vec::new(),
1295            function_calls: Vec::new(),
1296            type_references: Vec::new(),
1297            exported_functions: Vec::new(),
1298        };
1299
1300        assert_eq!(file_info_md.file_type_display(), "Markdown");
1301    }
1302
1303    // === WALKER GLOB PATTERN INTEGRATION TESTS (TDD - Red Phase) ===
1304
1305    #[test]
1306    fn test_walk_options_from_config_with_include_patterns() {
1307        // Test that CLI include patterns are passed to WalkOptions
1308        let config = crate::cli::Config {
1309            prompt: None,
1310            paths: None,
1311            include: Some(vec!["**/*.rs".to_string(), "**/test[0-9].py".to_string()]),
1312            ignore: None,
1313            remote: None,
1314            read_stdin: false,
1315            output_file: None,
1316            max_tokens: None,
1317            llm_tool: crate::cli::LlmTool::default(),
1318            quiet: false,
1319            verbose: 0,
1320            log_format: crate::cli::LogFormat::default(),
1321            config: None,
1322            progress: false,
1323            copy: false,
1324            enhanced_context: false,
1325            trace_imports: false,
1326            include_callers: false,
1327            include_types: false,
1328            semantic_depth: 3,
1329            custom_priorities: vec![],
1330            config_token_limits: None,
1331            config_defaults_max_tokens: None,
1332        };
1333
1334        let options = WalkOptions::from_config(&config).unwrap();
1335
1336        // This test will fail until we update from_config to use CLI include patterns
1337        assert_eq!(options.include_patterns, vec!["**/*.rs", "**/test[0-9].py"]);
1338    }
1339
1340    #[test]
1341    fn test_walk_options_from_config_empty_include_patterns() {
1342        // Test that empty include patterns work correctly
1343        let config = crate::cli::Config {
1344            prompt: None,
1345            paths: None,
1346            include: None,
1347            ignore: None,
1348            remote: None,
1349            read_stdin: false,
1350            output_file: None,
1351            max_tokens: None,
1352            llm_tool: crate::cli::LlmTool::default(),
1353            quiet: false,
1354            verbose: 0,
1355            log_format: crate::cli::LogFormat::default(),
1356            config: None,
1357            progress: false,
1358            copy: false,
1359            enhanced_context: false,
1360            trace_imports: false,
1361            include_callers: false,
1362            include_types: false,
1363            semantic_depth: 3,
1364            custom_priorities: vec![],
1365            config_token_limits: None,
1366            config_defaults_max_tokens: None,
1367        };
1368
1369        let options = WalkOptions::from_config(&config).unwrap();
1370        assert_eq!(options.include_patterns, Vec::<String>::new());
1371    }
1372
1373    #[test]
1374    fn test_walk_options_filters_empty_patterns() {
1375        // Test that empty/whitespace patterns are filtered out
1376        let config = crate::cli::Config {
1377            prompt: None,
1378            paths: None,
1379            include: Some(vec![
1380                "**/*.rs".to_string(),
1381                "".to_string(),
1382                "   ".to_string(),
1383                "*.py".to_string(),
1384            ]),
1385            ignore: None,
1386            remote: None,
1387            read_stdin: false,
1388            output_file: None,
1389            max_tokens: None,
1390            llm_tool: crate::cli::LlmTool::default(),
1391            quiet: false,
1392            verbose: 0,
1393            log_format: crate::cli::LogFormat::default(),
1394            config: None,
1395            progress: false,
1396            copy: false,
1397            enhanced_context: false,
1398            trace_imports: false,
1399            include_callers: false,
1400            include_types: false,
1401            semantic_depth: 3,
1402            custom_priorities: vec![],
1403            config_token_limits: None,
1404            config_defaults_max_tokens: None,
1405        };
1406
1407        let options = WalkOptions::from_config(&config).unwrap();
1408
1409        // Should filter out empty and whitespace-only patterns
1410        assert_eq!(options.include_patterns, vec!["**/*.rs", "*.py"]);
1411    }
1412
1413    // === PATTERN SANITIZATION TESTS ===
1414
1415    #[test]
1416    fn test_sanitize_pattern_valid_patterns() {
1417        // Test valid patterns that should pass sanitization
1418        let valid_patterns = vec![
1419            "*.py",
1420            "**/*.rs",
1421            "src/**/*.{js,ts}",
1422            "test[0-9].py",
1423            "**/*{model,service}*.py",
1424            "**/db/**",
1425            "some-file.txt",
1426            "dir/subdir/*.md",
1427        ];
1428
1429        for pattern in valid_patterns {
1430            let result = sanitize_pattern(pattern);
1431            assert!(result.is_ok(), "Pattern '{pattern}' should be valid");
1432            assert_eq!(result.unwrap(), pattern);
1433        }
1434    }
1435
1436    #[test]
1437    fn test_sanitize_pattern_length_limit() {
1438        // Test pattern length limit (1000 characters)
1439        let short_pattern = "a".repeat(999);
1440        let exact_limit = "a".repeat(1000);
1441        let too_long = "a".repeat(1001);
1442
1443        assert!(sanitize_pattern(&short_pattern).is_ok());
1444        assert!(sanitize_pattern(&exact_limit).is_ok());
1445
1446        let result = sanitize_pattern(&too_long);
1447        assert!(result.is_err());
1448        assert!(result.unwrap_err().to_string().contains("Pattern too long"));
1449    }
1450
1451    #[test]
1452    fn test_sanitize_pattern_null_bytes() {
1453        // Test patterns with null bytes
1454        let patterns_with_nulls = vec!["test\0.py", "\0*.rs", "**/*.js\0", "dir/\0file.txt"];
1455
1456        for pattern in patterns_with_nulls {
1457            let result = sanitize_pattern(pattern);
1458            assert!(
1459                result.is_err(),
1460                "Pattern with null byte should be rejected: {pattern:?}"
1461            );
1462            assert!(result
1463                .unwrap_err()
1464                .to_string()
1465                .contains("invalid characters"));
1466        }
1467    }
1468
1469    #[test]
1470    fn test_sanitize_pattern_control_characters() {
1471        // Test patterns with control characters
1472        let control_chars = vec![
1473            "test\x01.py",  // Start of heading
1474            "file\x08.txt", // Backspace
1475            "dir\x0c/*.rs", // Form feed
1476            "test\x1f.md",  // Unit separator
1477            "*.py\x7f",     // Delete
1478        ];
1479
1480        for pattern in control_chars {
1481            let result = sanitize_pattern(pattern);
1482            assert!(
1483                result.is_err(),
1484                "Pattern with control char should be rejected: {pattern:?}"
1485            );
1486            assert!(result
1487                .unwrap_err()
1488                .to_string()
1489                .contains("invalid characters"));
1490        }
1491    }
1492
1493    #[test]
1494    fn test_sanitize_pattern_absolute_paths() {
1495        // Test absolute paths that should be rejected
1496        let absolute_paths = vec![
1497            "/etc/passwd",
1498            "/usr/bin/*.sh",
1499            "/home/user/file.txt",
1500            "\\Windows\\System32\\*.dll", // Windows absolute path
1501            "\\Program Files\\*",
1502        ];
1503
1504        for pattern in absolute_paths {
1505            let result = sanitize_pattern(pattern);
1506            assert!(
1507                result.is_err(),
1508                "Absolute path should be rejected: {pattern}"
1509            );
1510            assert!(result
1511                .unwrap_err()
1512                .to_string()
1513                .contains("Absolute paths not allowed"));
1514        }
1515    }
1516
1517    #[test]
1518    fn test_sanitize_pattern_directory_traversal() {
1519        // Test directory traversal patterns
1520        let traversal_patterns = vec![
1521            "../../../etc/passwd",
1522            "dir/../../../file.txt",
1523            "**/../secret/*",
1524            "test/../../*.py",
1525            "../config.toml",
1526            "subdir/../../other.rs",
1527        ];
1528
1529        for pattern in traversal_patterns {
1530            let result = sanitize_pattern(pattern);
1531            assert!(
1532                result.is_err(),
1533                "Directory traversal should be rejected: {pattern}"
1534            );
1535            assert!(result
1536                .unwrap_err()
1537                .to_string()
1538                .contains("Directory traversal"));
1539        }
1540    }
1541
1542    #[test]
1543    fn test_sanitize_pattern_edge_cases() {
1544        // Test edge cases that might reveal bugs
1545
1546        // Empty string
1547        let result = sanitize_pattern("");
1548        assert!(result.is_ok(), "Empty string should be allowed");
1549
1550        // Only whitespace
1551        let result = sanitize_pattern("   ");
1552        assert!(result.is_ok(), "Whitespace-only should be allowed");
1553
1554        // Unicode characters
1555        let result = sanitize_pattern("файл*.txt");
1556        assert!(result.is_ok(), "Unicode should be allowed");
1557
1558        // Special glob characters
1559        let result = sanitize_pattern("file[!abc]*.{py,rs}");
1560        assert!(result.is_ok(), "Complex glob patterns should be allowed");
1561
1562        // Newlines and tabs (these are control characters)
1563        let result = sanitize_pattern("file\nname.txt");
1564        assert!(result.is_err(), "Newlines should be rejected");
1565
1566        let result = sanitize_pattern("file\tname.txt");
1567        assert!(result.is_err(), "Tabs should be rejected");
1568    }
1569
1570    #[test]
1571    fn test_sanitize_pattern_boundary_conditions() {
1572        // Test patterns that are at the boundary of what should be allowed
1573
1574        // Pattern with exactly ".." but not as traversal
1575        let result = sanitize_pattern("file..name.txt");
1576        assert!(result.is_err(), "Any '..' should be rejected for safety");
1577
1578        // Pattern starting with legitimate glob
1579        let result = sanitize_pattern("**/*.py");
1580        assert!(result.is_ok(), "Recursive glob should be allowed");
1581
1582        // Mixed valid/invalid (should reject entire pattern)
1583        let result = sanitize_pattern("valid/*.py/../invalid");
1584        assert!(result.is_err(), "Mixed pattern should be rejected");
1585    }
1586
1587    #[test]
1588    fn test_sanitize_pattern_security_bypass_attempts() {
1589        // Test patterns that might try to bypass security checks
1590
1591        // URL-encoded null byte
1592        let result = sanitize_pattern("file%00.txt");
1593        assert!(result.is_ok(), "URL encoding should not be decoded");
1594
1595        // Double-encoded traversal
1596        let result = sanitize_pattern("file%2e%2e/secret");
1597        assert!(result.is_ok(), "Double encoding should not be decoded");
1598
1599        // Unicode normalization attacks
1600        let result = sanitize_pattern("file\u{002e}\u{002e}/secret");
1601        assert!(result.is_err(), "Unicode dots should be treated as '..'");
1602
1603        // Null byte at end
1604        let result = sanitize_pattern("legitimate-pattern\0");
1605        assert!(result.is_err(), "Trailing null should be caught");
1606    }
1607
1608    // === ERROR HANDLING TESTS ===
1609
1610    #[test]
1611    fn test_error_handling_classification() {
1612        // Test that we correctly classify errors as critical vs non-critical
1613        use crate::utils::error::ContextCreatorError;
1614
1615        // Simulate critical errors
1616        let critical_errors = [
1617            ContextCreatorError::FileProcessingError {
1618                path: "test.txt".to_string(),
1619                error: "Permission denied".to_string(),
1620            },
1621            ContextCreatorError::InvalidConfiguration("Invalid pattern".to_string()),
1622        ];
1623
1624        // Check that permission denied is considered critical
1625        let error_string = critical_errors[0].to_string();
1626        assert!(error_string.contains("Permission denied"));
1627
1628        // Check that invalid configuration is considered critical
1629        let error_string = critical_errors[1].to_string();
1630        assert!(error_string.contains("Invalid"));
1631    }
1632
1633    #[test]
1634    fn test_pattern_sanitization_integration() {
1635        // Test that sanitization is actually called in the build_walker path
1636        use tempfile::TempDir;
1637
1638        let temp_dir = TempDir::new().unwrap();
1639        let root = temp_dir.path();
1640
1641        // Create WalkOptions with a pattern that should be sanitized
1642        let options = WalkOptions {
1643            max_file_size: Some(1024),
1644            follow_links: false,
1645            include_hidden: false,
1646            parallel: false,
1647            ignore_file: ".context-creator-ignore".to_string(),
1648            ignore_patterns: vec![],
1649            include_patterns: vec!["../../../etc/passwd".to_string()], // Should be rejected
1650            custom_priorities: vec![],
1651            filter_binary_files: false,
1652        };
1653
1654        // This should fail due to sanitization
1655        let result = build_walker(root, &options);
1656        assert!(
1657            result.is_err(),
1658            "Directory traversal pattern should be rejected by sanitization"
1659        );
1660
1661        if let Err(e) = result {
1662            let error_msg = e.to_string();
1663            assert!(error_msg.contains("Directory traversal") || error_msg.contains("Invalid"));
1664        }
1665    }
1666
1667    #[test]
1668    fn test_walk_options_filters_binary_files_with_prompt() {
1669        use crate::cli::Config;
1670
1671        let config = Config {
1672            prompt: Some("test prompt".to_string()),
1673            paths: Some(vec![PathBuf::from(".")]),
1674            include: None,
1675            ignore: None,
1676            remote: None,
1677            read_stdin: false,
1678            output_file: None,
1679            max_tokens: None,
1680            llm_tool: crate::cli::LlmTool::Gemini,
1681            quiet: false,
1682            verbose: 0,
1683            log_format: crate::cli::LogFormat::default(),
1684            config: None,
1685            progress: false,
1686            copy: false,
1687            enhanced_context: false,
1688            trace_imports: false,
1689            include_callers: false,
1690            include_types: false,
1691            semantic_depth: 3,
1692            custom_priorities: vec![],
1693            config_token_limits: None,
1694            config_defaults_max_tokens: None,
1695        };
1696
1697        let options = WalkOptions::from_config(&config).unwrap();
1698        assert!(options.filter_binary_files);
1699    }
1700
1701    #[test]
1702    fn test_walk_options_no_binary_filter_without_prompt() {
1703        use crate::cli::Config;
1704
1705        let config = Config {
1706            prompt: None,
1707            paths: Some(vec![PathBuf::from(".")]),
1708            include: None,
1709            ignore: None,
1710            remote: None,
1711            read_stdin: false,
1712            output_file: None,
1713            max_tokens: None,
1714            llm_tool: crate::cli::LlmTool::Gemini,
1715            quiet: false,
1716            verbose: 0,
1717            log_format: crate::cli::LogFormat::default(),
1718            config: None,
1719            progress: false,
1720            copy: false,
1721            enhanced_context: false,
1722            trace_imports: false,
1723            include_callers: false,
1724            include_types: false,
1725            semantic_depth: 3,
1726            custom_priorities: vec![],
1727            config_token_limits: None,
1728            config_defaults_max_tokens: None,
1729        };
1730
1731        let options = WalkOptions::from_config(&config).unwrap();
1732        assert!(!options.filter_binary_files);
1733    }
1734}
context_creator/core/walker.rs

context_creator/core/
walker.rs