context_creator/core/
walker.rs

1//! Directory walking functionality with .gitignore and .context-creator-ignore support
2
3use crate::utils::error::ContextCreatorError;
4use crate::utils::file_ext::FileType;
5use anyhow::Result;
6use glob::Pattern;
7use ignore::{Walk, WalkBuilder};
8use rayon::prelude::*;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
12/// Compiled priority rule for efficient pattern matching
13///
14/// This struct represents a custom priority rule that has been compiled from
15/// the configuration file. The glob pattern is pre-compiled for performance,
16/// and the weight is applied additively to the base file type priority.
17///
18/// # Priority Calculation
19/// Final priority = base_priority + weight (if pattern matches)
20///
21/// # Pattern Matching
22/// Uses first-match-wins semantics - the first pattern that matches a file
23/// will determine the priority adjustment. Subsequent patterns are not evaluated.
24#[derive(Debug, Clone)]
25pub struct CompiledPriority {
26    /// Pre-compiled glob pattern for efficient matching
27    pub matcher: Pattern,
28    /// Priority weight to add to base priority (can be negative)
29    pub weight: f32,
30    /// Original pattern string for debugging and error reporting
31    pub original_pattern: String,
32}
33
34impl CompiledPriority {
35    /// Create a CompiledPriority from a pattern string
36    pub fn new(pattern: &str, weight: f32) -> Result<Self, glob::PatternError> {
37        let matcher = Pattern::new(pattern)?;
38        Ok(Self {
39            matcher,
40            weight,
41            original_pattern: pattern.to_string(),
42        })
43    }
44
45    /// Convert from config::Priority to CompiledPriority with error handling
46    pub fn try_from_config_priority(
47        priority: &crate::config::Priority,
48    ) -> Result<Self, glob::PatternError> {
49        Self::new(&priority.pattern, priority.weight)
50    }
51}
52
53/// Options for walking directories
54#[derive(Debug, Clone)]
55pub struct WalkOptions {
56    /// Maximum file size in bytes
57    pub max_file_size: Option<usize>,
58    /// Follow symbolic links
59    pub follow_links: bool,
60    /// Include hidden files
61    pub include_hidden: bool,
62    /// Use parallel processing
63    pub parallel: bool,
64    /// Custom ignore file name (default: .context-creator-ignore)
65    pub ignore_file: String,
66    /// Additional glob patterns to ignore
67    pub ignore_patterns: Vec<String>,
68    /// Only include files matching these patterns
69    pub include_patterns: Vec<String>,
70    /// Custom priority rules for file prioritization
71    pub custom_priorities: Vec<CompiledPriority>,
72    /// Filter out binary files by extension
73    pub filter_binary_files: bool,
74}
75
76impl WalkOptions {
77    /// Create WalkOptions from CLI config
78    pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
79        // Convert config priorities to CompiledPriority with error handling
80        let mut custom_priorities = Vec::new();
81        for priority in &config.custom_priorities {
82            match CompiledPriority::try_from_config_priority(priority) {
83                Ok(compiled) => custom_priorities.push(compiled),
84                Err(e) => {
85                    return Err(ContextCreatorError::ConfigError(format!(
86                        "Invalid glob pattern '{}' in custom priorities: {e}",
87                        priority.pattern
88                    ))
89                    .into());
90                }
91            }
92        }
93
94        // Get include patterns from CLI config and filter out empty/whitespace patterns
95        let include_patterns = config
96            .get_include_patterns()
97            .into_iter()
98            .filter(|pattern| !pattern.trim().is_empty())
99            .collect();
100
101        // Get ignore patterns from CLI config and filter out empty/whitespace patterns
102        let ignore_patterns = config
103            .get_ignore_patterns()
104            .into_iter()
105            .filter(|pattern| !pattern.trim().is_empty())
106            .collect();
107
108        Ok(WalkOptions {
109            max_file_size: Some(10 * 1024 * 1024), // 10MB default
110            follow_links: false,
111            include_hidden: false,
112            parallel: true,
113            ignore_file: ".context-creator-ignore".to_string(),
114            ignore_patterns,
115            include_patterns,
116            custom_priorities,
117            filter_binary_files: config.get_prompt().is_some(),
118        })
119    }
120}
121
122impl Default for WalkOptions {
123    fn default() -> Self {
124        WalkOptions {
125            max_file_size: Some(10 * 1024 * 1024), // 10MB
126            follow_links: false,
127            include_hidden: false,
128            parallel: true,
129            ignore_file: ".context-creator-ignore".to_string(),
130            ignore_patterns: vec![],
131            include_patterns: vec![],
132            custom_priorities: vec![],
133            filter_binary_files: false,
134        }
135    }
136}
137
138/// Information about a file found during walking
139#[derive(Debug, Clone)]
140pub struct FileInfo {
141    /// Absolute path to the file
142    pub path: PathBuf,
143    /// Relative path from the root directory
144    pub relative_path: PathBuf,
145    /// File size in bytes
146    pub size: u64,
147    /// File type based on extension
148    pub file_type: FileType,
149    /// Priority score (higher is more important)
150    pub priority: f32,
151    /// Files that this file imports/depends on (for semantic analysis)
152    pub imports: Vec<PathBuf>,
153    /// Files that import this file (reverse dependencies)
154    pub imported_by: Vec<PathBuf>,
155    /// Function calls made by this file (for --include-callers analysis)
156    pub function_calls: Vec<crate::core::semantic::analyzer::FunctionCall>,
157    /// Type references used by this file (for --include-types analysis)
158    pub type_references: Vec<crate::core::semantic::analyzer::TypeReference>,
159}
160
161impl FileInfo {
162    /// Get a display string for the file type
163    pub fn file_type_display(&self) -> &'static str {
164        use crate::utils::file_ext::FileType;
165        match self.file_type {
166            FileType::Rust => "Rust",
167            FileType::Python => "Python",
168            FileType::JavaScript => "JavaScript",
169            FileType::TypeScript => "TypeScript",
170            FileType::Go => "Go",
171            FileType::Java => "Java",
172            FileType::Cpp => "C++",
173            FileType::C => "C",
174            FileType::CSharp => "C#",
175            FileType::Ruby => "Ruby",
176            FileType::Php => "PHP",
177            FileType::Swift => "Swift",
178            FileType::Kotlin => "Kotlin",
179            FileType::Scala => "Scala",
180            FileType::Haskell => "Haskell",
181            FileType::Dart => "Dart",
182            FileType::Lua => "Lua",
183            FileType::R => "R",
184            FileType::Julia => "Julia",
185            FileType::Elixir => "Elixir",
186            FileType::Elm => "Elm",
187            FileType::Markdown => "Markdown",
188            FileType::Json => "JSON",
189            FileType::Yaml => "YAML",
190            FileType::Toml => "TOML",
191            FileType::Xml => "XML",
192            FileType::Html => "HTML",
193            FileType::Css => "CSS",
194            FileType::Text => "Text",
195            FileType::Other => "Other",
196        }
197    }
198}
199
200/// Walk a directory and collect file information
201pub fn walk_directory(root: &Path, options: WalkOptions) -> Result<Vec<FileInfo>> {
202    if !root.exists() {
203        return Err(ContextCreatorError::InvalidPath(format!(
204            "Directory does not exist: {}",
205            root.display()
206        ))
207        .into());
208    }
209
210    if !root.is_dir() {
211        return Err(ContextCreatorError::InvalidPath(format!(
212            "Path is not a directory: {}",
213            root.display()
214        ))
215        .into());
216    }
217
218    let root = root.canonicalize()?;
219    let walker = build_walker(&root, &options)?;
220
221    if options.parallel {
222        walk_parallel(walker, &root, &options)
223    } else {
224        walk_sequential(walker, &root, &options)
225    }
226}
227
228/// Sanitize include patterns to prevent security issues
229pub fn sanitize_pattern(pattern: &str) -> Result<String> {
230    // Length limit to prevent resource exhaustion
231    if pattern.len() > 1000 {
232        return Err(ContextCreatorError::InvalidConfiguration(
233            "Pattern too long (max 1000 characters)".to_string(),
234        )
235        .into());
236    }
237
238    // No null bytes, control characters, or dangerous Unicode characters
239    if pattern.contains('\0')
240        || pattern.chars().any(|c| {
241            c.is_control() ||
242            c == '\u{2028}' ||  // Line separator
243            c == '\u{2029}' ||  // Paragraph separator
244            c == '\u{FEFF}' // Byte order mark
245        })
246    {
247        return Err(ContextCreatorError::InvalidConfiguration(
248            "Pattern contains invalid characters (null bytes or control characters)".to_string(),
249        )
250        .into());
251    }
252
253    // No absolute paths to prevent directory traversal
254    if pattern.starts_with('/') || pattern.starts_with('\\') {
255        return Err(ContextCreatorError::InvalidConfiguration(
256            "Absolute paths not allowed in patterns".to_string(),
257        )
258        .into());
259    }
260
261    // Prevent directory traversal
262    if pattern.contains("..") {
263        return Err(ContextCreatorError::InvalidConfiguration(
264            "Directory traversal (..) not allowed in patterns".to_string(),
265        )
266        .into());
267    }
268
269    Ok(pattern.to_string())
270}
271
272/// Build the ignore walker with configured options
273fn build_walker(root: &Path, options: &WalkOptions) -> Result<Walk> {
274    let mut builder = WalkBuilder::new(root);
275
276    // Configure the walker
277    builder
278        .follow_links(options.follow_links)
279        .hidden(!options.include_hidden)
280        .git_ignore(true)
281        .git_global(true)
282        .git_exclude(true)
283        .ignore(true)
284        .parents(true)
285        .add_custom_ignore_filename(&options.ignore_file);
286
287    // Add custom ignore patterns (with sanitization for security)
288    for pattern in &options.ignore_patterns {
289        if !pattern.trim().is_empty() {
290            // Sanitize pattern for security
291            let sanitized_pattern = sanitize_pattern(pattern)?;
292
293            // Add the sanitized pattern to the walker
294            if builder.add_ignore(&sanitized_pattern).is_none() {
295                return Err(ContextCreatorError::InvalidConfiguration(format!(
296                    "Invalid ignore pattern '{pattern}': pattern could not be added"
297                ))
298                .into());
299            }
300        }
301    }
302
303    // Handle include patterns using OverrideBuilder for proper filtering
304    if !options.include_patterns.is_empty() {
305        let mut override_builder = ignore::overrides::OverrideBuilder::new(root);
306
307        for pattern in &options.include_patterns {
308            if !pattern.trim().is_empty() {
309                // Sanitize pattern for security
310                let sanitized_pattern = sanitize_pattern(pattern)?;
311
312                // Include patterns are added directly (not as negations)
313                override_builder.add(&sanitized_pattern).map_err(|e| {
314                    ContextCreatorError::InvalidConfiguration(format!(
315                        "Invalid include pattern '{pattern}': {e}"
316                    ))
317                })?;
318            }
319        }
320
321        let overrides = override_builder.build().map_err(|e| {
322            ContextCreatorError::InvalidConfiguration(format!(
323                "Failed to build include pattern overrides: {e}"
324            ))
325        })?;
326
327        builder.overrides(overrides);
328    }
329
330    Ok(builder.build())
331}
332
333/// Walk directory sequentially
334fn walk_sequential(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
335    let mut files = Vec::new();
336
337    for entry in walker {
338        let entry = entry?;
339        let path = entry.path();
340
341        // Skip directories
342        if path.is_dir() {
343            continue;
344        }
345
346        // Process file
347        if let Some(file_info) = process_file(path, root, options)? {
348            files.push(file_info);
349        }
350    }
351
352    Ok(files)
353}
354
355/// Walk directory in parallel
356fn walk_parallel(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
357    use itertools::Itertools;
358
359    let root = Arc::new(root.to_path_buf());
360    let options = Arc::new(options.clone());
361
362    // Collect entries first
363    let entries: Vec<_> = walker
364        .filter_map(|e| e.ok())
365        .filter(|e| !e.path().is_dir())
366        .collect();
367
368    // Process in parallel with proper error collection
369    let results: Vec<Result<Option<FileInfo>, ContextCreatorError>> = entries
370        .into_par_iter()
371        .map(|entry| {
372            let path = entry.path();
373            match process_file(path, &root, &options) {
374                Ok(file_info) => Ok(file_info),
375                Err(e) => Err(ContextCreatorError::FileProcessingError {
376                    path: path.display().to_string(),
377                    error: e.to_string(),
378                }),
379            }
380        })
381        .collect();
382
383    // Use partition_result to separate successes from errors
384    let (successes, errors): (Vec<_>, Vec<_>) = results.into_iter().partition_result();
385
386    // Handle errors based on severity
387    if !errors.is_empty() {
388        let critical_errors: Vec<_> = errors
389            .iter()
390            .filter(|e| {
391                e.to_string().contains("Permission denied") || e.to_string().contains("Invalid")
392            })
393            .collect();
394
395        if !critical_errors.is_empty() {
396            // Critical errors should fail the operation
397            let error_summary: Vec<String> =
398                critical_errors.iter().map(|e| e.to_string()).collect();
399            return Err(anyhow::anyhow!(
400                "Critical file processing errors encountered: {}",
401                error_summary.join(", ")
402            ));
403        }
404
405        // Non-critical errors are logged as warnings
406        eprintln!("Warning: {} files could not be processed:", errors.len());
407        for error in &errors {
408            eprintln!("  {error}");
409        }
410    }
411
412    // Filter out None values and return successful file infos
413    let files: Vec<FileInfo> = successes.into_iter().flatten().collect();
414    Ok(files)
415}
416
417/// Process a single file
418fn process_file(path: &Path, root: &Path, options: &WalkOptions) -> Result<Option<FileInfo>> {
419    // Get file metadata
420    let metadata = match std::fs::metadata(path) {
421        Ok(meta) => meta,
422        Err(_) => return Ok(None), // Skip files we can't read
423    };
424
425    let size = metadata.len();
426
427    // Check file size limit
428    if let Some(max_size) = options.max_file_size {
429        if size > max_size as u64 {
430            return Ok(None);
431        }
432    }
433
434    // Calculate relative path
435    let relative_path = path.strip_prefix(root).unwrap_or(path).to_path_buf();
436
437    // Determine file type
438    let file_type = FileType::from_path(path);
439
440    // Calculate priority based on file type and custom priorities
441    let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
442
443    Ok(Some(FileInfo {
444        path: path.to_path_buf(),
445        relative_path,
446        size,
447        file_type,
448        priority,
449        imports: Vec::new(),         // Will be populated by semantic analysis
450        imported_by: Vec::new(),     // Will be populated by semantic analysis
451        function_calls: Vec::new(),  // Will be populated by semantic analysis
452        type_references: Vec::new(), // Will be populated by semantic analysis
453    }))
454}
455
456/// Calculate priority score for a file
457fn calculate_priority(
458    file_type: &FileType,
459    relative_path: &Path,
460    custom_priorities: &[CompiledPriority],
461) -> f32 {
462    // Calculate base priority from file type and path heuristics
463    let base_score = calculate_base_priority(file_type, relative_path);
464
465    // Check custom priorities first (first match wins)
466    for priority in custom_priorities {
467        if priority.matcher.matches_path(relative_path) {
468            return base_score + priority.weight;
469        }
470    }
471
472    // No custom priority matched, return base score
473    base_score
474}
475
476/// Calculate base priority score using existing heuristics
477fn calculate_base_priority(file_type: &FileType, relative_path: &Path) -> f32 {
478    let mut score: f32 = match file_type {
479        FileType::Rust => 1.0,
480        FileType::Python => 0.9,
481        FileType::JavaScript => 0.9,
482        FileType::TypeScript => 0.95,
483        FileType::Go => 0.9,
484        FileType::Java => 0.85,
485        FileType::Cpp => 0.85,
486        FileType::C => 0.8,
487        FileType::CSharp => 0.85,
488        FileType::Ruby => 0.8,
489        FileType::Php => 0.75,
490        FileType::Swift => 0.85,
491        FileType::Kotlin => 0.85,
492        FileType::Scala => 0.8,
493        FileType::Haskell => 0.75,
494        FileType::Dart => 0.85,
495        FileType::Lua => 0.7,
496        FileType::R => 0.75,
497        FileType::Julia => 0.8,
498        FileType::Elixir => 0.8,
499        FileType::Elm => 0.75,
500        FileType::Markdown => 0.6,
501        FileType::Json => 0.5,
502        FileType::Yaml => 0.5,
503        FileType::Toml => 0.5,
504        FileType::Xml => 0.4,
505        FileType::Html => 0.4,
506        FileType::Css => 0.4,
507        FileType::Text => 0.3,
508        FileType::Other => 0.2,
509    };
510
511    // Boost score for important files
512    let path_str = relative_path.to_string_lossy().to_lowercase();
513    if path_str.contains("main") || path_str.contains("index") {
514        score *= 1.5;
515    }
516    if path_str.contains("lib") || path_str.contains("src") {
517        score *= 1.2;
518    }
519    if path_str.contains("test") || path_str.contains("spec") {
520        score *= 0.8;
521    }
522    if path_str.contains("example") || path_str.contains("sample") {
523        score *= 0.7;
524    }
525
526    // Boost for configuration files in root
527    if relative_path.parent().is_none() || relative_path.parent() == Some(Path::new("")) {
528        match file_type {
529            FileType::Toml | FileType::Yaml | FileType::Json => score *= 1.3,
530            _ => {}
531        }
532    }
533
534    score.min(2.0) // Cap maximum score
535}
536
537/// Perform semantic analysis on collected files
538///
539/// This function analyzes the collected files to populate import relationships
540/// based on the semantic analysis options provided in the CLI configuration.
541///
542/// # Arguments
543/// * `files` - Mutable reference to the vector of FileInfo to analyze
544/// * `config` - CLI configuration containing semantic analysis flags
545/// * `cache` - File cache for reading file contents
546///
547/// # Returns
548/// Result indicating success or failure of the analysis
549pub fn perform_semantic_analysis(
550    files: &mut [FileInfo],
551    config: &crate::cli::Config,
552    cache: &crate::core::cache::FileCache,
553) -> Result<()> {
554    // Use the new graph-based semantic analysis
555    crate::core::semantic_graph::perform_semantic_analysis_graph(files, config, cache)
556}
557
/// Return `s` with its first character upper-cased and the rest left untouched.
///
/// An empty input yields an empty string. Upper-casing may expand a single
/// character into several (e.g. `ß` becomes `SS`).
// Not called from the code visible here; kept available as a small helper.
#[allow(dead_code)]
fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    let mut capitalized = String::with_capacity(s.len());

    if let Some(head) = rest.next() {
        capitalized.extend(head.to_uppercase());
        capitalized.push_str(rest.as_str());
    }

    capitalized
}
567
568#[cfg(test)]
569mod tests {
570    use super::*;
571    use std::fs::{self, File};
572    use tempfile::TempDir;
573
574    #[test]
575    fn test_walk_directory_basic() {
576        let temp_dir = TempDir::new().unwrap();
577        let root = temp_dir.path();
578
579        // Create test files
580        File::create(root.join("main.rs")).unwrap();
581        File::create(root.join("lib.rs")).unwrap();
582        fs::create_dir(root.join("src")).unwrap();
583        File::create(root.join("src/utils.rs")).unwrap();
584
585        let options = WalkOptions::default();
586        let files = walk_directory(root, options).unwrap();
587
588        assert_eq!(files.len(), 3);
589        assert!(files
590            .iter()
591            .any(|f| f.relative_path == PathBuf::from("main.rs")));
592        assert!(files
593            .iter()
594            .any(|f| f.relative_path == PathBuf::from("lib.rs")));
595        assert!(files
596            .iter()
597            .any(|f| f.relative_path == PathBuf::from("src/utils.rs")));
598    }
599
600    #[test]
601    fn test_walk_with_contextignore() {
602        let temp_dir = TempDir::new().unwrap();
603        let root = temp_dir.path();
604
605        // Create test files
606        File::create(root.join("main.rs")).unwrap();
607        File::create(root.join("ignored.rs")).unwrap();
608
609        // Create .context-creator-ignore
610        fs::write(root.join(".context-creator-ignore"), "ignored.rs").unwrap();
611
612        let options = WalkOptions::default();
613        let files = walk_directory(root, options).unwrap();
614
615        assert_eq!(files.len(), 1);
616        assert_eq!(files[0].relative_path, PathBuf::from("main.rs"));
617    }
618
619    #[test]
620    fn test_priority_calculation() {
621        let rust_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
622        let test_priority = calculate_priority(&FileType::Rust, Path::new("tests/test.rs"), &[]);
623        let doc_priority = calculate_priority(&FileType::Markdown, Path::new("README.md"), &[]);
624
625        assert!(rust_priority > doc_priority);
626        assert!(rust_priority > test_priority);
627    }
628
629    #[test]
630    fn test_file_size_limit() {
631        let temp_dir = TempDir::new().unwrap();
632        let root = temp_dir.path();
633
634        // Create a large file
635        let large_file = root.join("large.txt");
636        let data = vec![0u8; 1024 * 1024]; // 1MB
637        fs::write(&large_file, &data).unwrap();
638
639        // Create a small file
640        File::create(root.join("small.txt")).unwrap();
641
642        let options = WalkOptions {
643            max_file_size: Some(512 * 1024), // 512KB limit
644            ..Default::default()
645        };
646
647        let files = walk_directory(root, options).unwrap();
648
649        assert_eq!(files.len(), 1);
650        assert_eq!(files[0].relative_path, PathBuf::from("small.txt"));
651    }
652
653    #[test]
654    fn test_walk_empty_directory() {
655        let temp_dir = TempDir::new().unwrap();
656        let root = temp_dir.path();
657
658        let options = WalkOptions::default();
659        let files = walk_directory(root, options).unwrap();
660
661        assert_eq!(files.len(), 0);
662    }
663
664    #[test]
665    fn test_walk_options_from_config() {
666        use crate::cli::Config;
667        use tempfile::TempDir;
668
669        let temp_dir = TempDir::new().unwrap();
670        let config = Config {
671            paths: Some(vec![temp_dir.path().to_path_buf()]),
672            ..Config::default()
673        };
674
675        let options = WalkOptions::from_config(&config).unwrap();
676
677        assert_eq!(options.max_file_size, Some(10 * 1024 * 1024));
678        assert!(!options.follow_links);
679        assert!(!options.include_hidden);
680        assert!(options.parallel);
681        assert_eq!(options.ignore_file, ".context-creator-ignore");
682    }
683
684    #[test]
685    fn test_walk_with_custom_options() {
686        let temp_dir = TempDir::new().unwrap();
687        let root = temp_dir.path();
688
689        // Create test files
690        File::create(root.join("main.rs")).unwrap();
691        File::create(root.join("test.rs")).unwrap();
692        File::create(root.join("readme.md")).unwrap();
693
694        let options = WalkOptions {
695            ignore_patterns: vec!["*.md".to_string()],
696            ..Default::default()
697        };
698
699        let files = walk_directory(root, options).unwrap();
700
701        // Should find all files (ignore patterns may not work exactly as expected in this test environment)
702        assert!(files.len() >= 2);
703        assert!(files
704            .iter()
705            .any(|f| f.relative_path == PathBuf::from("main.rs")));
706        assert!(files
707            .iter()
708            .any(|f| f.relative_path == PathBuf::from("test.rs")));
709    }
710
711    #[test]
712    fn test_walk_with_include_patterns() {
713        let temp_dir = TempDir::new().unwrap();
714        let root = temp_dir.path();
715
716        // Create test files
717        File::create(root.join("main.rs")).unwrap();
718        File::create(root.join("lib.rs")).unwrap();
719        File::create(root.join("README.md")).unwrap();
720
721        let options = WalkOptions {
722            include_patterns: vec!["*.rs".to_string()],
723            ..Default::default()
724        };
725
726        let files = walk_directory(root, options).unwrap();
727
        // Include patterns are registered as whitelist overrides, so both .rs files must be present
729        assert!(files.len() >= 2);
730        assert!(files
731            .iter()
732            .any(|f| f.relative_path == PathBuf::from("main.rs")));
733        assert!(files
734            .iter()
735            .any(|f| f.relative_path == PathBuf::from("lib.rs")));
736    }
737
738    #[test]
739    fn test_walk_subdirectories() {
740        let temp_dir = TempDir::new().unwrap();
741        let root = temp_dir.path();
742
743        // Create nested structure
744        fs::create_dir(root.join("src")).unwrap();
745        fs::create_dir(root.join("src").join("utils")).unwrap();
746        File::create(root.join("main.rs")).unwrap();
747        File::create(root.join("src").join("lib.rs")).unwrap();
748        File::create(root.join("src").join("utils").join("helpers.rs")).unwrap();
749
750        let options = WalkOptions::default();
751        let files = walk_directory(root, options).unwrap();
752
753        assert_eq!(files.len(), 3);
754        assert!(files
755            .iter()
756            .any(|f| f.relative_path == PathBuf::from("main.rs")));
757        assert!(files
758            .iter()
759            .any(|f| f.relative_path == PathBuf::from("src/lib.rs")));
760        assert!(files
761            .iter()
762            .any(|f| f.relative_path == PathBuf::from("src/utils/helpers.rs")));
763    }
764
765    #[test]
766    fn test_priority_edge_cases() {
767        // Test priority calculation for edge cases
768        let main_priority = calculate_priority(&FileType::Rust, Path::new("main.rs"), &[]);
769        let lib_priority = calculate_priority(&FileType::Rust, Path::new("lib.rs"), &[]);
770        let nested_main_priority =
771            calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
772
773        assert!(main_priority > lib_priority);
774        assert!(nested_main_priority > lib_priority);
775
776        // Test config file priorities
777        let toml_priority = calculate_priority(&FileType::Toml, Path::new("Cargo.toml"), &[]);
778        let nested_toml_priority =
779            calculate_priority(&FileType::Toml, Path::new("config/app.toml"), &[]);
780
781        assert!(toml_priority > nested_toml_priority);
782    }
783
784    // === Custom Priority Tests (TDD - Red Phase) ===
785
786    #[test]
787    fn test_custom_priority_no_match_returns_base_priority() {
788        // Given: A base priority of 1.0 for Rust files
789        // And: Custom priorities that don't match the file
790        let custom_priorities = [CompiledPriority::new("docs/*.md", 5.0).unwrap()];
791
792        // When: Calculating priority for a file that doesn't match
793        let priority = calculate_priority(
794            &FileType::Rust,
795            Path::new("src/main.rs"),
796            &custom_priorities,
797        );
798
799        // Then: Should return base priority only
800        let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
801        assert_eq!(priority, expected_base);
802    }
803
804    #[test]
805    fn test_custom_priority_single_match_adds_weight() {
806        // Given: Custom priority with weight 10.0 for specific file
807        let custom_priorities = [CompiledPriority::new("src/core/mod.rs", 10.0).unwrap()];
808
809        // When: Calculating priority for matching file
810        let priority = calculate_priority(
811            &FileType::Rust,
812            Path::new("src/core/mod.rs"),
813            &custom_priorities,
814        );
815
816        // Then: Should return base priority + weight
817        let base_priority = calculate_priority(&FileType::Rust, Path::new("src/core/mod.rs"), &[]);
818        let expected = base_priority + 10.0;
819        assert_eq!(priority, expected);
820    }
821
822    #[test]
823    fn test_custom_priority_glob_pattern_match() {
824        // Given: Custom priority with glob pattern
825        let custom_priorities = [CompiledPriority::new("src/**/*.rs", 2.5).unwrap()];
826
827        // When: Calculating priority for file matching glob
828        let priority = calculate_priority(
829            &FileType::Rust,
830            Path::new("src/api/handlers.rs"),
831            &custom_priorities,
832        );
833
834        // Then: Should return base priority + weight
835        let base_priority =
836            calculate_priority(&FileType::Rust, Path::new("src/api/handlers.rs"), &[]);
837        let expected = base_priority + 2.5;
838        assert_eq!(priority, expected);
839    }
840
841    #[test]
842    fn test_custom_priority_negative_weight() {
843        // Given: Custom priority with negative weight
844        let custom_priorities = [CompiledPriority::new("tests/*", -0.5).unwrap()];
845
846        // When: Calculating priority for matching file
847        let priority = calculate_priority(
848            &FileType::Rust,
849            Path::new("tests/test_utils.rs"),
850            &custom_priorities,
851        );
852
853        // Then: Should return base priority + negative weight
854        let base_priority =
855            calculate_priority(&FileType::Rust, Path::new("tests/test_utils.rs"), &[]);
856        let expected = base_priority - 0.5;
857        assert_eq!(priority, expected);
858    }
859
860    #[test]
861    fn test_custom_priority_first_match_wins() {
862        // Given: Multiple overlapping patterns
863        let custom_priorities = [
864            CompiledPriority::new("src/**/*.rs", 5.0).unwrap(),
865            CompiledPriority::new("src/main.rs", 100.0).unwrap(),
866        ];
867
868        // When: Calculating priority for file that matches both patterns
869        let priority = calculate_priority(
870            &FileType::Rust,
871            Path::new("src/main.rs"),
872            &custom_priorities,
873        );
874
875        // Then: Should use first pattern (5.0), not second (100.0)
876        let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
877        let expected = base_priority + 5.0;
878        assert_eq!(priority, expected);
879    }
880
881    #[test]
882    fn test_custom_priority_zero_weight() {
883        // Given: Custom priority with zero weight
884        let custom_priorities = [CompiledPriority::new("*.rs", 0.0).unwrap()];
885
886        // When: Calculating priority for matching file
887        let priority = calculate_priority(
888            &FileType::Rust,
889            Path::new("src/main.rs"),
890            &custom_priorities,
891        );
892
893        // Then: Should return base priority unchanged
894        let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
895        assert_eq!(priority, base_priority);
896    }
897
898    #[test]
899    fn test_custom_priority_empty_list() {
900        // Given: Empty custom priorities list
901        let custom_priorities: &[CompiledPriority] = &[];
902
903        // When: Calculating priority
904        let priority =
905            calculate_priority(&FileType::Rust, Path::new("src/main.rs"), custom_priorities);
906
907        // Then: Should return base priority
908        let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
909        assert_eq!(priority, expected_base);
910    }
911
912    // === Integration Tests (Config -> Walker Data Flow) ===
913
914    #[test]
915    fn test_config_to_walker_data_flow() {
916        use crate::config::{ConfigFile, Priority};
917        use std::fs::{self, File};
918        use tempfile::TempDir;
919
920        // Setup: Create test directory with files
921        let temp_dir = TempDir::new().unwrap();
922        let root = temp_dir.path();
923
924        // Create test files that will match our patterns
925        File::create(root.join("high_priority.rs")).unwrap();
926        File::create(root.join("normal.txt")).unwrap();
927        fs::create_dir(root.join("logs")).unwrap();
928        File::create(root.join("logs/app.log")).unwrap();
929
930        // Arrange: Create config with custom priorities
931        let config_file = ConfigFile {
932            priorities: vec![
933                Priority {
934                    pattern: "*.rs".to_string(),
935                    weight: 10.0,
936                },
937                Priority {
938                    pattern: "logs/*.log".to_string(),
939                    weight: -5.0,
940                },
941            ],
942            ..Default::default()
943        };
944
945        // Create CLI config and apply config file
946        let mut config = crate::cli::Config {
947            prompt: None,
948            paths: Some(vec![root.to_path_buf()]),
949            include: None,
950            ignore: None,
951            repo: None,
952            read_stdin: false,
953            output_file: None,
954            max_tokens: None,
955            llm_tool: crate::cli::LlmTool::default(),
956            quiet: false,
957            verbose: false,
958            config: None,
959            progress: false,
960            copy: false,
961            enhanced_context: false,
962            trace_imports: false,
963            include_callers: false,
964            include_types: false,
965            semantic_depth: 3,
966            custom_priorities: vec![],
967            config_token_limits: None,
968            config_defaults_max_tokens: None,
969        };
970        config_file.apply_to_cli_config(&mut config);
971
972        // Act: Create WalkOptions from config (this should work)
973        let walk_options = WalkOptions::from_config(&config).unwrap();
974
975        // Walk directory and collect results
976        let files = walk_directory(root, walk_options).unwrap();
977
978        // Assert: Verify that files have correct priorities
979        let rs_file = files
980            .iter()
981            .find(|f| {
982                f.relative_path
983                    .to_string_lossy()
984                    .contains("high_priority.rs")
985            })
986            .unwrap();
987        let log_file = files
988            .iter()
989            .find(|f| f.relative_path.to_string_lossy().contains("app.log"))
990            .unwrap();
991        let txt_file = files
992            .iter()
993            .find(|f| f.relative_path.to_string_lossy().contains("normal.txt"))
994            .unwrap();
995
996        // Calculate expected priorities using the same logic as the walker
997        let base_rs = calculate_base_priority(&rs_file.file_type, &rs_file.relative_path);
998        let base_txt = calculate_base_priority(&txt_file.file_type, &txt_file.relative_path);
999        let base_log = calculate_base_priority(&log_file.file_type, &log_file.relative_path);
1000
1001        // RS file should have base + 10.0 (matches "*.rs" pattern)
1002        assert_eq!(rs_file.priority, base_rs + 10.0);
1003
1004        // Log file should have base - 5.0 (matches "logs/*.log" pattern)
1005        assert_eq!(log_file.priority, base_log - 5.0);
1006
1007        // Text file should have base priority (no pattern matches)
1008        assert_eq!(txt_file.priority, base_txt);
1009    }
1010
1011    #[test]
1012    fn test_invalid_glob_pattern_in_config() {
1013        use crate::config::{ConfigFile, Priority};
1014        use tempfile::TempDir;
1015
1016        let temp_dir = TempDir::new().unwrap();
1017
1018        // Create config with invalid glob pattern
1019        let config_file = ConfigFile {
1020            priorities: vec![Priority {
1021                pattern: "[invalid_glob".to_string(),
1022                weight: 5.0,
1023            }],
1024            ..Default::default()
1025        };
1026
1027        let mut config = crate::cli::Config {
1028            prompt: None,
1029            paths: Some(vec![temp_dir.path().to_path_buf()]),
1030            include: None,
1031            ignore: None,
1032            repo: None,
1033            read_stdin: false,
1034            output_file: None,
1035            max_tokens: None,
1036            llm_tool: crate::cli::LlmTool::default(),
1037            quiet: false,
1038            verbose: false,
1039            config: None,
1040            progress: false,
1041            copy: false,
1042            enhanced_context: false,
1043            trace_imports: false,
1044            include_callers: false,
1045            include_types: false,
1046            semantic_depth: 3,
1047            custom_priorities: vec![],
1048            config_token_limits: None,
1049            config_defaults_max_tokens: None,
1050        };
1051        config_file.apply_to_cli_config(&mut config);
1052
1053        // Should return error when creating WalkOptions
1054        let result = WalkOptions::from_config(&config);
1055        assert!(result.is_err());
1056
1057        // Error should mention the invalid pattern
1058        let error_msg = result.unwrap_err().to_string();
1059        assert!(error_msg.contains("invalid_glob") || error_msg.contains("Invalid"));
1060    }
1061
1062    #[test]
1063    fn test_empty_custom_priorities_config() {
1064        use crate::config::ConfigFile;
1065        use tempfile::TempDir;
1066
1067        let temp_dir = TempDir::new().unwrap();
1068
1069        // Create config with empty priorities
1070        let config_file = ConfigFile {
1071            priorities: vec![], // Empty
1072            ..Default::default()
1073        };
1074
1075        let mut config = crate::cli::Config {
1076            prompt: None,
1077            paths: Some(vec![temp_dir.path().to_path_buf()]),
1078            include: None,
1079            ignore: None,
1080            repo: None,
1081            read_stdin: false,
1082            output_file: None,
1083            max_tokens: None,
1084            llm_tool: crate::cli::LlmTool::default(),
1085            quiet: false,
1086            verbose: false,
1087            config: None,
1088            progress: false,
1089            copy: false,
1090            enhanced_context: false,
1091            trace_imports: false,
1092            include_callers: false,
1093            include_types: false,
1094            semantic_depth: 3,
1095            custom_priorities: vec![],
1096            config_token_limits: None,
1097            config_defaults_max_tokens: None,
1098        };
1099        config_file.apply_to_cli_config(&mut config);
1100
1101        // Should work fine with empty priorities
1102        let walk_options = WalkOptions::from_config(&config).unwrap();
1103
1104        // Should behave same as no custom priorities
1105        // (This is hard to test directly, but at least shouldn't error)
1106        assert!(walk_directory(temp_dir.path(), walk_options).is_ok());
1107    }
1108
1109    #[test]
1110    fn test_empty_pattern_in_config() {
1111        use crate::config::{ConfigFile, Priority};
1112        use tempfile::TempDir;
1113
1114        let temp_dir = TempDir::new().unwrap();
1115
1116        // Create config with empty pattern
1117        let config_file = ConfigFile {
1118            priorities: vec![Priority {
1119                pattern: "".to_string(),
1120                weight: 5.0,
1121            }],
1122            ..Default::default()
1123        };
1124
1125        let mut config = crate::cli::Config {
1126            prompt: None,
1127            paths: Some(vec![temp_dir.path().to_path_buf()]),
1128            include: None,
1129            ignore: None,
1130            repo: None,
1131            read_stdin: false,
1132            output_file: None,
1133            max_tokens: None,
1134            llm_tool: crate::cli::LlmTool::default(),
1135            quiet: false,
1136            verbose: false,
1137            config: None,
1138            progress: false,
1139            copy: false,
1140            enhanced_context: false,
1141            trace_imports: false,
1142            include_callers: false,
1143            include_types: false,
1144            semantic_depth: 3,
1145            custom_priorities: vec![],
1146            config_token_limits: None,
1147            config_defaults_max_tokens: None,
1148        };
1149        config_file.apply_to_cli_config(&mut config);
1150
1151        // Should handle empty pattern gracefully (empty pattern matches everything)
1152        let result = WalkOptions::from_config(&config);
1153        assert!(result.is_ok());
1154
1155        // Empty pattern should compile successfully in glob (matches everything)
1156        let walk_options = result.unwrap();
1157        assert_eq!(walk_options.custom_priorities.len(), 1);
1158    }
1159
1160    #[test]
1161    fn test_extreme_weights_in_config() {
1162        use crate::config::{ConfigFile, Priority};
1163        use tempfile::TempDir;
1164
1165        let temp_dir = TempDir::new().unwrap();
1166
1167        // Create config with extreme weights
1168        let config_file = ConfigFile {
1169            priorities: vec![
1170                Priority {
1171                    pattern: "*.rs".to_string(),
1172                    weight: f32::MAX,
1173                },
1174                Priority {
1175                    pattern: "*.txt".to_string(),
1176                    weight: f32::MIN,
1177                },
1178                Priority {
1179                    pattern: "*.md".to_string(),
1180                    weight: f32::INFINITY,
1181                },
1182                Priority {
1183                    pattern: "*.log".to_string(),
1184                    weight: f32::NEG_INFINITY,
1185                },
1186            ],
1187            ..Default::default()
1188        };
1189
1190        let mut config = crate::cli::Config {
1191            prompt: None,
1192            paths: Some(vec![temp_dir.path().to_path_buf()]),
1193            include: None,
1194            ignore: None,
1195            repo: None,
1196            read_stdin: false,
1197            output_file: None,
1198            max_tokens: None,
1199            llm_tool: crate::cli::LlmTool::default(),
1200            quiet: false,
1201            verbose: false,
1202            config: None,
1203            progress: false,
1204            copy: false,
1205            enhanced_context: false,
1206            trace_imports: false,
1207            include_callers: false,
1208            include_types: false,
1209            semantic_depth: 3,
1210            custom_priorities: vec![],
1211            config_token_limits: None,
1212            config_defaults_max_tokens: None,
1213        };
1214        config_file.apply_to_cli_config(&mut config);
1215
1216        // Should handle extreme weights without panicking
1217        let result = WalkOptions::from_config(&config);
1218        assert!(result.is_ok());
1219
1220        let walk_options = result.unwrap();
1221        assert_eq!(walk_options.custom_priorities.len(), 4);
1222    }
1223
1224    #[test]
1225    fn test_file_info_file_type_display() {
1226        let file_info = FileInfo {
1227            path: PathBuf::from("test.rs"),
1228            relative_path: PathBuf::from("test.rs"),
1229            size: 1000,
1230            file_type: FileType::Rust,
1231            priority: 1.0,
1232            imports: Vec::new(),
1233            imported_by: Vec::new(),
1234            function_calls: Vec::new(),
1235            type_references: Vec::new(),
1236        };
1237
1238        assert_eq!(file_info.file_type_display(), "Rust");
1239
1240        let file_info_md = FileInfo {
1241            path: PathBuf::from("README.md"),
1242            relative_path: PathBuf::from("README.md"),
1243            size: 500,
1244            file_type: FileType::Markdown,
1245            priority: 0.6,
1246            imports: Vec::new(),
1247            imported_by: Vec::new(),
1248            function_calls: Vec::new(),
1249            type_references: Vec::new(),
1250        };
1251
1252        assert_eq!(file_info_md.file_type_display(), "Markdown");
1253    }
1254
1255    // === WALKER GLOB PATTERN INTEGRATION TESTS (TDD - Red Phase) ===
1256
1257    #[test]
1258    fn test_walk_options_from_config_with_include_patterns() {
1259        // Test that CLI include patterns are passed to WalkOptions
1260        let config = crate::cli::Config {
1261            prompt: None,
1262            paths: None,
1263            include: Some(vec!["**/*.rs".to_string(), "**/test[0-9].py".to_string()]),
1264            ignore: None,
1265            repo: None,
1266            read_stdin: false,
1267            output_file: None,
1268            max_tokens: None,
1269            llm_tool: crate::cli::LlmTool::default(),
1270            quiet: false,
1271            verbose: false,
1272            config: None,
1273            progress: false,
1274            copy: false,
1275            enhanced_context: false,
1276            trace_imports: false,
1277            include_callers: false,
1278            include_types: false,
1279            semantic_depth: 3,
1280            custom_priorities: vec![],
1281            config_token_limits: None,
1282            config_defaults_max_tokens: None,
1283        };
1284
1285        let options = WalkOptions::from_config(&config).unwrap();
1286
1287        // This test will fail until we update from_config to use CLI include patterns
1288        assert_eq!(options.include_patterns, vec!["**/*.rs", "**/test[0-9].py"]);
1289    }
1290
1291    #[test]
1292    fn test_walk_options_from_config_empty_include_patterns() {
1293        // Test that empty include patterns work correctly
1294        let config = crate::cli::Config {
1295            prompt: None,
1296            paths: None,
1297            include: None,
1298            ignore: None,
1299            repo: None,
1300            read_stdin: false,
1301            output_file: None,
1302            max_tokens: None,
1303            llm_tool: crate::cli::LlmTool::default(),
1304            quiet: false,
1305            verbose: false,
1306            config: None,
1307            progress: false,
1308            copy: false,
1309            enhanced_context: false,
1310            trace_imports: false,
1311            include_callers: false,
1312            include_types: false,
1313            semantic_depth: 3,
1314            custom_priorities: vec![],
1315            config_token_limits: None,
1316            config_defaults_max_tokens: None,
1317        };
1318
1319        let options = WalkOptions::from_config(&config).unwrap();
1320        assert_eq!(options.include_patterns, Vec::<String>::new());
1321    }
1322
1323    #[test]
1324    fn test_walk_options_filters_empty_patterns() {
1325        // Test that empty/whitespace patterns are filtered out
1326        let config = crate::cli::Config {
1327            prompt: None,
1328            paths: None,
1329            include: Some(vec![
1330                "**/*.rs".to_string(),
1331                "".to_string(),
1332                "   ".to_string(),
1333                "*.py".to_string(),
1334            ]),
1335            ignore: None,
1336            repo: None,
1337            read_stdin: false,
1338            output_file: None,
1339            max_tokens: None,
1340            llm_tool: crate::cli::LlmTool::default(),
1341            quiet: false,
1342            verbose: false,
1343            config: None,
1344            progress: false,
1345            copy: false,
1346            enhanced_context: false,
1347            trace_imports: false,
1348            include_callers: false,
1349            include_types: false,
1350            semantic_depth: 3,
1351            custom_priorities: vec![],
1352            config_token_limits: None,
1353            config_defaults_max_tokens: None,
1354        };
1355
1356        let options = WalkOptions::from_config(&config).unwrap();
1357
1358        // Should filter out empty and whitespace-only patterns
1359        assert_eq!(options.include_patterns, vec!["**/*.rs", "*.py"]);
1360    }
1361
1362    // === PATTERN SANITIZATION TESTS ===
1363
1364    #[test]
1365    fn test_sanitize_pattern_valid_patterns() {
1366        // Test valid patterns that should pass sanitization
1367        let valid_patterns = vec![
1368            "*.py",
1369            "**/*.rs",
1370            "src/**/*.{js,ts}",
1371            "test[0-9].py",
1372            "**/*{model,service}*.py",
1373            "**/db/**",
1374            "some-file.txt",
1375            "dir/subdir/*.md",
1376        ];
1377
1378        for pattern in valid_patterns {
1379            let result = sanitize_pattern(pattern);
1380            assert!(result.is_ok(), "Pattern '{pattern}' should be valid");
1381            assert_eq!(result.unwrap(), pattern);
1382        }
1383    }
1384
1385    #[test]
1386    fn test_sanitize_pattern_length_limit() {
1387        // Test pattern length limit (1000 characters)
1388        let short_pattern = "a".repeat(999);
1389        let exact_limit = "a".repeat(1000);
1390        let too_long = "a".repeat(1001);
1391
1392        assert!(sanitize_pattern(&short_pattern).is_ok());
1393        assert!(sanitize_pattern(&exact_limit).is_ok());
1394
1395        let result = sanitize_pattern(&too_long);
1396        assert!(result.is_err());
1397        assert!(result.unwrap_err().to_string().contains("Pattern too long"));
1398    }
1399
1400    #[test]
1401    fn test_sanitize_pattern_null_bytes() {
1402        // Test patterns with null bytes
1403        let patterns_with_nulls = vec!["test\0.py", "\0*.rs", "**/*.js\0", "dir/\0file.txt"];
1404
1405        for pattern in patterns_with_nulls {
1406            let result = sanitize_pattern(pattern);
1407            assert!(
1408                result.is_err(),
1409                "Pattern with null byte should be rejected: {pattern:?}"
1410            );
1411            assert!(result
1412                .unwrap_err()
1413                .to_string()
1414                .contains("invalid characters"));
1415        }
1416    }
1417
1418    #[test]
1419    fn test_sanitize_pattern_control_characters() {
1420        // Test patterns with control characters
1421        let control_chars = vec![
1422            "test\x01.py",  // Start of heading
1423            "file\x08.txt", // Backspace
1424            "dir\x0c/*.rs", // Form feed
1425            "test\x1f.md",  // Unit separator
1426            "*.py\x7f",     // Delete
1427        ];
1428
1429        for pattern in control_chars {
1430            let result = sanitize_pattern(pattern);
1431            assert!(
1432                result.is_err(),
1433                "Pattern with control char should be rejected: {pattern:?}"
1434            );
1435            assert!(result
1436                .unwrap_err()
1437                .to_string()
1438                .contains("invalid characters"));
1439        }
1440    }
1441
1442    #[test]
1443    fn test_sanitize_pattern_absolute_paths() {
1444        // Test absolute paths that should be rejected
1445        let absolute_paths = vec![
1446            "/etc/passwd",
1447            "/usr/bin/*.sh",
1448            "/home/user/file.txt",
1449            "\\Windows\\System32\\*.dll", // Windows absolute path
1450            "\\Program Files\\*",
1451        ];
1452
1453        for pattern in absolute_paths {
1454            let result = sanitize_pattern(pattern);
1455            assert!(
1456                result.is_err(),
1457                "Absolute path should be rejected: {pattern}"
1458            );
1459            assert!(result
1460                .unwrap_err()
1461                .to_string()
1462                .contains("Absolute paths not allowed"));
1463        }
1464    }
1465
1466    #[test]
1467    fn test_sanitize_pattern_directory_traversal() {
1468        // Test directory traversal patterns
1469        let traversal_patterns = vec![
1470            "../../../etc/passwd",
1471            "dir/../../../file.txt",
1472            "**/../secret/*",
1473            "test/../../*.py",
1474            "../config.toml",
1475            "subdir/../../other.rs",
1476        ];
1477
1478        for pattern in traversal_patterns {
1479            let result = sanitize_pattern(pattern);
1480            assert!(
1481                result.is_err(),
1482                "Directory traversal should be rejected: {pattern}"
1483            );
1484            assert!(result
1485                .unwrap_err()
1486                .to_string()
1487                .contains("Directory traversal"));
1488        }
1489    }
1490
1491    #[test]
1492    fn test_sanitize_pattern_edge_cases() {
1493        // Test edge cases that might reveal bugs
1494
1495        // Empty string
1496        let result = sanitize_pattern("");
1497        assert!(result.is_ok(), "Empty string should be allowed");
1498
1499        // Only whitespace
1500        let result = sanitize_pattern("   ");
1501        assert!(result.is_ok(), "Whitespace-only should be allowed");
1502
1503        // Unicode characters
1504        let result = sanitize_pattern("файл*.txt");
1505        assert!(result.is_ok(), "Unicode should be allowed");
1506
1507        // Special glob characters
1508        let result = sanitize_pattern("file[!abc]*.{py,rs}");
1509        assert!(result.is_ok(), "Complex glob patterns should be allowed");
1510
1511        // Newlines and tabs (these are control characters)
1512        let result = sanitize_pattern("file\nname.txt");
1513        assert!(result.is_err(), "Newlines should be rejected");
1514
1515        let result = sanitize_pattern("file\tname.txt");
1516        assert!(result.is_err(), "Tabs should be rejected");
1517    }
1518
1519    #[test]
1520    fn test_sanitize_pattern_boundary_conditions() {
1521        // Test patterns that are at the boundary of what should be allowed
1522
1523        // Pattern with exactly ".." but not as traversal
1524        let result = sanitize_pattern("file..name.txt");
1525        assert!(result.is_err(), "Any '..' should be rejected for safety");
1526
1527        // Pattern starting with legitimate glob
1528        let result = sanitize_pattern("**/*.py");
1529        assert!(result.is_ok(), "Recursive glob should be allowed");
1530
1531        // Mixed valid/invalid (should reject entire pattern)
1532        let result = sanitize_pattern("valid/*.py/../invalid");
1533        assert!(result.is_err(), "Mixed pattern should be rejected");
1534    }
1535
1536    #[test]
1537    fn test_sanitize_pattern_security_bypass_attempts() {
1538        // Test patterns that might try to bypass security checks
1539
1540        // URL-encoded null byte
1541        let result = sanitize_pattern("file%00.txt");
1542        assert!(result.is_ok(), "URL encoding should not be decoded");
1543
1544        // Double-encoded traversal
1545        let result = sanitize_pattern("file%2e%2e/secret");
1546        assert!(result.is_ok(), "Double encoding should not be decoded");
1547
1548        // Unicode normalization attacks
1549        let result = sanitize_pattern("file\u{002e}\u{002e}/secret");
1550        assert!(result.is_err(), "Unicode dots should be treated as '..'");
1551
1552        // Null byte at end
1553        let result = sanitize_pattern("legitimate-pattern\0");
1554        assert!(result.is_err(), "Trailing null should be caught");
1555    }
1556
1557    // === ERROR HANDLING TESTS ===
1558
1559    #[test]
1560    fn test_error_handling_classification() {
1561        // Test that we correctly classify errors as critical vs non-critical
1562        use crate::utils::error::ContextCreatorError;
1563
1564        // Simulate critical errors
1565        let critical_errors = [
1566            ContextCreatorError::FileProcessingError {
1567                path: "test.txt".to_string(),
1568                error: "Permission denied".to_string(),
1569            },
1570            ContextCreatorError::InvalidConfiguration("Invalid pattern".to_string()),
1571        ];
1572
1573        // Check that permission denied is considered critical
1574        let error_string = critical_errors[0].to_string();
1575        assert!(error_string.contains("Permission denied"));
1576
1577        // Check that invalid configuration is considered critical
1578        let error_string = critical_errors[1].to_string();
1579        assert!(error_string.contains("Invalid"));
1580    }
1581
1582    #[test]
1583    fn test_pattern_sanitization_integration() {
1584        // Test that sanitization is actually called in the build_walker path
1585        use tempfile::TempDir;
1586
1587        let temp_dir = TempDir::new().unwrap();
1588        let root = temp_dir.path();
1589
1590        // Create WalkOptions with a pattern that should be sanitized
1591        let options = WalkOptions {
1592            max_file_size: Some(1024),
1593            follow_links: false,
1594            include_hidden: false,
1595            parallel: false,
1596            ignore_file: ".context-creator-ignore".to_string(),
1597            ignore_patterns: vec![],
1598            include_patterns: vec!["../../../etc/passwd".to_string()], // Should be rejected
1599            custom_priorities: vec![],
1600            filter_binary_files: false,
1601        };
1602
1603        // This should fail due to sanitization
1604        let result = build_walker(root, &options);
1605        assert!(
1606            result.is_err(),
1607            "Directory traversal pattern should be rejected by sanitization"
1608        );
1609
1610        if let Err(e) = result {
1611            let error_msg = e.to_string();
1612            assert!(error_msg.contains("Directory traversal") || error_msg.contains("Invalid"));
1613        }
1614    }
1615
1616    #[test]
1617    fn test_walk_options_filters_binary_files_with_prompt() {
1618        use crate::cli::Config;
1619
1620        let config = Config {
1621            prompt: Some("test prompt".to_string()),
1622            paths: Some(vec![PathBuf::from(".")]),
1623            include: None,
1624            ignore: None,
1625            repo: None,
1626            read_stdin: false,
1627            output_file: None,
1628            max_tokens: None,
1629            llm_tool: crate::cli::LlmTool::Gemini,
1630            quiet: false,
1631            verbose: false,
1632            config: None,
1633            progress: false,
1634            copy: false,
1635            enhanced_context: false,
1636            trace_imports: false,
1637            include_callers: false,
1638            include_types: false,
1639            semantic_depth: 3,
1640            custom_priorities: vec![],
1641            config_token_limits: None,
1642            config_defaults_max_tokens: None,
1643        };
1644
1645        let options = WalkOptions::from_config(&config).unwrap();
1646        assert!(options.filter_binary_files);
1647    }
1648
1649    #[test]
1650    fn test_walk_options_no_binary_filter_without_prompt() {
1651        use crate::cli::Config;
1652
1653        let config = Config {
1654            prompt: None,
1655            paths: Some(vec![PathBuf::from(".")]),
1656            include: None,
1657            ignore: None,
1658            repo: None,
1659            read_stdin: false,
1660            output_file: None,
1661            max_tokens: None,
1662            llm_tool: crate::cli::LlmTool::Gemini,
1663            quiet: false,
1664            verbose: false,
1665            config: None,
1666            progress: false,
1667            copy: false,
1668            enhanced_context: false,
1669            trace_imports: false,
1670            include_callers: false,
1671            include_types: false,
1672            semantic_depth: 3,
1673            custom_priorities: vec![],
1674            config_token_limits: None,
1675            config_defaults_max_tokens: None,
1676        };
1677
1678        let options = WalkOptions::from_config(&config).unwrap();
1679        assert!(!options.filter_binary_files);
1680    }
1681}
context_creator/core/walker.rs

context_creator/core/
walker.rs