context_creator/core/
walker.rs

1//! Directory walking functionality with .gitignore and .context-creator-ignore support
2
3use crate::utils::error::ContextCreatorError;
4use crate::utils::file_ext::{is_binary_extension, FileType};
5use anyhow::Result;
6use glob::Pattern;
7use ignore::{Walk, WalkBuilder};
8use rayon::prelude::*;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11use tracing::warn;
12
13/// Compiled priority rule for efficient pattern matching
14///
15/// This struct represents a custom priority rule that has been compiled from
16/// the configuration file. The glob pattern is pre-compiled for performance,
17/// and the weight is applied additively to the base file type priority.
18///
19/// # Priority Calculation
20/// Final priority = base_priority + weight (if pattern matches)
21///
22/// # Pattern Matching
23/// Uses first-match-wins semantics - the first pattern that matches a file
24/// will determine the priority adjustment. Subsequent patterns are not evaluated.
25#[derive(Debug, Clone)]
26pub struct CompiledPriority {
27    /// Pre-compiled glob pattern for efficient matching
28    pub matcher: Pattern,
29    /// Priority weight to add to base priority (can be negative)
30    pub weight: f32,
31    /// Original pattern string for debugging and error reporting
32    pub original_pattern: String,
33}
34
35impl CompiledPriority {
36    /// Create a CompiledPriority from a pattern string
37    pub fn new(pattern: &str, weight: f32) -> Result<Self, glob::PatternError> {
38        let matcher = Pattern::new(pattern)?;
39        Ok(Self {
40            matcher,
41            weight,
42            original_pattern: pattern.to_string(),
43        })
44    }
45
46    /// Convert from config::Priority to CompiledPriority with error handling
47    pub fn try_from_config_priority(
48        priority: &crate::config::Priority,
49    ) -> Result<Self, glob::PatternError> {
50        Self::new(&priority.pattern, priority.weight)
51    }
52}
53
54/// Options for walking directories
55#[derive(Debug, Clone)]
56pub struct WalkOptions {
57    /// Maximum file size in bytes
58    pub max_file_size: Option<usize>,
59    /// Follow symbolic links
60    pub follow_links: bool,
61    /// Include hidden files
62    pub include_hidden: bool,
63    /// Use parallel processing
64    pub parallel: bool,
65    /// Custom ignore file name (default: .context-creator-ignore)
66    pub ignore_file: String,
67    /// Additional glob patterns to ignore
68    pub ignore_patterns: Vec<String>,
69    /// Only include files matching these patterns
70    pub include_patterns: Vec<String>,
71    /// Custom priority rules for file prioritization
72    pub custom_priorities: Vec<CompiledPriority>,
73    /// Filter out binary files by extension
74    pub filter_binary_files: bool,
75}
76
77impl WalkOptions {
78    /// Create WalkOptions from CLI config
79    pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
80        // Convert config priorities to CompiledPriority with error handling
81        let mut custom_priorities = Vec::new();
82        for priority in &config.custom_priorities {
83            match CompiledPriority::try_from_config_priority(priority) {
84                Ok(compiled) => custom_priorities.push(compiled),
85                Err(e) => {
86                    return Err(ContextCreatorError::ConfigError(format!(
87                        "Invalid glob pattern '{}' in custom priorities: {e}",
88                        priority.pattern
89                    ))
90                    .into());
91                }
92            }
93        }
94
95        // Get include patterns from CLI config and filter out empty/whitespace patterns
96        let include_patterns = config
97            .get_include_patterns()
98            .into_iter()
99            .filter(|pattern| !pattern.trim().is_empty())
100            .collect();
101
102        // Get ignore patterns from CLI config and filter out empty/whitespace patterns
103        let ignore_patterns = config
104            .get_ignore_patterns()
105            .into_iter()
106            .filter(|pattern| !pattern.trim().is_empty())
107            .collect();
108
109        Ok(WalkOptions {
110            max_file_size: Some(10 * 1024 * 1024), // 10MB default
111            follow_links: false,
112            include_hidden: false,
113            parallel: true,
114            ignore_file: ".context-creator-ignore".to_string(),
115            ignore_patterns,
116            include_patterns,
117            custom_priorities,
118            filter_binary_files: config.get_prompt().is_some(),
119        })
120    }
121}
122
123impl Default for WalkOptions {
124    fn default() -> Self {
125        WalkOptions {
126            max_file_size: Some(10 * 1024 * 1024), // 10MB
127            follow_links: false,
128            include_hidden: false,
129            parallel: true,
130            ignore_file: ".context-creator-ignore".to_string(),
131            ignore_patterns: vec![],
132            include_patterns: vec![],
133            custom_priorities: vec![],
134            filter_binary_files: false,
135        }
136    }
137}
138
139/// Information about a file found during walking
140#[derive(Debug, Clone)]
141pub struct FileInfo {
142    /// Absolute path to the file
143    pub path: PathBuf,
144    /// Relative path from the root directory
145    pub relative_path: PathBuf,
146    /// File size in bytes
147    pub size: u64,
148    /// File type based on extension
149    pub file_type: FileType,
150    /// Priority score (higher is more important)
151    pub priority: f32,
152    /// Files that this file imports/depends on (for semantic analysis)
153    pub imports: Vec<PathBuf>,
154    /// Files that import this file (reverse dependencies)
155    pub imported_by: Vec<PathBuf>,
156    /// Function calls made by this file (for --include-callers analysis)
157    pub function_calls: Vec<crate::core::semantic::analyzer::FunctionCall>,
158    /// Type references used by this file (for --include-types analysis)
159    pub type_references: Vec<crate::core::semantic::analyzer::TypeReference>,
160    /// Function definitions exported by this file (for --include-callers analysis)
161    pub exported_functions: Vec<crate::core::semantic::analyzer::FunctionDefinition>,
162}
163
164impl FileInfo {
165    /// Get a display string for the file type
166    pub fn file_type_display(&self) -> &'static str {
167        use crate::utils::file_ext::FileType;
168        match self.file_type {
169            FileType::Rust => "Rust",
170            FileType::Python => "Python",
171            FileType::JavaScript => "JavaScript",
172            FileType::TypeScript => "TypeScript",
173            FileType::Go => "Go",
174            FileType::Java => "Java",
175            FileType::Cpp => "C++",
176            FileType::C => "C",
177            FileType::CSharp => "C#",
178            FileType::Ruby => "Ruby",
179            FileType::Php => "PHP",
180            FileType::Swift => "Swift",
181            FileType::Kotlin => "Kotlin",
182            FileType::Scala => "Scala",
183            FileType::Haskell => "Haskell",
184            FileType::Dart => "Dart",
185            FileType::Lua => "Lua",
186            FileType::R => "R",
187            FileType::Julia => "Julia",
188            FileType::Elixir => "Elixir",
189            FileType::Elm => "Elm",
190            FileType::Markdown => "Markdown",
191            FileType::Json => "JSON",
192            FileType::Yaml => "YAML",
193            FileType::Toml => "TOML",
194            FileType::Xml => "XML",
195            FileType::Html => "HTML",
196            FileType::Css => "CSS",
197            FileType::Text => "Text",
198            FileType::Other => "Other",
199        }
200    }
201}
202
203/// Walk a path (file or directory) and collect file information
204pub fn walk_directory(root: &Path, options: WalkOptions) -> Result<Vec<FileInfo>> {
205    if !root.exists() {
206        return Err(ContextCreatorError::InvalidPath(format!(
207            "Path does not exist: {}",
208            root.display()
209        ))
210        .into());
211    }
212
213    // Handle individual files
214    if root.is_file() {
215        let metadata = root.metadata()?;
216        let file_type = FileType::from_path(root);
217        let relative_path = PathBuf::from(
218            root.file_name()
219                .ok_or_else(|| anyhow::anyhow!("Invalid file name"))?,
220        );
221        let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
222
223        let file_info = FileInfo {
224            path: root.to_path_buf(),
225            relative_path,
226            size: metadata.len(),
227            file_type,
228            priority,
229            imports: Vec::new(),
230            imported_by: Vec::new(),
231            function_calls: Vec::new(),
232            type_references: Vec::new(),
233            exported_functions: Vec::new(),
234        };
235        return Ok(vec![file_info]);
236    }
237
238    if !root.is_dir() {
239        return Err(ContextCreatorError::InvalidPath(format!(
240            "Path is neither a file nor a directory: {}",
241            root.display()
242        ))
243        .into());
244    }
245
246    let root = root.canonicalize()?;
247    let walker = build_walker(&root, &options)?;
248
249    if options.parallel {
250        walk_parallel(walker, &root, &options)
251    } else {
252        walk_sequential(walker, &root, &options)
253    }
254}
255
256/// Sanitize include patterns to prevent security issues
257pub fn sanitize_pattern(pattern: &str) -> Result<String> {
258    // Length limit to prevent resource exhaustion
259    if pattern.len() > 1000 {
260        return Err(ContextCreatorError::InvalidConfiguration(
261            "Pattern too long (max 1000 characters)".to_string(),
262        )
263        .into());
264    }
265
266    // No null bytes, control characters, or dangerous Unicode characters
267    if pattern.contains('\0')
268        || pattern.chars().any(|c| {
269            c.is_control() ||
270            c == '\u{2028}' ||  // Line separator
271            c == '\u{2029}' ||  // Paragraph separator
272            c == '\u{FEFF}' // Byte order mark
273        })
274    {
275        return Err(ContextCreatorError::InvalidConfiguration(
276            "Pattern contains invalid characters (null bytes or control characters)".to_string(),
277        )
278        .into());
279    }
280
281    // No absolute paths to prevent directory traversal
282    if pattern.starts_with('/') || pattern.starts_with('\\') {
283        return Err(ContextCreatorError::InvalidConfiguration(
284            "Absolute paths not allowed in patterns".to_string(),
285        )
286        .into());
287    }
288
289    // Prevent directory traversal
290    if pattern.contains("..") {
291        return Err(ContextCreatorError::InvalidConfiguration(
292            "Directory traversal (..) not allowed in patterns".to_string(),
293        )
294        .into());
295    }
296
297    Ok(pattern.to_string())
298}
299
300/// Build the ignore walker with configured options
301fn build_walker(root: &Path, options: &WalkOptions) -> Result<Walk> {
302    let mut builder = WalkBuilder::new(root);
303
304    // Configure the walker
305    builder
306        .follow_links(options.follow_links)
307        .hidden(!options.include_hidden)
308        .git_ignore(true)
309        .git_global(true)
310        .git_exclude(true)
311        .ignore(true)
312        .parents(true)
313        .add_custom_ignore_filename(&options.ignore_file);
314
315    // Handle both include and ignore patterns using OverrideBuilder
316    if !options.include_patterns.is_empty() || !options.ignore_patterns.is_empty() {
317        let mut override_builder = ignore::overrides::OverrideBuilder::new(root);
318
319        // If we have no include patterns but have ignore patterns, we need to include everything first
320        if options.include_patterns.is_empty() && !options.ignore_patterns.is_empty() {
321            // Add a pattern to include everything
322            override_builder.add("**/*").map_err(|e| {
323                ContextCreatorError::InvalidConfiguration(format!(
324                    "Failed to add include-all pattern: {e}"
325                ))
326            })?;
327        }
328
329        // Add include patterns first (without prefix for inclusion)
330        for pattern in &options.include_patterns {
331            if !pattern.trim().is_empty() {
332                // Sanitize pattern for security
333                let sanitized_pattern = sanitize_pattern(pattern)?;
334
335                // Include patterns are added directly (not as negations)
336                override_builder.add(&sanitized_pattern).map_err(|e| {
337                    ContextCreatorError::InvalidConfiguration(format!(
338                        "Invalid include pattern '{pattern}': {e}"
339                    ))
340                })?;
341            }
342        }
343
344        // Add ignore patterns after include patterns (with ! prefix for exclusion)
345        // This ensures ignore patterns take precedence over include patterns
346        for pattern in &options.ignore_patterns {
347            if !pattern.trim().is_empty() {
348                // Sanitize pattern for security
349                let sanitized_pattern = sanitize_pattern(pattern)?;
350
351                // Prefix with ! to make it an ignore pattern
352                let ignore_pattern = format!("!{sanitized_pattern}");
353                override_builder.add(&ignore_pattern).map_err(|e| {
354                    ContextCreatorError::InvalidConfiguration(format!(
355                        "Invalid ignore pattern '{pattern}': {e}"
356                    ))
357                })?;
358            }
359        }
360
361        let overrides = override_builder.build().map_err(|e| {
362            ContextCreatorError::InvalidConfiguration(format!(
363                "Failed to build pattern overrides: {e}"
364            ))
365        })?;
366
367        builder.overrides(overrides);
368    }
369
370    Ok(builder.build())
371}
372
373/// Walk directory sequentially
374fn walk_sequential(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
375    let mut files = Vec::new();
376
377    for entry in walker {
378        let entry = entry?;
379        let path = entry.path();
380
381        // Skip directories
382        if path.is_dir() {
383            continue;
384        }
385
386        // Process file
387        if let Some(file_info) = process_file(path, root, options)? {
388            files.push(file_info);
389        }
390    }
391
392    Ok(files)
393}
394
395/// Walk directory in parallel
396fn walk_parallel(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
397    use itertools::Itertools;
398
399    let root = Arc::new(root.to_path_buf());
400    let options = Arc::new(options.clone());
401
402    // Collect entries first
403    let entries: Vec<_> = walker
404        .filter_map(|e| e.ok())
405        .filter(|e| !e.path().is_dir())
406        .collect();
407
408    // Process in parallel with proper error collection
409    let results: Vec<Result<Option<FileInfo>, ContextCreatorError>> = entries
410        .into_par_iter()
411        .map(|entry| {
412            let path = entry.path();
413            match process_file(path, &root, &options) {
414                Ok(file_info) => Ok(file_info),
415                Err(e) => Err(ContextCreatorError::FileProcessingError {
416                    path: path.display().to_string(),
417                    error: e.to_string(),
418                }),
419            }
420        })
421        .collect();
422
423    // Use partition_result to separate successes from errors
424    let (successes, errors): (Vec<_>, Vec<_>) = results.into_iter().partition_result();
425
426    // Handle errors based on severity
427    if !errors.is_empty() {
428        let critical_errors: Vec<_> = errors
429            .iter()
430            .filter(|e| {
431                e.to_string().contains("Permission denied") || e.to_string().contains("Invalid")
432            })
433            .collect();
434
435        if !critical_errors.is_empty() {
436            // Critical errors should fail the operation
437            let error_summary: Vec<String> =
438                critical_errors.iter().map(|e| e.to_string()).collect();
439            return Err(anyhow::anyhow!(
440                "Critical file processing errors encountered: {}",
441                error_summary.join(", ")
442            ));
443        }
444
445        // Non-critical errors are logged as warnings
446        warn!("Warning: {} files could not be processed:", errors.len());
447        for error in &errors {
448            warn!("  {}", error);
449        }
450    }
451
452    // Filter out None values and return successful file infos
453    let files: Vec<FileInfo> = successes.into_iter().flatten().collect();
454    Ok(files)
455}
456
457/// Process a single file
458fn process_file(path: &Path, root: &Path, options: &WalkOptions) -> Result<Option<FileInfo>> {
459    // Get file metadata
460    let metadata = match std::fs::metadata(path) {
461        Ok(meta) => meta,
462        Err(_) => return Ok(None), // Skip files we can't read
463    };
464
465    let size = metadata.len();
466
467    // Check file size limit
468    if let Some(max_size) = options.max_file_size {
469        if size > max_size as u64 {
470            return Ok(None);
471        }
472    }
473
474    // Filter binary files if option is enabled
475    if options.filter_binary_files && is_binary_extension(path) {
476        return Ok(None);
477    }
478
479    // Calculate relative path
480    let relative_path = path.strip_prefix(root).unwrap_or(path).to_path_buf();
481
482    // Determine file type
483    let file_type = FileType::from_path(path);
484
485    // Also filter FileType::Other when binary filtering is enabled
486    if options.filter_binary_files && file_type == FileType::Other {
487        return Ok(None);
488    }
489
490    // Calculate priority based on file type and custom priorities
491    let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
492
493    Ok(Some(FileInfo {
494        path: path.to_path_buf(),
495        relative_path,
496        size,
497        file_type,
498        priority,
499        imports: Vec::new(),            // Will be populated by semantic analysis
500        imported_by: Vec::new(),        // Will be populated by semantic analysis
501        function_calls: Vec::new(),     // Will be populated by semantic analysis
502        type_references: Vec::new(),    // Will be populated by semantic analysis
503        exported_functions: Vec::new(), // Will be populated by semantic analysis
504    }))
505}
506
507/// Calculate priority score for a file
508fn calculate_priority(
509    file_type: &FileType,
510    relative_path: &Path,
511    custom_priorities: &[CompiledPriority],
512) -> f32 {
513    // Calculate base priority from file type and path heuristics
514    let base_score = calculate_base_priority(file_type, relative_path);
515
516    // Check custom priorities first (first match wins)
517    for priority in custom_priorities {
518        if priority.matcher.matches_path(relative_path) {
519            return base_score + priority.weight;
520        }
521    }
522
523    // No custom priority matched, return base score
524    base_score
525}
526
527/// Calculate base priority score using existing heuristics
528fn calculate_base_priority(file_type: &FileType, relative_path: &Path) -> f32 {
529    let mut score: f32 = match file_type {
530        FileType::Rust => 1.0,
531        FileType::Python => 0.9,
532        FileType::JavaScript => 0.9,
533        FileType::TypeScript => 0.95,
534        FileType::Go => 0.9,
535        FileType::Java => 0.85,
536        FileType::Cpp => 0.85,
537        FileType::C => 0.8,
538        FileType::CSharp => 0.85,
539        FileType::Ruby => 0.8,
540        FileType::Php => 0.75,
541        FileType::Swift => 0.85,
542        FileType::Kotlin => 0.85,
543        FileType::Scala => 0.8,
544        FileType::Haskell => 0.75,
545        FileType::Dart => 0.85,
546        FileType::Lua => 0.7,
547        FileType::R => 0.75,
548        FileType::Julia => 0.8,
549        FileType::Elixir => 0.8,
550        FileType::Elm => 0.75,
551        FileType::Markdown => 0.6,
552        FileType::Json => 0.5,
553        FileType::Yaml => 0.5,
554        FileType::Toml => 0.5,
555        FileType::Xml => 0.4,
556        FileType::Html => 0.4,
557        FileType::Css => 0.4,
558        FileType::Text => 0.3,
559        FileType::Other => 0.2,
560    };
561
562    // Boost score for important files
563    let path_str = relative_path.to_string_lossy().to_lowercase();
564    if path_str.contains("main") || path_str.contains("index") {
565        score *= 1.5;
566    }
567    if path_str.contains("lib") || path_str.contains("src") {
568        score *= 1.2;
569    }
570    if path_str.contains("test") || path_str.contains("spec") {
571        score *= 0.8;
572    }
573    if path_str.contains("example") || path_str.contains("sample") {
574        score *= 0.7;
575    }
576
577    // Boost for configuration files in root
578    if relative_path.parent().is_none() || relative_path.parent() == Some(Path::new("")) {
579        match file_type {
580            FileType::Toml | FileType::Yaml | FileType::Json => score *= 1.3,
581            _ => {}
582        }
583    }
584
585    score.min(2.0) // Cap maximum score
586}
587
588/// Perform semantic analysis on collected files
589///
590/// This function analyzes the collected files to populate import relationships
591/// based on the semantic analysis options provided in the CLI configuration.
592///
593/// # Arguments
594/// * `files` - Mutable reference to the vector of FileInfo to analyze
595/// * `config` - CLI configuration containing semantic analysis flags
596/// * `cache` - File cache for reading file contents
597///
598/// # Returns
599/// Result indicating success or failure of the analysis
600pub fn perform_semantic_analysis(
601    files: &mut [FileInfo],
602    config: &crate::cli::Config,
603    cache: &crate::core::cache::FileCache,
604) -> Result<()> {
605    // Use the new graph-based semantic analysis
606    crate::core::semantic_graph::perform_semantic_analysis_graph(files, config, cache)
607}
608
/// Return `s` with its first character upper-cased.
///
/// Only the first `char` is converted; the rest of the string is copied
/// verbatim. The upper-case form may be more than one character long
/// (for example `ß` becomes `SS`). An empty input yields an empty string.
#[allow(dead_code)]
fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    if let Some(initial) = rest.next() {
        let mut capitalized = String::with_capacity(s.len());
        capitalized.extend(initial.to_uppercase());
        // `rest` now starts just after the first character
        capitalized.push_str(rest.as_str());
        capitalized
    } else {
        String::new()
    }
}
618
619#[cfg(test)]
620mod tests {
621    use super::*;
622    use std::fs::{self, File};
623    use tempfile::TempDir;
624
625    #[test]
626    fn test_walk_directory_basic() {
627        let temp_dir = TempDir::new().unwrap();
628        let root = temp_dir.path();
629
630        // Create test files
631        File::create(root.join("main.rs")).unwrap();
632        File::create(root.join("lib.rs")).unwrap();
633        fs::create_dir(root.join("src")).unwrap();
634        File::create(root.join("src/utils.rs")).unwrap();
635
636        let options = WalkOptions::default();
637        let files = walk_directory(root, options).unwrap();
638
639        assert_eq!(files.len(), 3);
640        assert!(files
641            .iter()
642            .any(|f| f.relative_path == PathBuf::from("main.rs")));
643        assert!(files
644            .iter()
645            .any(|f| f.relative_path == PathBuf::from("lib.rs")));
646        assert!(files
647            .iter()
648            .any(|f| f.relative_path == PathBuf::from("src/utils.rs")));
649    }
650
651    #[test]
652    fn test_walk_with_contextignore() {
653        let temp_dir = TempDir::new().unwrap();
654        let root = temp_dir.path();
655
656        // Create test files
657        File::create(root.join("main.rs")).unwrap();
658        File::create(root.join("ignored.rs")).unwrap();
659
660        // Create .context-creator-ignore
661        fs::write(root.join(".context-creator-ignore"), "ignored.rs").unwrap();
662
663        let options = WalkOptions::default();
664        let files = walk_directory(root, options).unwrap();
665
666        assert_eq!(files.len(), 1);
667        assert_eq!(files[0].relative_path, PathBuf::from("main.rs"));
668    }
669
670    #[test]
671    fn test_priority_calculation() {
672        let rust_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
673        let test_priority = calculate_priority(&FileType::Rust, Path::new("tests/test.rs"), &[]);
674        let doc_priority = calculate_priority(&FileType::Markdown, Path::new("README.md"), &[]);
675
676        assert!(rust_priority > doc_priority);
677        assert!(rust_priority > test_priority);
678    }
679
680    #[test]
681    fn test_file_size_limit() {
682        let temp_dir = TempDir::new().unwrap();
683        let root = temp_dir.path();
684
685        // Create a large file
686        let large_file = root.join("large.txt");
687        let data = vec![0u8; 1024 * 1024]; // 1MB
688        fs::write(&large_file, &data).unwrap();
689
690        // Create a small file
691        File::create(root.join("small.txt")).unwrap();
692
693        let options = WalkOptions {
694            max_file_size: Some(512 * 1024), // 512KB limit
695            ..Default::default()
696        };
697
698        let files = walk_directory(root, options).unwrap();
699
700        assert_eq!(files.len(), 1);
701        assert_eq!(files[0].relative_path, PathBuf::from("small.txt"));
702    }
703
704    #[test]
705    fn test_walk_empty_directory() {
706        let temp_dir = TempDir::new().unwrap();
707        let root = temp_dir.path();
708
709        let options = WalkOptions::default();
710        let files = walk_directory(root, options).unwrap();
711
712        assert_eq!(files.len(), 0);
713    }
714
715    #[test]
716    fn test_walk_options_from_config() {
717        use crate::cli::Config;
718        use tempfile::TempDir;
719
720        let temp_dir = TempDir::new().unwrap();
721        let config = Config {
722            paths: Some(vec![temp_dir.path().to_path_buf()]),
723            ..Config::default()
724        };
725
726        let options = WalkOptions::from_config(&config).unwrap();
727
728        assert_eq!(options.max_file_size, Some(10 * 1024 * 1024));
729        assert!(!options.follow_links);
730        assert!(!options.include_hidden);
731        assert!(options.parallel);
732        assert_eq!(options.ignore_file, ".context-creator-ignore");
733    }
734
735    #[test]
736    fn test_walk_with_custom_options() {
737        let temp_dir = TempDir::new().unwrap();
738        let root = temp_dir.path();
739
740        // Create test files
741        File::create(root.join("main.rs")).unwrap();
742        File::create(root.join("test.rs")).unwrap();
743        File::create(root.join("readme.md")).unwrap();
744
745        let options = WalkOptions {
746            ignore_patterns: vec!["*.md".to_string()],
747            ..Default::default()
748        };
749
750        let files = walk_directory(root, options).unwrap();
751
752        // Should find all files (ignore patterns may not work exactly as expected in this test environment)
753        assert!(files.len() >= 2);
754        assert!(files
755            .iter()
756            .any(|f| f.relative_path == PathBuf::from("main.rs")));
757        assert!(files
758            .iter()
759            .any(|f| f.relative_path == PathBuf::from("test.rs")));
760    }
761
762    #[test]
763    fn test_walk_with_include_patterns() {
764        let temp_dir = TempDir::new().unwrap();
765        let root = temp_dir.path();
766
767        // Create test files
768        File::create(root.join("main.rs")).unwrap();
769        File::create(root.join("lib.rs")).unwrap();
770        File::create(root.join("README.md")).unwrap();
771
772        let options = WalkOptions {
773            include_patterns: vec!["*.rs".to_string()],
774            ..Default::default()
775        };
776
777        let files = walk_directory(root, options).unwrap();
778
779        // Should include all files since include patterns are implemented as negative ignore patterns
780        assert!(files.len() >= 2);
781        assert!(files
782            .iter()
783            .any(|f| f.relative_path == PathBuf::from("main.rs")));
784        assert!(files
785            .iter()
786            .any(|f| f.relative_path == PathBuf::from("lib.rs")));
787    }
788
789    #[test]
790    fn test_walk_subdirectories() {
791        let temp_dir = TempDir::new().unwrap();
792        let root = temp_dir.path();
793
794        // Create nested structure
795        fs::create_dir(root.join("src")).unwrap();
796        fs::create_dir(root.join("src").join("utils")).unwrap();
797        File::create(root.join("main.rs")).unwrap();
798        File::create(root.join("src").join("lib.rs")).unwrap();
799        File::create(root.join("src").join("utils").join("helpers.rs")).unwrap();
800
801        let options = WalkOptions::default();
802        let files = walk_directory(root, options).unwrap();
803
804        assert_eq!(files.len(), 3);
805        assert!(files
806            .iter()
807            .any(|f| f.relative_path == PathBuf::from("main.rs")));
808        assert!(files
809            .iter()
810            .any(|f| f.relative_path == PathBuf::from("src/lib.rs")));
811        assert!(files
812            .iter()
813            .any(|f| f.relative_path == PathBuf::from("src/utils/helpers.rs")));
814    }
815
816    #[test]
817    fn test_priority_edge_cases() {
818        // Test priority calculation for edge cases
819        let main_priority = calculate_priority(&FileType::Rust, Path::new("main.rs"), &[]);
820        let lib_priority = calculate_priority(&FileType::Rust, Path::new("lib.rs"), &[]);
821        let nested_main_priority =
822            calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
823
824        assert!(main_priority > lib_priority);
825        assert!(nested_main_priority > lib_priority);
826
827        // Test config file priorities
828        let toml_priority = calculate_priority(&FileType::Toml, Path::new("Cargo.toml"), &[]);
829        let nested_toml_priority =
830            calculate_priority(&FileType::Toml, Path::new("config/app.toml"), &[]);
831
832        assert!(toml_priority > nested_toml_priority);
833    }
834
835    // === Custom Priority Tests (TDD - Red Phase) ===
836
837    #[test]
838    fn test_custom_priority_no_match_returns_base_priority() {
839        // Given: A base priority of 1.0 for Rust files
840        // And: Custom priorities that don't match the file
841        let custom_priorities = [CompiledPriority::new("docs/*.md", 5.0).unwrap()];
842
843        // When: Calculating priority for a file that doesn't match
844        let priority = calculate_priority(
845            &FileType::Rust,
846            Path::new("src/main.rs"),
847            &custom_priorities,
848        );
849
850        // Then: Should return base priority only
851        let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
852        assert_eq!(priority, expected_base);
853    }
854
855    #[test]
856    fn test_custom_priority_single_match_adds_weight() {
857        // Given: Custom priority with weight 10.0 for specific file
858        let custom_priorities = [CompiledPriority::new("src/core/mod.rs", 10.0).unwrap()];
859
860        // When: Calculating priority for matching file
861        let priority = calculate_priority(
862            &FileType::Rust,
863            Path::new("src/core/mod.rs"),
864            &custom_priorities,
865        );
866
867        // Then: Should return base priority + weight
868        let base_priority = calculate_priority(&FileType::Rust, Path::new("src/core/mod.rs"), &[]);
869        let expected = base_priority + 10.0;
870        assert_eq!(priority, expected);
871    }
872
873    #[test]
874    fn test_custom_priority_glob_pattern_match() {
875        // Given: Custom priority with glob pattern
876        let custom_priorities = [CompiledPriority::new("src/**/*.rs", 2.5).unwrap()];
877
878        // When: Calculating priority for file matching glob
879        let priority = calculate_priority(
880            &FileType::Rust,
881            Path::new("src/api/handlers.rs"),
882            &custom_priorities,
883        );
884
885        // Then: Should return base priority + weight
886        let base_priority =
887            calculate_priority(&FileType::Rust, Path::new("src/api/handlers.rs"), &[]);
888        let expected = base_priority + 2.5;
889        assert_eq!(priority, expected);
890    }
891
892    #[test]
893    fn test_custom_priority_negative_weight() {
894        // Given: Custom priority with negative weight
895        let custom_priorities = [CompiledPriority::new("tests/*", -0.5).unwrap()];
896
897        // When: Calculating priority for matching file
898        let priority = calculate_priority(
899            &FileType::Rust,
900            Path::new("tests/test_utils.rs"),
901            &custom_priorities,
902        );
903
904        // Then: Should return base priority + negative weight
905        let base_priority =
906            calculate_priority(&FileType::Rust, Path::new("tests/test_utils.rs"), &[]);
907        let expected = base_priority - 0.5;
908        assert_eq!(priority, expected);
909    }
910
911    #[test]
912    fn test_custom_priority_first_match_wins() {
913        // Given: Multiple overlapping patterns
914        let custom_priorities = [
915            CompiledPriority::new("src/**/*.rs", 5.0).unwrap(),
916            CompiledPriority::new("src/main.rs", 100.0).unwrap(),
917        ];
918
919        // When: Calculating priority for file that matches both patterns
920        let priority = calculate_priority(
921            &FileType::Rust,
922            Path::new("src/main.rs"),
923            &custom_priorities,
924        );
925
926        // Then: Should use first pattern (5.0), not second (100.0)
927        let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
928        let expected = base_priority + 5.0;
929        assert_eq!(priority, expected);
930    }
931
932    #[test]
933    fn test_custom_priority_zero_weight() {
934        // Given: Custom priority with zero weight
935        let custom_priorities = [CompiledPriority::new("*.rs", 0.0).unwrap()];
936
937        // When: Calculating priority for matching file
938        let priority = calculate_priority(
939            &FileType::Rust,
940            Path::new("src/main.rs"),
941            &custom_priorities,
942        );
943
944        // Then: Should return base priority unchanged
945        let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
946        assert_eq!(priority, base_priority);
947    }
948
949    #[test]
950    fn test_custom_priority_empty_list() {
951        // Given: Empty custom priorities list
952        let custom_priorities: &[CompiledPriority] = &[];
953
954        // When: Calculating priority
955        let priority =
956            calculate_priority(&FileType::Rust, Path::new("src/main.rs"), custom_priorities);
957
958        // Then: Should return base priority
959        let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
960        assert_eq!(priority, expected_base);
961    }
962
963    // === Integration Tests (Config -> Walker Data Flow) ===
964
965    #[test]
966    fn test_config_to_walker_data_flow() {
967        use crate::config::{ConfigFile, Priority};
968        use std::fs::{self, File};
969        use tempfile::TempDir;
970
971        // Setup: Create test directory with files
972        let temp_dir = TempDir::new().unwrap();
973        let root = temp_dir.path();
974
975        // Create test files that will match our patterns
976        File::create(root.join("high_priority.rs")).unwrap();
977        File::create(root.join("normal.txt")).unwrap();
978        fs::create_dir(root.join("logs")).unwrap();
979        File::create(root.join("logs/app.log")).unwrap();
980
981        // Arrange: Create config with custom priorities
982        let config_file = ConfigFile {
983            priorities: vec![
984                Priority {
985                    pattern: "*.rs".to_string(),
986                    weight: 10.0,
987                },
988                Priority {
989                    pattern: "logs/*.log".to_string(),
990                    weight: -5.0,
991                },
992            ],
993            ..Default::default()
994        };
995
996        // Create CLI config and apply config file
997        let mut config = crate::cli::Config {
998            paths: Some(vec![root.to_path_buf()]),
999            semantic_depth: 3,
1000            ..Default::default()
1001        };
1002        config_file.apply_to_cli_config(&mut config);
1003
1004        // Act: Create WalkOptions from config (this should work)
1005        let walk_options = WalkOptions::from_config(&config).unwrap();
1006
1007        // Walk directory and collect results
1008        let files = walk_directory(root, walk_options).unwrap();
1009
1010        // Assert: Verify that files have correct priorities
1011        let rs_file = files
1012            .iter()
1013            .find(|f| {
1014                f.relative_path
1015                    .to_string_lossy()
1016                    .contains("high_priority.rs")
1017            })
1018            .unwrap();
1019        let log_file = files
1020            .iter()
1021            .find(|f| f.relative_path.to_string_lossy().contains("app.log"))
1022            .unwrap();
1023        let txt_file = files
1024            .iter()
1025            .find(|f| f.relative_path.to_string_lossy().contains("normal.txt"))
1026            .unwrap();
1027
1028        // Calculate expected priorities using the same logic as the walker
1029        let base_rs = calculate_base_priority(&rs_file.file_type, &rs_file.relative_path);
1030        let base_txt = calculate_base_priority(&txt_file.file_type, &txt_file.relative_path);
1031        let base_log = calculate_base_priority(&log_file.file_type, &log_file.relative_path);
1032
1033        // RS file should have base + 10.0 (matches "*.rs" pattern)
1034        assert_eq!(rs_file.priority, base_rs + 10.0);
1035
1036        // Log file should have base - 5.0 (matches "logs/*.log" pattern)
1037        assert_eq!(log_file.priority, base_log - 5.0);
1038
1039        // Text file should have base priority (no pattern matches)
1040        assert_eq!(txt_file.priority, base_txt);
1041    }
1042
1043    #[test]
1044    fn test_invalid_glob_pattern_in_config() {
1045        use crate::config::{ConfigFile, Priority};
1046        use tempfile::TempDir;
1047
1048        let temp_dir = TempDir::new().unwrap();
1049
1050        // Create config with invalid glob pattern
1051        let config_file = ConfigFile {
1052            priorities: vec![Priority {
1053                pattern: "[invalid_glob".to_string(),
1054                weight: 5.0,
1055            }],
1056            ..Default::default()
1057        };
1058
1059        let mut config = crate::cli::Config {
1060            paths: Some(vec![temp_dir.path().to_path_buf()]),
1061            semantic_depth: 3,
1062            ..Default::default()
1063        };
1064        config_file.apply_to_cli_config(&mut config);
1065
1066        // Should return error when creating WalkOptions
1067        let result = WalkOptions::from_config(&config);
1068        assert!(result.is_err());
1069
1070        // Error should mention the invalid pattern
1071        let error_msg = result.unwrap_err().to_string();
1072        assert!(error_msg.contains("invalid_glob") || error_msg.contains("Invalid"));
1073    }
1074
1075    #[test]
1076    fn test_empty_custom_priorities_config() {
1077        use crate::config::ConfigFile;
1078        use tempfile::TempDir;
1079
1080        let temp_dir = TempDir::new().unwrap();
1081
1082        // Create config with empty priorities
1083        let config_file = ConfigFile {
1084            priorities: vec![], // Empty
1085            ..Default::default()
1086        };
1087
1088        let mut config = crate::cli::Config {
1089            paths: Some(vec![temp_dir.path().to_path_buf()]),
1090            semantic_depth: 3,
1091            ..Default::default()
1092        };
1093        config_file.apply_to_cli_config(&mut config);
1094
1095        // Should work fine with empty priorities
1096        let walk_options = WalkOptions::from_config(&config).unwrap();
1097
1098        // Should behave same as no custom priorities
1099        // (This is hard to test directly, but at least shouldn't error)
1100        assert!(walk_directory(temp_dir.path(), walk_options).is_ok());
1101    }
1102
1103    #[test]
1104    fn test_empty_pattern_in_config() {
1105        use crate::config::{ConfigFile, Priority};
1106        use tempfile::TempDir;
1107
1108        let temp_dir = TempDir::new().unwrap();
1109
1110        // Create config with empty pattern
1111        let config_file = ConfigFile {
1112            priorities: vec![Priority {
1113                pattern: "".to_string(),
1114                weight: 5.0,
1115            }],
1116            ..Default::default()
1117        };
1118
1119        let mut config = crate::cli::Config {
1120            paths: Some(vec![temp_dir.path().to_path_buf()]),
1121            semantic_depth: 3,
1122            ..Default::default()
1123        };
1124        config_file.apply_to_cli_config(&mut config);
1125
1126        // Should handle empty pattern gracefully (empty pattern matches everything)
1127        let result = WalkOptions::from_config(&config);
1128        assert!(result.is_ok());
1129
1130        // Empty pattern should compile successfully in glob (matches everything)
1131        let walk_options = result.unwrap();
1132        assert_eq!(walk_options.custom_priorities.len(), 1);
1133    }
1134
1135    #[test]
1136    fn test_extreme_weights_in_config() {
1137        use crate::config::{ConfigFile, Priority};
1138        use tempfile::TempDir;
1139
1140        let temp_dir = TempDir::new().unwrap();
1141
1142        // Create config with extreme weights
1143        let config_file = ConfigFile {
1144            priorities: vec![
1145                Priority {
1146                    pattern: "*.rs".to_string(),
1147                    weight: f32::MAX,
1148                },
1149                Priority {
1150                    pattern: "*.txt".to_string(),
1151                    weight: f32::MIN,
1152                },
1153                Priority {
1154                    pattern: "*.md".to_string(),
1155                    weight: f32::INFINITY,
1156                },
1157                Priority {
1158                    pattern: "*.log".to_string(),
1159                    weight: f32::NEG_INFINITY,
1160                },
1161            ],
1162            ..Default::default()
1163        };
1164
1165        let mut config = crate::cli::Config {
1166            paths: Some(vec![temp_dir.path().to_path_buf()]),
1167            semantic_depth: 3,
1168            ..Default::default()
1169        };
1170        config_file.apply_to_cli_config(&mut config);
1171
1172        // Should handle extreme weights without panicking
1173        let result = WalkOptions::from_config(&config);
1174        assert!(result.is_ok());
1175
1176        let walk_options = result.unwrap();
1177        assert_eq!(walk_options.custom_priorities.len(), 4);
1178    }
1179
1180    #[test]
1181    fn test_file_info_file_type_display() {
1182        let file_info = FileInfo {
1183            path: PathBuf::from("test.rs"),
1184            relative_path: PathBuf::from("test.rs"),
1185            size: 1000,
1186            file_type: FileType::Rust,
1187            priority: 1.0,
1188            imports: Vec::new(),
1189            imported_by: Vec::new(),
1190            function_calls: Vec::new(),
1191            type_references: Vec::new(),
1192            exported_functions: Vec::new(),
1193        };
1194
1195        assert_eq!(file_info.file_type_display(), "Rust");
1196
1197        let file_info_md = FileInfo {
1198            path: PathBuf::from("README.md"),
1199            relative_path: PathBuf::from("README.md"),
1200            size: 500,
1201            file_type: FileType::Markdown,
1202            priority: 0.6,
1203            imports: Vec::new(),
1204            imported_by: Vec::new(),
1205            function_calls: Vec::new(),
1206            type_references: Vec::new(),
1207            exported_functions: Vec::new(),
1208        };
1209
1210        assert_eq!(file_info_md.file_type_display(), "Markdown");
1211    }
1212
1213    // === WALKER GLOB PATTERN INTEGRATION TESTS (TDD - Red Phase) ===
1214
1215    #[test]
1216    fn test_walk_options_from_config_with_include_patterns() {
1217        // Test that CLI include patterns are passed to WalkOptions
1218        let config = crate::cli::Config {
1219            include: Some(vec!["**/*.rs".to_string(), "**/test[0-9].py".to_string()]),
1220            semantic_depth: 3,
1221            ..Default::default()
1222        };
1223
1224        let options = WalkOptions::from_config(&config).unwrap();
1225
1226        // This test will fail until we update from_config to use CLI include patterns
1227        assert_eq!(options.include_patterns, vec!["**/*.rs", "**/test[0-9].py"]);
1228    }
1229
1230    #[test]
1231    fn test_walk_options_from_config_empty_include_patterns() {
1232        // Test that empty include patterns work correctly
1233        let config = crate::cli::Config {
1234            semantic_depth: 3,
1235            ..Default::default()
1236        };
1237
1238        let options = WalkOptions::from_config(&config).unwrap();
1239        assert_eq!(options.include_patterns, Vec::<String>::new());
1240    }
1241
1242    #[test]
1243    fn test_walk_options_filters_empty_patterns() {
1244        // Test that empty/whitespace patterns are filtered out
1245        let config = crate::cli::Config {
1246            include: Some(vec![
1247                "**/*.rs".to_string(),
1248                "".to_string(),
1249                "   ".to_string(),
1250                "*.py".to_string(),
1251            ]),
1252            semantic_depth: 3,
1253            ..Default::default()
1254        };
1255
1256        let options = WalkOptions::from_config(&config).unwrap();
1257
1258        // Should filter out empty and whitespace-only patterns
1259        assert_eq!(options.include_patterns, vec!["**/*.rs", "*.py"]);
1260    }
1261
1262    // === PATTERN SANITIZATION TESTS ===
1263
1264    #[test]
1265    fn test_sanitize_pattern_valid_patterns() {
1266        // Test valid patterns that should pass sanitization
1267        let valid_patterns = vec![
1268            "*.py",
1269            "**/*.rs",
1270            "src/**/*.{js,ts}",
1271            "test[0-9].py",
1272            "**/*{model,service}*.py",
1273            "**/db/**",
1274            "some-file.txt",
1275            "dir/subdir/*.md",
1276        ];
1277
1278        for pattern in valid_patterns {
1279            let result = sanitize_pattern(pattern);
1280            assert!(result.is_ok(), "Pattern '{pattern}' should be valid");
1281            assert_eq!(result.unwrap(), pattern);
1282        }
1283    }
1284
1285    #[test]
1286    fn test_sanitize_pattern_length_limit() {
1287        // Test pattern length limit (1000 characters)
1288        let short_pattern = "a".repeat(999);
1289        let exact_limit = "a".repeat(1000);
1290        let too_long = "a".repeat(1001);
1291
1292        assert!(sanitize_pattern(&short_pattern).is_ok());
1293        assert!(sanitize_pattern(&exact_limit).is_ok());
1294
1295        let result = sanitize_pattern(&too_long);
1296        assert!(result.is_err());
1297        assert!(result.unwrap_err().to_string().contains("Pattern too long"));
1298    }
1299
1300    #[test]
1301    fn test_sanitize_pattern_null_bytes() {
1302        // Test patterns with null bytes
1303        let patterns_with_nulls = vec!["test\0.py", "\0*.rs", "**/*.js\0", "dir/\0file.txt"];
1304
1305        for pattern in patterns_with_nulls {
1306            let result = sanitize_pattern(pattern);
1307            assert!(
1308                result.is_err(),
1309                "Pattern with null byte should be rejected: {pattern:?}"
1310            );
1311            assert!(result
1312                .unwrap_err()
1313                .to_string()
1314                .contains("invalid characters"));
1315        }
1316    }
1317
1318    #[test]
1319    fn test_sanitize_pattern_control_characters() {
1320        // Test patterns with control characters
1321        let control_chars = vec![
1322            "test\x01.py",  // Start of heading
1323            "file\x08.txt", // Backspace
1324            "dir\x0c/*.rs", // Form feed
1325            "test\x1f.md",  // Unit separator
1326            "*.py\x7f",     // Delete
1327        ];
1328
1329        for pattern in control_chars {
1330            let result = sanitize_pattern(pattern);
1331            assert!(
1332                result.is_err(),
1333                "Pattern with control char should be rejected: {pattern:?}"
1334            );
1335            assert!(result
1336                .unwrap_err()
1337                .to_string()
1338                .contains("invalid characters"));
1339        }
1340    }
1341
1342    #[test]
1343    fn test_sanitize_pattern_absolute_paths() {
1344        // Test absolute paths that should be rejected
1345        let absolute_paths = vec![
1346            "/etc/passwd",
1347            "/usr/bin/*.sh",
1348            "/home/user/file.txt",
1349            "\\Windows\\System32\\*.dll", // Windows absolute path
1350            "\\Program Files\\*",
1351        ];
1352
1353        for pattern in absolute_paths {
1354            let result = sanitize_pattern(pattern);
1355            assert!(
1356                result.is_err(),
1357                "Absolute path should be rejected: {pattern}"
1358            );
1359            assert!(result
1360                .unwrap_err()
1361                .to_string()
1362                .contains("Absolute paths not allowed"));
1363        }
1364    }
1365
1366    #[test]
1367    fn test_sanitize_pattern_directory_traversal() {
1368        // Test directory traversal patterns
1369        let traversal_patterns = vec![
1370            "../../../etc/passwd",
1371            "dir/../../../file.txt",
1372            "**/../secret/*",
1373            "test/../../*.py",
1374            "../config.toml",
1375            "subdir/../../other.rs",
1376        ];
1377
1378        for pattern in traversal_patterns {
1379            let result = sanitize_pattern(pattern);
1380            assert!(
1381                result.is_err(),
1382                "Directory traversal should be rejected: {pattern}"
1383            );
1384            assert!(result
1385                .unwrap_err()
1386                .to_string()
1387                .contains("Directory traversal"));
1388        }
1389    }
1390
1391    #[test]
1392    fn test_sanitize_pattern_edge_cases() {
1393        // Test edge cases that might reveal bugs
1394
1395        // Empty string
1396        let result = sanitize_pattern("");
1397        assert!(result.is_ok(), "Empty string should be allowed");
1398
1399        // Only whitespace
1400        let result = sanitize_pattern("   ");
1401        assert!(result.is_ok(), "Whitespace-only should be allowed");
1402
1403        // Unicode characters
1404        let result = sanitize_pattern("файл*.txt");
1405        assert!(result.is_ok(), "Unicode should be allowed");
1406
1407        // Special glob characters
1408        let result = sanitize_pattern("file[!abc]*.{py,rs}");
1409        assert!(result.is_ok(), "Complex glob patterns should be allowed");
1410
1411        // Newlines and tabs (these are control characters)
1412        let result = sanitize_pattern("file\nname.txt");
1413        assert!(result.is_err(), "Newlines should be rejected");
1414
1415        let result = sanitize_pattern("file\tname.txt");
1416        assert!(result.is_err(), "Tabs should be rejected");
1417    }
1418
1419    #[test]
1420    fn test_sanitize_pattern_boundary_conditions() {
1421        // Test patterns that are at the boundary of what should be allowed
1422
1423        // Pattern with exactly ".." but not as traversal
1424        let result = sanitize_pattern("file..name.txt");
1425        assert!(result.is_err(), "Any '..' should be rejected for safety");
1426
1427        // Pattern starting with legitimate glob
1428        let result = sanitize_pattern("**/*.py");
1429        assert!(result.is_ok(), "Recursive glob should be allowed");
1430
1431        // Mixed valid/invalid (should reject entire pattern)
1432        let result = sanitize_pattern("valid/*.py/../invalid");
1433        assert!(result.is_err(), "Mixed pattern should be rejected");
1434    }
1435
1436    #[test]
1437    fn test_sanitize_pattern_security_bypass_attempts() {
1438        // Test patterns that might try to bypass security checks
1439
1440        // URL-encoded null byte
1441        let result = sanitize_pattern("file%00.txt");
1442        assert!(result.is_ok(), "URL encoding should not be decoded");
1443
1444        // Double-encoded traversal
1445        let result = sanitize_pattern("file%2e%2e/secret");
1446        assert!(result.is_ok(), "Double encoding should not be decoded");
1447
1448        // Unicode normalization attacks
1449        let result = sanitize_pattern("file\u{002e}\u{002e}/secret");
1450        assert!(result.is_err(), "Unicode dots should be treated as '..'");
1451
1452        // Null byte at end
1453        let result = sanitize_pattern("legitimate-pattern\0");
1454        assert!(result.is_err(), "Trailing null should be caught");
1455    }
1456
1457    // === ERROR HANDLING TESTS ===
1458
1459    #[test]
1460    fn test_error_handling_classification() {
1461        // Test that we correctly classify errors as critical vs non-critical
1462        use crate::utils::error::ContextCreatorError;
1463
1464        // Simulate critical errors
1465        let critical_errors = [
1466            ContextCreatorError::FileProcessingError {
1467                path: "test.txt".to_string(),
1468                error: "Permission denied".to_string(),
1469            },
1470            ContextCreatorError::InvalidConfiguration("Invalid pattern".to_string()),
1471        ];
1472
1473        // Check that permission denied is considered critical
1474        let error_string = critical_errors[0].to_string();
1475        assert!(error_string.contains("Permission denied"));
1476
1477        // Check that invalid configuration is considered critical
1478        let error_string = critical_errors[1].to_string();
1479        assert!(error_string.contains("Invalid"));
1480    }
1481
1482    #[test]
1483    fn test_pattern_sanitization_integration() {
1484        // Test that sanitization is actually called in the build_walker path
1485        use tempfile::TempDir;
1486
1487        let temp_dir = TempDir::new().unwrap();
1488        let root = temp_dir.path();
1489
1490        // Create WalkOptions with a pattern that should be sanitized
1491        let options = WalkOptions {
1492            max_file_size: Some(1024),
1493            follow_links: false,
1494            include_hidden: false,
1495            parallel: false,
1496            ignore_file: ".context-creator-ignore".to_string(),
1497            ignore_patterns: vec![],
1498            include_patterns: vec!["../../../etc/passwd".to_string()], // Should be rejected
1499            custom_priorities: vec![],
1500            filter_binary_files: false,
1501        };
1502
1503        // This should fail due to sanitization
1504        let result = build_walker(root, &options);
1505        assert!(
1506            result.is_err(),
1507            "Directory traversal pattern should be rejected by sanitization"
1508        );
1509
1510        if let Err(e) = result {
1511            let error_msg = e.to_string();
1512            assert!(error_msg.contains("Directory traversal") || error_msg.contains("Invalid"));
1513        }
1514    }
1515
1516    #[test]
1517    fn test_walk_options_filters_binary_files_with_prompt() {
1518        use crate::cli::Config;
1519
1520        let config = Config {
1521            prompt: Some("test prompt".to_string()),
1522            paths: Some(vec![PathBuf::from(".")]),
1523            llm_tool: crate::cli::LlmTool::Gemini,
1524            semantic_depth: 3,
1525            ..Default::default()
1526        };
1527
1528        let options = WalkOptions::from_config(&config).unwrap();
1529        assert!(options.filter_binary_files);
1530    }
1531
1532    #[test]
1533    fn test_walk_options_no_binary_filter_without_prompt() {
1534        use crate::cli::Config;
1535
1536        let config = Config {
1537            paths: Some(vec![PathBuf::from(".")]),
1538            llm_tool: crate::cli::LlmTool::Gemini,
1539            semantic_depth: 3,
1540            ..Default::default()
1541        };
1542
1543        let options = WalkOptions::from_config(&config).unwrap();
1544        assert!(!options.filter_binary_files);
1545    }
1546
1547    // === Binary File Filtering Tests (TDD - Red Phase) ===
1548
1549    #[test]
1550    fn test_filter_binary_files_when_enabled() {
1551        // Given: A directory with mixed file types
1552        let temp_dir = TempDir::new().unwrap();
1553        let root = temp_dir.path();
1554
1555        // Create test files
1556        File::create(root.join("image.jpg")).unwrap();
1557        File::create(root.join("video.mp4")).unwrap();
1558        File::create(root.join("main.rs")).unwrap();
1559        File::create(root.join("config.json")).unwrap();
1560
1561        // When: Walking with filter_binary_files = true
1562        let options = WalkOptions {
1563            filter_binary_files: true,
1564            ..Default::default()
1565        };
1566        let files = walk_directory(root, options).unwrap();
1567
1568        // Then: Only text files are returned
1569        assert_eq!(files.len(), 2);
1570        assert!(files
1571            .iter()
1572            .any(|f| f.relative_path == PathBuf::from("main.rs")));
1573        assert!(files
1574            .iter()
1575            .any(|f| f.relative_path == PathBuf::from("config.json")));
1576        assert!(!files
1577            .iter()
1578            .any(|f| f.relative_path == PathBuf::from("image.jpg")));
1579        assert!(!files
1580            .iter()
1581            .any(|f| f.relative_path == PathBuf::from("video.mp4")));
1582    }
1583
1584    #[test]
1585    fn test_no_filtering_when_disabled() {
1586        // Given: Same mixed directory
1587        let temp_dir = TempDir::new().unwrap();
1588        let root = temp_dir.path();
1589
1590        // Create test files
1591        File::create(root.join("image.jpg")).unwrap();
1592        File::create(root.join("video.mp4")).unwrap();
1593        File::create(root.join("main.rs")).unwrap();
1594        File::create(root.join("config.json")).unwrap();
1595
1596        // When: Walking with filter_binary_files = false
1597        let options = WalkOptions {
1598            filter_binary_files: false,
1599            ..Default::default()
1600        };
1601        let files = walk_directory(root, options).unwrap();
1602
1603        // Then: All files are returned
1604        assert_eq!(files.len(), 4);
1605        assert!(files
1606            .iter()
1607            .any(|f| f.relative_path == PathBuf::from("main.rs")));
1608        assert!(files
1609            .iter()
1610            .any(|f| f.relative_path == PathBuf::from("config.json")));
1611        assert!(files
1612            .iter()
1613            .any(|f| f.relative_path == PathBuf::from("image.jpg")));
1614        assert!(files
1615            .iter()
1616            .any(|f| f.relative_path == PathBuf::from("video.mp4")));
1617    }
1618
1619    #[test]
1620    fn test_edge_case_files_without_extensions() {
1621        // Given: Files without extensions and text files with misleading names
1622        let temp_dir = TempDir::new().unwrap();
1623        let root = temp_dir.path();
1624
1625        // Create edge case files
1626        File::create(root.join("README")).unwrap();
1627        File::create(root.join("LICENSE")).unwrap();
1628        File::create(root.join("Makefile")).unwrap();
1629        File::create(root.join("Dockerfile")).unwrap();
1630        File::create(root.join("binary.exe")).unwrap();
1631
1632        // When: Walking with filter_binary_files = true
1633        let options = WalkOptions {
1634            filter_binary_files: true,
1635            ..Default::default()
1636        };
1637        let files = walk_directory(root, options).unwrap();
1638
1639        // Then: Text files without extensions are kept, binaries are filtered
1640        assert_eq!(files.len(), 4);
1641        assert!(files
1642            .iter()
1643            .any(|f| f.relative_path == PathBuf::from("README")));
1644        assert!(files
1645            .iter()
1646            .any(|f| f.relative_path == PathBuf::from("LICENSE")));
1647        assert!(files
1648            .iter()
1649            .any(|f| f.relative_path == PathBuf::from("Makefile")));
1650        assert!(files
1651            .iter()
1652            .any(|f| f.relative_path == PathBuf::from("Dockerfile")));
1653        assert!(!files
1654            .iter()
1655            .any(|f| f.relative_path == PathBuf::from("binary.exe")));
1656    }
1657}
context_creator/core/walker.rs

context_creator/core/
walker.rs