1use crate::utils::error::ContextCreatorError;
4use crate::utils::file_ext::{is_binary_extension, FileType};
5use anyhow::Result;
6use glob::Pattern;
7use ignore::{Walk, WalkBuilder};
8use rayon::prelude::*;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11use tracing::warn;
12
13#[derive(Debug, Clone)]
26pub struct CompiledPriority {
27 pub matcher: Pattern,
29 pub weight: f32,
31 pub original_pattern: String,
33}
34
35impl CompiledPriority {
36 pub fn new(pattern: &str, weight: f32) -> Result<Self, glob::PatternError> {
38 let matcher = Pattern::new(pattern)?;
39 Ok(Self {
40 matcher,
41 weight,
42 original_pattern: pattern.to_string(),
43 })
44 }
45
46 pub fn try_from_config_priority(
48 priority: &crate::config::Priority,
49 ) -> Result<Self, glob::PatternError> {
50 Self::new(&priority.pattern, priority.weight)
51 }
52}
53
54#[derive(Debug, Clone)]
56pub struct WalkOptions {
57 pub max_file_size: Option<usize>,
59 pub follow_links: bool,
61 pub include_hidden: bool,
63 pub parallel: bool,
65 pub ignore_file: String,
67 pub ignore_patterns: Vec<String>,
69 pub include_patterns: Vec<String>,
71 pub custom_priorities: Vec<CompiledPriority>,
73 pub filter_binary_files: bool,
75}
76
77impl WalkOptions {
78 pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
80 let mut custom_priorities = Vec::new();
82 for priority in &config.custom_priorities {
83 match CompiledPriority::try_from_config_priority(priority) {
84 Ok(compiled) => custom_priorities.push(compiled),
85 Err(e) => {
86 return Err(ContextCreatorError::ConfigError(format!(
87 "Invalid glob pattern '{}' in custom priorities: {e}",
88 priority.pattern
89 ))
90 .into());
91 }
92 }
93 }
94
95 let include_patterns = config
97 .get_include_patterns()
98 .into_iter()
99 .filter(|pattern| !pattern.trim().is_empty())
100 .collect();
101
102 let ignore_patterns = config
104 .get_ignore_patterns()
105 .into_iter()
106 .filter(|pattern| !pattern.trim().is_empty())
107 .collect();
108
109 Ok(WalkOptions {
110 max_file_size: Some(10 * 1024 * 1024), follow_links: false,
112 include_hidden: false,
113 parallel: true,
114 ignore_file: ".context-creator-ignore".to_string(),
115 ignore_patterns,
116 include_patterns,
117 custom_priorities,
118 filter_binary_files: config.get_prompt().is_some(),
119 })
120 }
121}
122
123impl Default for WalkOptions {
124 fn default() -> Self {
125 WalkOptions {
126 max_file_size: Some(10 * 1024 * 1024), follow_links: false,
128 include_hidden: false,
129 parallel: true,
130 ignore_file: ".context-creator-ignore".to_string(),
131 ignore_patterns: vec![],
132 include_patterns: vec![],
133 custom_priorities: vec![],
134 filter_binary_files: false,
135 }
136 }
137}
138
139#[derive(Debug, Clone)]
141pub struct FileInfo {
142 pub path: PathBuf,
144 pub relative_path: PathBuf,
146 pub size: u64,
148 pub file_type: FileType,
150 pub priority: f32,
152 pub imports: Vec<PathBuf>,
154 pub imported_by: Vec<PathBuf>,
156 pub function_calls: Vec<crate::core::semantic::analyzer::FunctionCall>,
158 pub type_references: Vec<crate::core::semantic::analyzer::TypeReference>,
160 pub exported_functions: Vec<crate::core::semantic::analyzer::FunctionDefinition>,
162}
163
164impl FileInfo {
165 pub fn file_type_display(&self) -> &'static str {
167 use crate::utils::file_ext::FileType;
168 match self.file_type {
169 FileType::Rust => "Rust",
170 FileType::Python => "Python",
171 FileType::JavaScript => "JavaScript",
172 FileType::TypeScript => "TypeScript",
173 FileType::Go => "Go",
174 FileType::Java => "Java",
175 FileType::Cpp => "C++",
176 FileType::C => "C",
177 FileType::CSharp => "C#",
178 FileType::Ruby => "Ruby",
179 FileType::Php => "PHP",
180 FileType::Swift => "Swift",
181 FileType::Kotlin => "Kotlin",
182 FileType::Scala => "Scala",
183 FileType::Haskell => "Haskell",
184 FileType::Dart => "Dart",
185 FileType::Lua => "Lua",
186 FileType::R => "R",
187 FileType::Julia => "Julia",
188 FileType::Elixir => "Elixir",
189 FileType::Elm => "Elm",
190 FileType::Markdown => "Markdown",
191 FileType::Json => "JSON",
192 FileType::Yaml => "YAML",
193 FileType::Toml => "TOML",
194 FileType::Xml => "XML",
195 FileType::Html => "HTML",
196 FileType::Css => "CSS",
197 FileType::Text => "Text",
198 FileType::Other => "Other",
199 }
200 }
201}
202
203pub fn walk_directory(root: &Path, options: WalkOptions) -> Result<Vec<FileInfo>> {
205 if !root.exists() {
206 return Err(ContextCreatorError::InvalidPath(format!(
207 "Path does not exist: {}",
208 root.display()
209 ))
210 .into());
211 }
212
213 if root.is_file() {
215 let metadata = root.metadata()?;
216 let file_type = FileType::from_path(root);
217 let relative_path = PathBuf::from(
218 root.file_name()
219 .ok_or_else(|| anyhow::anyhow!("Invalid file name"))?,
220 );
221 let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
222
223 let file_info = FileInfo {
224 path: root.to_path_buf(),
225 relative_path,
226 size: metadata.len(),
227 file_type,
228 priority,
229 imports: Vec::new(),
230 imported_by: Vec::new(),
231 function_calls: Vec::new(),
232 type_references: Vec::new(),
233 exported_functions: Vec::new(),
234 };
235 return Ok(vec![file_info]);
236 }
237
238 if !root.is_dir() {
239 return Err(ContextCreatorError::InvalidPath(format!(
240 "Path is neither a file nor a directory: {}",
241 root.display()
242 ))
243 .into());
244 }
245
246 let root = root.canonicalize()?;
247 let walker = build_walker(&root, &options)?;
248
249 if options.parallel {
250 walk_parallel(walker, &root, &options)
251 } else {
252 walk_sequential(walker, &root, &options)
253 }
254}
255
256pub fn sanitize_pattern(pattern: &str) -> Result<String> {
258 if pattern.len() > 1000 {
260 return Err(ContextCreatorError::InvalidConfiguration(
261 "Pattern too long (max 1000 characters)".to_string(),
262 )
263 .into());
264 }
265
266 if pattern.contains('\0')
268 || pattern.chars().any(|c| {
269 c.is_control() ||
270 c == '\u{2028}' || c == '\u{2029}' || c == '\u{FEFF}' })
274 {
275 return Err(ContextCreatorError::InvalidConfiguration(
276 "Pattern contains invalid characters (null bytes or control characters)".to_string(),
277 )
278 .into());
279 }
280
281 if pattern.starts_with('/') || pattern.starts_with('\\') {
283 return Err(ContextCreatorError::InvalidConfiguration(
284 "Absolute paths not allowed in patterns".to_string(),
285 )
286 .into());
287 }
288
289 if pattern.contains("..") {
291 return Err(ContextCreatorError::InvalidConfiguration(
292 "Directory traversal (..) not allowed in patterns".to_string(),
293 )
294 .into());
295 }
296
297 Ok(pattern.to_string())
298}
299
300fn build_walker(root: &Path, options: &WalkOptions) -> Result<Walk> {
302 let mut builder = WalkBuilder::new(root);
303
304 builder
306 .follow_links(options.follow_links)
307 .hidden(!options.include_hidden)
308 .git_ignore(true)
309 .git_global(true)
310 .git_exclude(true)
311 .ignore(true)
312 .parents(true)
313 .add_custom_ignore_filename(&options.ignore_file);
314
315 if !options.include_patterns.is_empty() || !options.ignore_patterns.is_empty() {
317 let mut override_builder = ignore::overrides::OverrideBuilder::new(root);
318
319 if options.include_patterns.is_empty() && !options.ignore_patterns.is_empty() {
321 override_builder.add("**/*").map_err(|e| {
323 ContextCreatorError::InvalidConfiguration(format!(
324 "Failed to add include-all pattern: {e}"
325 ))
326 })?;
327 }
328
329 for pattern in &options.include_patterns {
331 if !pattern.trim().is_empty() {
332 let sanitized_pattern = sanitize_pattern(pattern)?;
334
335 override_builder.add(&sanitized_pattern).map_err(|e| {
337 ContextCreatorError::InvalidConfiguration(format!(
338 "Invalid include pattern '{pattern}': {e}"
339 ))
340 })?;
341 }
342 }
343
344 for pattern in &options.ignore_patterns {
347 if !pattern.trim().is_empty() {
348 let sanitized_pattern = sanitize_pattern(pattern)?;
350
351 let ignore_pattern = format!("!{sanitized_pattern}");
353 override_builder.add(&ignore_pattern).map_err(|e| {
354 ContextCreatorError::InvalidConfiguration(format!(
355 "Invalid ignore pattern '{pattern}': {e}"
356 ))
357 })?;
358 }
359 }
360
361 let overrides = override_builder.build().map_err(|e| {
362 ContextCreatorError::InvalidConfiguration(format!(
363 "Failed to build pattern overrides: {e}"
364 ))
365 })?;
366
367 builder.overrides(overrides);
368 }
369
370 Ok(builder.build())
371}
372
373fn walk_sequential(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
375 let mut files = Vec::new();
376
377 for entry in walker {
378 let entry = entry?;
379 let path = entry.path();
380
381 if path.is_dir() {
383 continue;
384 }
385
386 if let Some(file_info) = process_file(path, root, options)? {
388 files.push(file_info);
389 }
390 }
391
392 Ok(files)
393}
394
395fn walk_parallel(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
397 use itertools::Itertools;
398
399 let root = Arc::new(root.to_path_buf());
400 let options = Arc::new(options.clone());
401
402 let entries: Vec<_> = walker
404 .filter_map(|e| e.ok())
405 .filter(|e| !e.path().is_dir())
406 .collect();
407
408 let results: Vec<Result<Option<FileInfo>, ContextCreatorError>> = entries
410 .into_par_iter()
411 .map(|entry| {
412 let path = entry.path();
413 match process_file(path, &root, &options) {
414 Ok(file_info) => Ok(file_info),
415 Err(e) => Err(ContextCreatorError::FileProcessingError {
416 path: path.display().to_string(),
417 error: e.to_string(),
418 }),
419 }
420 })
421 .collect();
422
423 let (successes, errors): (Vec<_>, Vec<_>) = results.into_iter().partition_result();
425
426 if !errors.is_empty() {
428 let critical_errors: Vec<_> = errors
429 .iter()
430 .filter(|e| {
431 e.to_string().contains("Permission denied") || e.to_string().contains("Invalid")
432 })
433 .collect();
434
435 if !critical_errors.is_empty() {
436 let error_summary: Vec<String> =
438 critical_errors.iter().map(|e| e.to_string()).collect();
439 return Err(anyhow::anyhow!(
440 "Critical file processing errors encountered: {}",
441 error_summary.join(", ")
442 ));
443 }
444
445 warn!("Warning: {} files could not be processed:", errors.len());
447 for error in &errors {
448 warn!(" {}", error);
449 }
450 }
451
452 let files: Vec<FileInfo> = successes.into_iter().flatten().collect();
454 Ok(files)
455}
456
457fn process_file(path: &Path, root: &Path, options: &WalkOptions) -> Result<Option<FileInfo>> {
459 let metadata = match std::fs::metadata(path) {
461 Ok(meta) => meta,
462 Err(_) => return Ok(None), };
464
465 let size = metadata.len();
466
467 if let Some(max_size) = options.max_file_size {
469 if size > max_size as u64 {
470 return Ok(None);
471 }
472 }
473
474 if options.filter_binary_files && is_binary_extension(path) {
476 return Ok(None);
477 }
478
479 let relative_path = path.strip_prefix(root).unwrap_or(path).to_path_buf();
481
482 let file_type = FileType::from_path(path);
484
485 if options.filter_binary_files && file_type == FileType::Other {
487 return Ok(None);
488 }
489
490 let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
492
493 Ok(Some(FileInfo {
494 path: path.to_path_buf(),
495 relative_path,
496 size,
497 file_type,
498 priority,
499 imports: Vec::new(), imported_by: Vec::new(), function_calls: Vec::new(), type_references: Vec::new(), exported_functions: Vec::new(), }))
505}
506
507fn calculate_priority(
509 file_type: &FileType,
510 relative_path: &Path,
511 custom_priorities: &[CompiledPriority],
512) -> f32 {
513 let base_score = calculate_base_priority(file_type, relative_path);
515
516 for priority in custom_priorities {
518 if priority.matcher.matches_path(relative_path) {
519 return base_score + priority.weight;
520 }
521 }
522
523 base_score
525}
526
527fn calculate_base_priority(file_type: &FileType, relative_path: &Path) -> f32 {
529 let mut score: f32 = match file_type {
530 FileType::Rust => 1.0,
531 FileType::Python => 0.9,
532 FileType::JavaScript => 0.9,
533 FileType::TypeScript => 0.95,
534 FileType::Go => 0.9,
535 FileType::Java => 0.85,
536 FileType::Cpp => 0.85,
537 FileType::C => 0.8,
538 FileType::CSharp => 0.85,
539 FileType::Ruby => 0.8,
540 FileType::Php => 0.75,
541 FileType::Swift => 0.85,
542 FileType::Kotlin => 0.85,
543 FileType::Scala => 0.8,
544 FileType::Haskell => 0.75,
545 FileType::Dart => 0.85,
546 FileType::Lua => 0.7,
547 FileType::R => 0.75,
548 FileType::Julia => 0.8,
549 FileType::Elixir => 0.8,
550 FileType::Elm => 0.75,
551 FileType::Markdown => 0.6,
552 FileType::Json => 0.5,
553 FileType::Yaml => 0.5,
554 FileType::Toml => 0.5,
555 FileType::Xml => 0.4,
556 FileType::Html => 0.4,
557 FileType::Css => 0.4,
558 FileType::Text => 0.3,
559 FileType::Other => 0.2,
560 };
561
562 let path_str = relative_path.to_string_lossy().to_lowercase();
564 if path_str.contains("main") || path_str.contains("index") {
565 score *= 1.5;
566 }
567 if path_str.contains("lib") || path_str.contains("src") {
568 score *= 1.2;
569 }
570 if path_str.contains("test") || path_str.contains("spec") {
571 score *= 0.8;
572 }
573 if path_str.contains("example") || path_str.contains("sample") {
574 score *= 0.7;
575 }
576
577 if relative_path.parent().is_none() || relative_path.parent() == Some(Path::new("")) {
579 match file_type {
580 FileType::Toml | FileType::Yaml | FileType::Json => score *= 1.3,
581 _ => {}
582 }
583 }
584
585 score.min(2.0) }
587
588pub fn perform_semantic_analysis(
601 files: &mut [FileInfo],
602 config: &crate::cli::Config,
603 cache: &crate::core::cache::FileCache,
604) -> Result<()> {
605 crate::core::semantic_graph::perform_semantic_analysis_graph(files, config, cache)
607}
608
/// Returns `s` with its first character upper-cased and the rest unchanged.
///
/// An empty input yields an empty string. Upper-casing follows Unicode rules,
/// so a single character may expand to several (e.g. `ß` becomes `SS`).
// No caller is visible in this part of the file, hence the lint allowance.
#[allow(dead_code)]
fn capitalize_first(s: &str) -> String {
    let mut remainder = s.chars();

    if let Some(head) = remainder.next() {
        let mut capitalized = String::with_capacity(s.len());
        capitalized.extend(head.to_uppercase());
        capitalized.push_str(remainder.as_str());
        capitalized
    } else {
        String::new()
    }
}
618
619#[cfg(test)]
620mod tests {
621 use super::*;
622 use std::fs::{self, File};
623 use tempfile::TempDir;
624
625 #[test]
626 fn test_walk_directory_basic() {
627 let temp_dir = TempDir::new().unwrap();
628 let root = temp_dir.path();
629
630 File::create(root.join("main.rs")).unwrap();
632 File::create(root.join("lib.rs")).unwrap();
633 fs::create_dir(root.join("src")).unwrap();
634 File::create(root.join("src/utils.rs")).unwrap();
635
636 let options = WalkOptions::default();
637 let files = walk_directory(root, options).unwrap();
638
639 assert_eq!(files.len(), 3);
640 assert!(files
641 .iter()
642 .any(|f| f.relative_path == PathBuf::from("main.rs")));
643 assert!(files
644 .iter()
645 .any(|f| f.relative_path == PathBuf::from("lib.rs")));
646 assert!(files
647 .iter()
648 .any(|f| f.relative_path == PathBuf::from("src/utils.rs")));
649 }
650
651 #[test]
652 fn test_walk_with_contextignore() {
653 let temp_dir = TempDir::new().unwrap();
654 let root = temp_dir.path();
655
656 File::create(root.join("main.rs")).unwrap();
658 File::create(root.join("ignored.rs")).unwrap();
659
660 fs::write(root.join(".context-creator-ignore"), "ignored.rs").unwrap();
662
663 let options = WalkOptions::default();
664 let files = walk_directory(root, options).unwrap();
665
666 assert_eq!(files.len(), 1);
667 assert_eq!(files[0].relative_path, PathBuf::from("main.rs"));
668 }
669
670 #[test]
671 fn test_priority_calculation() {
672 let rust_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
673 let test_priority = calculate_priority(&FileType::Rust, Path::new("tests/test.rs"), &[]);
674 let doc_priority = calculate_priority(&FileType::Markdown, Path::new("README.md"), &[]);
675
676 assert!(rust_priority > doc_priority);
677 assert!(rust_priority > test_priority);
678 }
679
680 #[test]
681 fn test_file_size_limit() {
682 let temp_dir = TempDir::new().unwrap();
683 let root = temp_dir.path();
684
685 let large_file = root.join("large.txt");
687 let data = vec![0u8; 1024 * 1024]; fs::write(&large_file, &data).unwrap();
689
690 File::create(root.join("small.txt")).unwrap();
692
693 let options = WalkOptions {
694 max_file_size: Some(512 * 1024), ..Default::default()
696 };
697
698 let files = walk_directory(root, options).unwrap();
699
700 assert_eq!(files.len(), 1);
701 assert_eq!(files[0].relative_path, PathBuf::from("small.txt"));
702 }
703
704 #[test]
705 fn test_walk_empty_directory() {
706 let temp_dir = TempDir::new().unwrap();
707 let root = temp_dir.path();
708
709 let options = WalkOptions::default();
710 let files = walk_directory(root, options).unwrap();
711
712 assert_eq!(files.len(), 0);
713 }
714
715 #[test]
716 fn test_walk_options_from_config() {
717 use crate::cli::Config;
718 use tempfile::TempDir;
719
720 let temp_dir = TempDir::new().unwrap();
721 let config = Config {
722 paths: Some(vec![temp_dir.path().to_path_buf()]),
723 ..Config::default()
724 };
725
726 let options = WalkOptions::from_config(&config).unwrap();
727
728 assert_eq!(options.max_file_size, Some(10 * 1024 * 1024));
729 assert!(!options.follow_links);
730 assert!(!options.include_hidden);
731 assert!(options.parallel);
732 assert_eq!(options.ignore_file, ".context-creator-ignore");
733 }
734
735 #[test]
736 fn test_walk_with_custom_options() {
737 let temp_dir = TempDir::new().unwrap();
738 let root = temp_dir.path();
739
740 File::create(root.join("main.rs")).unwrap();
742 File::create(root.join("test.rs")).unwrap();
743 File::create(root.join("readme.md")).unwrap();
744
745 let options = WalkOptions {
746 ignore_patterns: vec!["*.md".to_string()],
747 ..Default::default()
748 };
749
750 let files = walk_directory(root, options).unwrap();
751
752 assert!(files.len() >= 2);
754 assert!(files
755 .iter()
756 .any(|f| f.relative_path == PathBuf::from("main.rs")));
757 assert!(files
758 .iter()
759 .any(|f| f.relative_path == PathBuf::from("test.rs")));
760 }
761
762 #[test]
763 fn test_walk_with_include_patterns() {
764 let temp_dir = TempDir::new().unwrap();
765 let root = temp_dir.path();
766
767 File::create(root.join("main.rs")).unwrap();
769 File::create(root.join("lib.rs")).unwrap();
770 File::create(root.join("README.md")).unwrap();
771
772 let options = WalkOptions {
773 include_patterns: vec!["*.rs".to_string()],
774 ..Default::default()
775 };
776
777 let files = walk_directory(root, options).unwrap();
778
779 assert!(files.len() >= 2);
781 assert!(files
782 .iter()
783 .any(|f| f.relative_path == PathBuf::from("main.rs")));
784 assert!(files
785 .iter()
786 .any(|f| f.relative_path == PathBuf::from("lib.rs")));
787 }
788
789 #[test]
790 fn test_walk_subdirectories() {
791 let temp_dir = TempDir::new().unwrap();
792 let root = temp_dir.path();
793
794 fs::create_dir(root.join("src")).unwrap();
796 fs::create_dir(root.join("src").join("utils")).unwrap();
797 File::create(root.join("main.rs")).unwrap();
798 File::create(root.join("src").join("lib.rs")).unwrap();
799 File::create(root.join("src").join("utils").join("helpers.rs")).unwrap();
800
801 let options = WalkOptions::default();
802 let files = walk_directory(root, options).unwrap();
803
804 assert_eq!(files.len(), 3);
805 assert!(files
806 .iter()
807 .any(|f| f.relative_path == PathBuf::from("main.rs")));
808 assert!(files
809 .iter()
810 .any(|f| f.relative_path == PathBuf::from("src/lib.rs")));
811 assert!(files
812 .iter()
813 .any(|f| f.relative_path == PathBuf::from("src/utils/helpers.rs")));
814 }
815
816 #[test]
817 fn test_priority_edge_cases() {
818 let main_priority = calculate_priority(&FileType::Rust, Path::new("main.rs"), &[]);
820 let lib_priority = calculate_priority(&FileType::Rust, Path::new("lib.rs"), &[]);
821 let nested_main_priority =
822 calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
823
824 assert!(main_priority > lib_priority);
825 assert!(nested_main_priority > lib_priority);
826
827 let toml_priority = calculate_priority(&FileType::Toml, Path::new("Cargo.toml"), &[]);
829 let nested_toml_priority =
830 calculate_priority(&FileType::Toml, Path::new("config/app.toml"), &[]);
831
832 assert!(toml_priority > nested_toml_priority);
833 }
834
835 #[test]
838 fn test_custom_priority_no_match_returns_base_priority() {
839 let custom_priorities = [CompiledPriority::new("docs/*.md", 5.0).unwrap()];
842
843 let priority = calculate_priority(
845 &FileType::Rust,
846 Path::new("src/main.rs"),
847 &custom_priorities,
848 );
849
850 let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
852 assert_eq!(priority, expected_base);
853 }
854
855 #[test]
856 fn test_custom_priority_single_match_adds_weight() {
857 let custom_priorities = [CompiledPriority::new("src/core/mod.rs", 10.0).unwrap()];
859
860 let priority = calculate_priority(
862 &FileType::Rust,
863 Path::new("src/core/mod.rs"),
864 &custom_priorities,
865 );
866
867 let base_priority = calculate_priority(&FileType::Rust, Path::new("src/core/mod.rs"), &[]);
869 let expected = base_priority + 10.0;
870 assert_eq!(priority, expected);
871 }
872
873 #[test]
874 fn test_custom_priority_glob_pattern_match() {
875 let custom_priorities = [CompiledPriority::new("src/**/*.rs", 2.5).unwrap()];
877
878 let priority = calculate_priority(
880 &FileType::Rust,
881 Path::new("src/api/handlers.rs"),
882 &custom_priorities,
883 );
884
885 let base_priority =
887 calculate_priority(&FileType::Rust, Path::new("src/api/handlers.rs"), &[]);
888 let expected = base_priority + 2.5;
889 assert_eq!(priority, expected);
890 }
891
892 #[test]
893 fn test_custom_priority_negative_weight() {
894 let custom_priorities = [CompiledPriority::new("tests/*", -0.5).unwrap()];
896
897 let priority = calculate_priority(
899 &FileType::Rust,
900 Path::new("tests/test_utils.rs"),
901 &custom_priorities,
902 );
903
904 let base_priority =
906 calculate_priority(&FileType::Rust, Path::new("tests/test_utils.rs"), &[]);
907 let expected = base_priority - 0.5;
908 assert_eq!(priority, expected);
909 }
910
911 #[test]
912 fn test_custom_priority_first_match_wins() {
913 let custom_priorities = [
915 CompiledPriority::new("src/**/*.rs", 5.0).unwrap(),
916 CompiledPriority::new("src/main.rs", 100.0).unwrap(),
917 ];
918
919 let priority = calculate_priority(
921 &FileType::Rust,
922 Path::new("src/main.rs"),
923 &custom_priorities,
924 );
925
926 let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
928 let expected = base_priority + 5.0;
929 assert_eq!(priority, expected);
930 }
931
932 #[test]
933 fn test_custom_priority_zero_weight() {
934 let custom_priorities = [CompiledPriority::new("*.rs", 0.0).unwrap()];
936
937 let priority = calculate_priority(
939 &FileType::Rust,
940 Path::new("src/main.rs"),
941 &custom_priorities,
942 );
943
944 let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
946 assert_eq!(priority, base_priority);
947 }
948
949 #[test]
950 fn test_custom_priority_empty_list() {
951 let custom_priorities: &[CompiledPriority] = &[];
953
954 let priority =
956 calculate_priority(&FileType::Rust, Path::new("src/main.rs"), custom_priorities);
957
958 let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
960 assert_eq!(priority, expected_base);
961 }
962
963 #[test]
966 fn test_config_to_walker_data_flow() {
967 use crate::config::{ConfigFile, Priority};
968 use std::fs::{self, File};
969 use tempfile::TempDir;
970
971 let temp_dir = TempDir::new().unwrap();
973 let root = temp_dir.path();
974
975 File::create(root.join("high_priority.rs")).unwrap();
977 File::create(root.join("normal.txt")).unwrap();
978 fs::create_dir(root.join("logs")).unwrap();
979 File::create(root.join("logs/app.log")).unwrap();
980
981 let config_file = ConfigFile {
983 priorities: vec![
984 Priority {
985 pattern: "*.rs".to_string(),
986 weight: 10.0,
987 },
988 Priority {
989 pattern: "logs/*.log".to_string(),
990 weight: -5.0,
991 },
992 ],
993 ..Default::default()
994 };
995
996 let mut config = crate::cli::Config {
998 paths: Some(vec![root.to_path_buf()]),
999 semantic_depth: 3,
1000 ..Default::default()
1001 };
1002 config_file.apply_to_cli_config(&mut config);
1003
1004 let walk_options = WalkOptions::from_config(&config).unwrap();
1006
1007 let files = walk_directory(root, walk_options).unwrap();
1009
1010 let rs_file = files
1012 .iter()
1013 .find(|f| {
1014 f.relative_path
1015 .to_string_lossy()
1016 .contains("high_priority.rs")
1017 })
1018 .unwrap();
1019 let log_file = files
1020 .iter()
1021 .find(|f| f.relative_path.to_string_lossy().contains("app.log"))
1022 .unwrap();
1023 let txt_file = files
1024 .iter()
1025 .find(|f| f.relative_path.to_string_lossy().contains("normal.txt"))
1026 .unwrap();
1027
1028 let base_rs = calculate_base_priority(&rs_file.file_type, &rs_file.relative_path);
1030 let base_txt = calculate_base_priority(&txt_file.file_type, &txt_file.relative_path);
1031 let base_log = calculate_base_priority(&log_file.file_type, &log_file.relative_path);
1032
1033 assert_eq!(rs_file.priority, base_rs + 10.0);
1035
1036 assert_eq!(log_file.priority, base_log - 5.0);
1038
1039 assert_eq!(txt_file.priority, base_txt);
1041 }
1042
1043 #[test]
1044 fn test_invalid_glob_pattern_in_config() {
1045 use crate::config::{ConfigFile, Priority};
1046 use tempfile::TempDir;
1047
1048 let temp_dir = TempDir::new().unwrap();
1049
1050 let config_file = ConfigFile {
1052 priorities: vec![Priority {
1053 pattern: "[invalid_glob".to_string(),
1054 weight: 5.0,
1055 }],
1056 ..Default::default()
1057 };
1058
1059 let mut config = crate::cli::Config {
1060 paths: Some(vec![temp_dir.path().to_path_buf()]),
1061 semantic_depth: 3,
1062 ..Default::default()
1063 };
1064 config_file.apply_to_cli_config(&mut config);
1065
1066 let result = WalkOptions::from_config(&config);
1068 assert!(result.is_err());
1069
1070 let error_msg = result.unwrap_err().to_string();
1072 assert!(error_msg.contains("invalid_glob") || error_msg.contains("Invalid"));
1073 }
1074
1075 #[test]
1076 fn test_empty_custom_priorities_config() {
1077 use crate::config::ConfigFile;
1078 use tempfile::TempDir;
1079
1080 let temp_dir = TempDir::new().unwrap();
1081
1082 let config_file = ConfigFile {
1084 priorities: vec![], ..Default::default()
1086 };
1087
1088 let mut config = crate::cli::Config {
1089 paths: Some(vec![temp_dir.path().to_path_buf()]),
1090 semantic_depth: 3,
1091 ..Default::default()
1092 };
1093 config_file.apply_to_cli_config(&mut config);
1094
1095 let walk_options = WalkOptions::from_config(&config).unwrap();
1097
1098 assert!(walk_directory(temp_dir.path(), walk_options).is_ok());
1101 }
1102
1103 #[test]
1104 fn test_empty_pattern_in_config() {
1105 use crate::config::{ConfigFile, Priority};
1106 use tempfile::TempDir;
1107
1108 let temp_dir = TempDir::new().unwrap();
1109
1110 let config_file = ConfigFile {
1112 priorities: vec![Priority {
1113 pattern: "".to_string(),
1114 weight: 5.0,
1115 }],
1116 ..Default::default()
1117 };
1118
1119 let mut config = crate::cli::Config {
1120 paths: Some(vec![temp_dir.path().to_path_buf()]),
1121 semantic_depth: 3,
1122 ..Default::default()
1123 };
1124 config_file.apply_to_cli_config(&mut config);
1125
1126 let result = WalkOptions::from_config(&config);
1128 assert!(result.is_ok());
1129
1130 let walk_options = result.unwrap();
1132 assert_eq!(walk_options.custom_priorities.len(), 1);
1133 }
1134
1135 #[test]
1136 fn test_extreme_weights_in_config() {
1137 use crate::config::{ConfigFile, Priority};
1138 use tempfile::TempDir;
1139
1140 let temp_dir = TempDir::new().unwrap();
1141
1142 let config_file = ConfigFile {
1144 priorities: vec![
1145 Priority {
1146 pattern: "*.rs".to_string(),
1147 weight: f32::MAX,
1148 },
1149 Priority {
1150 pattern: "*.txt".to_string(),
1151 weight: f32::MIN,
1152 },
1153 Priority {
1154 pattern: "*.md".to_string(),
1155 weight: f32::INFINITY,
1156 },
1157 Priority {
1158 pattern: "*.log".to_string(),
1159 weight: f32::NEG_INFINITY,
1160 },
1161 ],
1162 ..Default::default()
1163 };
1164
1165 let mut config = crate::cli::Config {
1166 paths: Some(vec![temp_dir.path().to_path_buf()]),
1167 semantic_depth: 3,
1168 ..Default::default()
1169 };
1170 config_file.apply_to_cli_config(&mut config);
1171
1172 let result = WalkOptions::from_config(&config);
1174 assert!(result.is_ok());
1175
1176 let walk_options = result.unwrap();
1177 assert_eq!(walk_options.custom_priorities.len(), 4);
1178 }
1179
1180 #[test]
1181 fn test_file_info_file_type_display() {
1182 let file_info = FileInfo {
1183 path: PathBuf::from("test.rs"),
1184 relative_path: PathBuf::from("test.rs"),
1185 size: 1000,
1186 file_type: FileType::Rust,
1187 priority: 1.0,
1188 imports: Vec::new(),
1189 imported_by: Vec::new(),
1190 function_calls: Vec::new(),
1191 type_references: Vec::new(),
1192 exported_functions: Vec::new(),
1193 };
1194
1195 assert_eq!(file_info.file_type_display(), "Rust");
1196
1197 let file_info_md = FileInfo {
1198 path: PathBuf::from("README.md"),
1199 relative_path: PathBuf::from("README.md"),
1200 size: 500,
1201 file_type: FileType::Markdown,
1202 priority: 0.6,
1203 imports: Vec::new(),
1204 imported_by: Vec::new(),
1205 function_calls: Vec::new(),
1206 type_references: Vec::new(),
1207 exported_functions: Vec::new(),
1208 };
1209
1210 assert_eq!(file_info_md.file_type_display(), "Markdown");
1211 }
1212
1213 #[test]
1216 fn test_walk_options_from_config_with_include_patterns() {
1217 let config = crate::cli::Config {
1219 include: Some(vec!["**/*.rs".to_string(), "**/test[0-9].py".to_string()]),
1220 semantic_depth: 3,
1221 ..Default::default()
1222 };
1223
1224 let options = WalkOptions::from_config(&config).unwrap();
1225
1226 assert_eq!(options.include_patterns, vec!["**/*.rs", "**/test[0-9].py"]);
1228 }
1229
1230 #[test]
1231 fn test_walk_options_from_config_empty_include_patterns() {
1232 let config = crate::cli::Config {
1234 semantic_depth: 3,
1235 ..Default::default()
1236 };
1237
1238 let options = WalkOptions::from_config(&config).unwrap();
1239 assert_eq!(options.include_patterns, Vec::<String>::new());
1240 }
1241
1242 #[test]
1243 fn test_walk_options_filters_empty_patterns() {
1244 let config = crate::cli::Config {
1246 include: Some(vec![
1247 "**/*.rs".to_string(),
1248 "".to_string(),
1249 " ".to_string(),
1250 "*.py".to_string(),
1251 ]),
1252 semantic_depth: 3,
1253 ..Default::default()
1254 };
1255
1256 let options = WalkOptions::from_config(&config).unwrap();
1257
1258 assert_eq!(options.include_patterns, vec!["**/*.rs", "*.py"]);
1260 }
1261
1262 #[test]
1265 fn test_sanitize_pattern_valid_patterns() {
1266 let valid_patterns = vec![
1268 "*.py",
1269 "**/*.rs",
1270 "src/**/*.{js,ts}",
1271 "test[0-9].py",
1272 "**/*{model,service}*.py",
1273 "**/db/**",
1274 "some-file.txt",
1275 "dir/subdir/*.md",
1276 ];
1277
1278 for pattern in valid_patterns {
1279 let result = sanitize_pattern(pattern);
1280 assert!(result.is_ok(), "Pattern '{pattern}' should be valid");
1281 assert_eq!(result.unwrap(), pattern);
1282 }
1283 }
1284
1285 #[test]
1286 fn test_sanitize_pattern_length_limit() {
1287 let short_pattern = "a".repeat(999);
1289 let exact_limit = "a".repeat(1000);
1290 let too_long = "a".repeat(1001);
1291
1292 assert!(sanitize_pattern(&short_pattern).is_ok());
1293 assert!(sanitize_pattern(&exact_limit).is_ok());
1294
1295 let result = sanitize_pattern(&too_long);
1296 assert!(result.is_err());
1297 assert!(result.unwrap_err().to_string().contains("Pattern too long"));
1298 }
1299
1300 #[test]
1301 fn test_sanitize_pattern_null_bytes() {
1302 let patterns_with_nulls = vec!["test\0.py", "\0*.rs", "**/*.js\0", "dir/\0file.txt"];
1304
1305 for pattern in patterns_with_nulls {
1306 let result = sanitize_pattern(pattern);
1307 assert!(
1308 result.is_err(),
1309 "Pattern with null byte should be rejected: {pattern:?}"
1310 );
1311 assert!(result
1312 .unwrap_err()
1313 .to_string()
1314 .contains("invalid characters"));
1315 }
1316 }
1317
/// Asserts that patterns containing ASCII control characters (0x01, 0x08,
/// 0x0c, 0x1f and 0x7f) are rejected with an "invalid characters" error.
#[test]
fn test_sanitize_pattern_control_characters() {
    const WITH_CONTROL_CHARS: &[&str] = &[
        "test\x01.py",
        "file\x08.txt",
        "dir\x0c/*.rs",
        "test\x1f.md",
        "*.py\x7f",
    ];

    for &pattern in WITH_CONTROL_CHARS {
        let outcome = sanitize_pattern(pattern);
        assert!(
            outcome.is_err(),
            "Pattern with control char should be rejected: {pattern:?}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("invalid characters"));
    }
}
1341
/// Asserts that patterns starting with `/` or `\` are rejected by
/// `sanitize_pattern` with an "Absolute paths not allowed" error.
#[test]
fn test_sanitize_pattern_absolute_paths() {
    const ROOTED: &[&str] = &[
        // Leading forward slash.
        "/etc/passwd",
        "/usr/bin/*.sh",
        "/home/user/file.txt",
        // Leading backslash.
        "\\Windows\\System32\\*.dll",
        "\\Program Files\\*",
    ];

    for &pattern in ROOTED {
        let outcome = sanitize_pattern(pattern);
        assert!(
            outcome.is_err(),
            "Absolute path should be rejected: {pattern}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Absolute paths not allowed"));
    }
}
1365
/// Asserts that patterns containing a `..` path component are rejected by
/// `sanitize_pattern` with a "Directory traversal" error.
#[test]
fn test_sanitize_pattern_directory_traversal() {
    const ESCAPING: &[&str] = &[
        "../../../etc/passwd",
        "dir/../../../file.txt",
        "**/../secret/*",
        "test/../../*.py",
        "../config.toml",
        "subdir/../../other.rs",
    ];

    for &pattern in ESCAPING {
        let outcome = sanitize_pattern(pattern);
        assert!(
            outcome.is_err(),
            "Directory traversal should be rejected: {pattern}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Directory traversal"));
    }
}
1390
/// Exercises unusual inputs to `sanitize_pattern`: empty, whitespace-only,
/// non-ASCII and complex glob patterns must be accepted, while patterns
/// containing a newline or a tab must be rejected.
#[test]
fn test_sanitize_pattern_edge_cases() {
    // (pattern, failure message) pairs that must be accepted.
    let accepted = [
        ("", "Empty string should be allowed"),
        (" ", "Whitespace-only should be allowed"),
        ("файл*.txt", "Unicode should be allowed"),
        (
            "file[!abc]*.{py,rs}",
            "Complex glob patterns should be allowed",
        ),
    ];
    for (pattern, reason) in accepted {
        assert!(sanitize_pattern(pattern).is_ok(), "{reason}");
    }

    // (pattern, failure message) pairs that must be rejected.
    let rejected = [
        ("file\nname.txt", "Newlines should be rejected"),
        ("file\tname.txt", "Tabs should be rejected"),
    ];
    for (pattern, reason) in rejected {
        assert!(sanitize_pattern(pattern).is_err(), "{reason}");
    }
}
1418
/// Probes inputs near the traversal rule of `sanitize_pattern`: any `..`
/// substring is rejected (even inside a file name), a plain recursive glob is
/// accepted, and a valid prefix followed by `/../` is rejected.
#[test]
fn test_sanitize_pattern_boundary_conditions() {
    // (pattern, expected to be accepted, failure message)
    let cases: [(&str, bool, &str); 3] = [
        (
            "file..name.txt",
            false,
            "Any '..' should be rejected for safety",
        ),
        ("**/*.py", true, "Recursive glob should be allowed"),
        (
            "valid/*.py/../invalid",
            false,
            "Mixed pattern should be rejected",
        ),
    ];

    for (pattern, expect_accepted, reason) in cases {
        let accepted = sanitize_pattern(pattern).is_ok();
        assert!(accepted == expect_accepted, "{reason}");
    }
}
1435
/// Checks how `sanitize_pattern` treats common filter-bypass tricks:
/// percent-encoded sequences are accepted as literal text, whereas literal
/// `..` and a trailing NUL byte are rejected.
#[test]
fn test_sanitize_pattern_security_bypass_attempts() {
    // Percent-encoded NUL: accepted, i.e. treated as the literal text "%00".
    assert!(
        sanitize_pattern("file%00.txt").is_ok(),
        "URL encoding should not be decoded"
    );

    // Percent-encoded dots: likewise accepted as literal text.
    assert!(
        sanitize_pattern("file%2e%2e/secret").is_ok(),
        "Double encoding should not be decoded"
    );

    // U+002E is the ASCII full stop, so this literal is exactly "file../secret".
    assert!(
        sanitize_pattern("file\u{002e}\u{002e}/secret").is_err(),
        "Unicode dots should be treated as '..'"
    );

    // A NUL byte appended to an otherwise valid pattern.
    assert!(
        sanitize_pattern("legitimate-pattern\0").is_err(),
        "Trailing null should be caught"
    );
}
1456
/// Builds one `FileProcessingError` and one `InvalidConfiguration` error and
/// asserts that their rendered text contains the expected fragments.
#[test]
fn test_error_handling_classification() {
    use crate::utils::error::ContextCreatorError;

    let file_error = ContextCreatorError::FileProcessingError {
        path: String::from("test.txt"),
        error: String::from("Permission denied"),
    };
    let config_error = ContextCreatorError::InvalidConfiguration(String::from("Invalid pattern"));

    // The underlying cause must appear in the rendered file error.
    assert!(file_error.to_string().contains("Permission denied"));

    // The rendered configuration error must contain "Invalid".
    assert!(config_error.to_string().contains("Invalid"));
}
1481
/// Asserts that `build_walker` returns an error when an include pattern
/// contains directory traversal, and that the error text mentions either
/// "Directory traversal" or "Invalid".
#[test]
fn test_pattern_sanitization_integration() {
    use tempfile::TempDir;

    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // Every field is spelled out; only `include_patterns` carries the
    // malicious input under test.
    let options = WalkOptions {
        max_file_size: Some(1024),
        follow_links: false,
        include_hidden: false,
        parallel: false,
        ignore_file: ".context-creator-ignore".to_string(),
        ignore_patterns: Vec::new(),
        include_patterns: vec!["../../../etc/passwd".to_string()],
        custom_priorities: Vec::new(),
        filter_binary_files: false,
    };

    // Keep only the rendered error text; the walker itself is not needed.
    let rejection = build_walker(root, &options).err().map(|e| e.to_string());

    assert!(
        rejection.is_some(),
        "Directory traversal pattern should be rejected by sanitization"
    );

    if let Some(error_msg) = rejection {
        assert!(error_msg.contains("Directory traversal") || error_msg.contains("Invalid"));
    }
}
1515
/// Asserts that `WalkOptions::from_config` enables `filter_binary_files`
/// when the configuration carries a prompt.
#[test]
fn test_walk_options_filters_binary_files_with_prompt() {
    use crate::cli::Config;

    let with_prompt = Config {
        prompt: Some(String::from("test prompt")),
        paths: Some(vec![PathBuf::from(".")]),
        llm_tool: crate::cli::LlmTool::Gemini,
        semantic_depth: 3,
        ..Default::default()
    };

    let filter_enabled = WalkOptions::from_config(&with_prompt)
        .unwrap()
        .filter_binary_files;
    assert!(filter_enabled);
}
1531
/// Asserts that `WalkOptions::from_config` leaves `filter_binary_files`
/// disabled when the configuration does not set a prompt.
#[test]
fn test_walk_options_no_binary_filter_without_prompt() {
    use crate::cli::Config;

    let without_prompt = Config {
        paths: Some(vec![PathBuf::from(".")]),
        llm_tool: crate::cli::LlmTool::Gemini,
        semantic_depth: 3,
        ..Default::default()
    };

    let filter_enabled = WalkOptions::from_config(&without_prompt)
        .unwrap()
        .filter_binary_files;
    assert!(!filter_enabled);
}
1546
/// With `filter_binary_files` enabled, `walk_directory` must return only
/// `main.rs` and `config.json` and omit `image.jpg` and `video.mp4`.
#[test]
fn test_filter_binary_files_when_enabled() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // Two media files followed by two text files, all empty.
    for name in ["image.jpg", "video.mp4", "main.rs", "config.json"] {
        File::create(root.join(name)).unwrap();
    }

    let options = WalkOptions {
        filter_binary_files: true,
        ..Default::default()
    };
    let files = walk_directory(root, options).unwrap();

    // True when the walk result contains an entry with this relative path.
    let listed = |name: &str| {
        files
            .iter()
            .any(|f| f.relative_path == PathBuf::from(name))
    };

    assert_eq!(files.len(), 2);
    assert!(listed("main.rs"));
    assert!(listed("config.json"));
    assert!(!listed("image.jpg"));
    assert!(!listed("video.mp4"));
}
1583
/// With `filter_binary_files` disabled, `walk_directory` must return all
/// four created files, including `image.jpg` and `video.mp4`.
#[test]
fn test_no_filtering_when_disabled() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // Two media files followed by two text files, all empty.
    for name in ["image.jpg", "video.mp4", "main.rs", "config.json"] {
        File::create(root.join(name)).unwrap();
    }

    let options = WalkOptions {
        filter_binary_files: false,
        ..Default::default()
    };
    let files = walk_directory(root, options).unwrap();

    // True when the walk result contains an entry with this relative path.
    let listed = |name: &str| {
        files
            .iter()
            .any(|f| f.relative_path == PathBuf::from(name))
    };

    assert_eq!(files.len(), 4);
    assert!(listed("main.rs"));
    assert!(listed("config.json"));
    assert!(listed("image.jpg"));
    assert!(listed("video.mp4"));
}
1618
/// With `filter_binary_files` enabled, files that have no extension
/// (`README`, `LICENSE`, `Makefile`, `Dockerfile`) must still be returned by
/// `walk_directory`, while `binary.exe` must be omitted.
#[test]
fn test_edge_case_files_without_extensions() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // Four extension-less files plus one `.exe`, all empty.
    for name in ["README", "LICENSE", "Makefile", "Dockerfile", "binary.exe"] {
        File::create(root.join(name)).unwrap();
    }

    let options = WalkOptions {
        filter_binary_files: true,
        ..Default::default()
    };
    let files = walk_directory(root, options).unwrap();

    // True when the walk result contains an entry with this relative path.
    let listed = |name: &str| {
        files
            .iter()
            .any(|f| f.relative_path == PathBuf::from(name))
    };

    assert_eq!(files.len(), 4);
    assert!(listed("README"));
    assert!(listed("LICENSE"));
    assert!(listed("Makefile"));
    assert!(listed("Dockerfile"));
    assert!(!listed("binary.exe"));
}
1657}