1use crate::utils::error::ContextCreatorError;
4use crate::utils::file_ext::FileType;
5use anyhow::Result;
6use glob::Pattern;
7use ignore::{Walk, WalkBuilder};
8use rayon::prelude::*;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
/// A custom priority rule whose glob pattern has already been compiled.
///
/// `calculate_priority` tests each rule's `matcher` against a file's relative
/// path and adds the `weight` of the first rule that matches to the base score.
#[derive(Debug, Clone)]
pub struct CompiledPriority {
    /// Compiled glob that is matched against a file's relative path.
    pub matcher: Pattern,
    /// Amount added to the base priority of a matching file (may be negative).
    pub weight: f32,
    /// The pattern text that `matcher` was compiled from.
    pub original_pattern: String,
}
33
34impl CompiledPriority {
35 pub fn new(pattern: &str, weight: f32) -> Result<Self, glob::PatternError> {
37 let matcher = Pattern::new(pattern)?;
38 Ok(Self {
39 matcher,
40 weight,
41 original_pattern: pattern.to_string(),
42 })
43 }
44
45 pub fn try_from_config_priority(
47 priority: &crate::config::Priority,
48 ) -> Result<Self, glob::PatternError> {
49 Self::new(&priority.pattern, priority.weight)
50 }
51}
52
/// Settings that control how `walk_directory` traverses a tree and which
/// files it keeps.
#[derive(Debug, Clone)]
pub struct WalkOptions {
    /// Files larger than this many bytes are skipped; `None` disables the check.
    pub max_file_size: Option<usize>,
    /// Forwarded to the walker's `follow_links` setting.
    pub follow_links: bool,
    /// When `false`, the walker's hidden-file filter is switched on.
    pub include_hidden: bool,
    /// Selects the rayon-based processing path instead of the sequential loop.
    pub parallel: bool,
    /// File name registered with the walker as an additional custom ignore file.
    pub ignore_file: String,
    /// Ignore patterns; each non-blank entry is validated with
    /// `sanitize_pattern` and handed to the walker builder.
    pub ignore_patterns: Vec<String>,
    /// Include patterns; each non-blank entry is validated with
    /// `sanitize_pattern` and installed as a walker override.
    pub include_patterns: Vec<String>,
    /// Custom scoring rules, consulted in order; the first matching rule wins.
    pub custom_priorities: Vec<CompiledPriority>,
    /// Set by `from_config` to whether the config carries a prompt.
    /// NOTE(review): nothing in this part of the file reads this flag —
    /// confirm where it is consumed.
    pub filter_binary_files: bool,
}
75
76impl WalkOptions {
77 pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
79 let mut custom_priorities = Vec::new();
81 for priority in &config.custom_priorities {
82 match CompiledPriority::try_from_config_priority(priority) {
83 Ok(compiled) => custom_priorities.push(compiled),
84 Err(e) => {
85 return Err(ContextCreatorError::ConfigError(format!(
86 "Invalid glob pattern '{}' in custom priorities: {e}",
87 priority.pattern
88 ))
89 .into());
90 }
91 }
92 }
93
94 let include_patterns = config
96 .get_include_patterns()
97 .into_iter()
98 .filter(|pattern| !pattern.trim().is_empty())
99 .collect();
100
101 let ignore_patterns = config
103 .get_ignore_patterns()
104 .into_iter()
105 .filter(|pattern| !pattern.trim().is_empty())
106 .collect();
107
108 Ok(WalkOptions {
109 max_file_size: Some(10 * 1024 * 1024), follow_links: false,
111 include_hidden: false,
112 parallel: true,
113 ignore_file: ".context-creator-ignore".to_string(),
114 ignore_patterns,
115 include_patterns,
116 custom_priorities,
117 filter_binary_files: config.get_prompt().is_some(),
118 })
119 }
120}
121
122impl Default for WalkOptions {
123 fn default() -> Self {
124 WalkOptions {
125 max_file_size: Some(10 * 1024 * 1024), follow_links: false,
127 include_hidden: false,
128 parallel: true,
129 ignore_file: ".context-creator-ignore".to_string(),
130 ignore_patterns: vec![],
131 include_patterns: vec![],
132 custom_priorities: vec![],
133 filter_binary_files: false,
134 }
135 }
136}
137
/// Metadata recorded for one file found during a directory walk.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path of the file as yielded by the walker.
    pub path: PathBuf,
    /// `path` with the walk root stripped; falls back to the full path when
    /// the root is not a prefix.
    pub relative_path: PathBuf,
    /// File length in bytes, taken from the filesystem metadata.
    pub size: u64,
    /// Type detected from the path via `FileType::from_path`.
    pub file_type: FileType,
    /// Score produced by `calculate_priority` (base score plus any custom weight).
    pub priority: f32,
    /// Left empty by the walker. Presumably filled in by the semantic
    /// analysis pass — confirm against `perform_semantic_analysis_graph`.
    pub imports: Vec<PathBuf>,
    /// Left empty by the walker (see the note on `imports`).
    pub imported_by: Vec<PathBuf>,
    /// Left empty by the walker (see the note on `imports`).
    pub function_calls: Vec<crate::core::semantic::analyzer::FunctionCall>,
    /// Left empty by the walker (see the note on `imports`).
    pub type_references: Vec<crate::core::semantic::analyzer::TypeReference>,
}
160
161impl FileInfo {
162 pub fn file_type_display(&self) -> &'static str {
164 use crate::utils::file_ext::FileType;
165 match self.file_type {
166 FileType::Rust => "Rust",
167 FileType::Python => "Python",
168 FileType::JavaScript => "JavaScript",
169 FileType::TypeScript => "TypeScript",
170 FileType::Go => "Go",
171 FileType::Java => "Java",
172 FileType::Cpp => "C++",
173 FileType::C => "C",
174 FileType::CSharp => "C#",
175 FileType::Ruby => "Ruby",
176 FileType::Php => "PHP",
177 FileType::Swift => "Swift",
178 FileType::Kotlin => "Kotlin",
179 FileType::Scala => "Scala",
180 FileType::Haskell => "Haskell",
181 FileType::Dart => "Dart",
182 FileType::Lua => "Lua",
183 FileType::R => "R",
184 FileType::Julia => "Julia",
185 FileType::Elixir => "Elixir",
186 FileType::Elm => "Elm",
187 FileType::Markdown => "Markdown",
188 FileType::Json => "JSON",
189 FileType::Yaml => "YAML",
190 FileType::Toml => "TOML",
191 FileType::Xml => "XML",
192 FileType::Html => "HTML",
193 FileType::Css => "CSS",
194 FileType::Text => "Text",
195 FileType::Other => "Other",
196 }
197 }
198}
199
200pub fn walk_directory(root: &Path, options: WalkOptions) -> Result<Vec<FileInfo>> {
202 if !root.exists() {
203 return Err(ContextCreatorError::InvalidPath(format!(
204 "Directory does not exist: {}",
205 root.display()
206 ))
207 .into());
208 }
209
210 if !root.is_dir() {
211 return Err(ContextCreatorError::InvalidPath(format!(
212 "Path is not a directory: {}",
213 root.display()
214 ))
215 .into());
216 }
217
218 let root = root.canonicalize()?;
219 let walker = build_walker(&root, &options)?;
220
221 if options.parallel {
222 walk_parallel(walker, &root, &options)
223 } else {
224 walk_sequential(walker, &root, &options)
225 }
226}
227
228pub fn sanitize_pattern(pattern: &str) -> Result<String> {
230 if pattern.len() > 1000 {
232 return Err(ContextCreatorError::InvalidConfiguration(
233 "Pattern too long (max 1000 characters)".to_string(),
234 )
235 .into());
236 }
237
238 if pattern.contains('\0')
240 || pattern.chars().any(|c| {
241 c.is_control() ||
242 c == '\u{2028}' || c == '\u{2029}' || c == '\u{FEFF}' })
246 {
247 return Err(ContextCreatorError::InvalidConfiguration(
248 "Pattern contains invalid characters (null bytes or control characters)".to_string(),
249 )
250 .into());
251 }
252
253 if pattern.starts_with('/') || pattern.starts_with('\\') {
255 return Err(ContextCreatorError::InvalidConfiguration(
256 "Absolute paths not allowed in patterns".to_string(),
257 )
258 .into());
259 }
260
261 if pattern.contains("..") {
263 return Err(ContextCreatorError::InvalidConfiguration(
264 "Directory traversal (..) not allowed in patterns".to_string(),
265 )
266 .into());
267 }
268
269 Ok(pattern.to_string())
270}
271
272fn build_walker(root: &Path, options: &WalkOptions) -> Result<Walk> {
274 let mut builder = WalkBuilder::new(root);
275
276 builder
278 .follow_links(options.follow_links)
279 .hidden(!options.include_hidden)
280 .git_ignore(true)
281 .git_global(true)
282 .git_exclude(true)
283 .ignore(true)
284 .parents(true)
285 .add_custom_ignore_filename(&options.ignore_file);
286
287 for pattern in &options.ignore_patterns {
289 if !pattern.trim().is_empty() {
290 let sanitized_pattern = sanitize_pattern(pattern)?;
292
293 if builder.add_ignore(&sanitized_pattern).is_none() {
295 return Err(ContextCreatorError::InvalidConfiguration(format!(
296 "Invalid ignore pattern '{pattern}': pattern could not be added"
297 ))
298 .into());
299 }
300 }
301 }
302
303 if !options.include_patterns.is_empty() {
305 let mut override_builder = ignore::overrides::OverrideBuilder::new(root);
306
307 for pattern in &options.include_patterns {
308 if !pattern.trim().is_empty() {
309 let sanitized_pattern = sanitize_pattern(pattern)?;
311
312 override_builder.add(&sanitized_pattern).map_err(|e| {
314 ContextCreatorError::InvalidConfiguration(format!(
315 "Invalid include pattern '{pattern}': {e}"
316 ))
317 })?;
318 }
319 }
320
321 let overrides = override_builder.build().map_err(|e| {
322 ContextCreatorError::InvalidConfiguration(format!(
323 "Failed to build include pattern overrides: {e}"
324 ))
325 })?;
326
327 builder.overrides(overrides);
328 }
329
330 Ok(builder.build())
331}
332
333fn walk_sequential(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
335 let mut files = Vec::new();
336
337 for entry in walker {
338 let entry = entry?;
339 let path = entry.path();
340
341 if path.is_dir() {
343 continue;
344 }
345
346 if let Some(file_info) = process_file(path, root, options)? {
348 files.push(file_info);
349 }
350 }
351
352 Ok(files)
353}
354
355fn walk_parallel(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
357 use itertools::Itertools;
358
359 let root = Arc::new(root.to_path_buf());
360 let options = Arc::new(options.clone());
361
362 let entries: Vec<_> = walker
364 .filter_map(|e| e.ok())
365 .filter(|e| !e.path().is_dir())
366 .collect();
367
368 let results: Vec<Result<Option<FileInfo>, ContextCreatorError>> = entries
370 .into_par_iter()
371 .map(|entry| {
372 let path = entry.path();
373 match process_file(path, &root, &options) {
374 Ok(file_info) => Ok(file_info),
375 Err(e) => Err(ContextCreatorError::FileProcessingError {
376 path: path.display().to_string(),
377 error: e.to_string(),
378 }),
379 }
380 })
381 .collect();
382
383 let (successes, errors): (Vec<_>, Vec<_>) = results.into_iter().partition_result();
385
386 if !errors.is_empty() {
388 let critical_errors: Vec<_> = errors
389 .iter()
390 .filter(|e| {
391 e.to_string().contains("Permission denied") || e.to_string().contains("Invalid")
392 })
393 .collect();
394
395 if !critical_errors.is_empty() {
396 let error_summary: Vec<String> =
398 critical_errors.iter().map(|e| e.to_string()).collect();
399 return Err(anyhow::anyhow!(
400 "Critical file processing errors encountered: {}",
401 error_summary.join(", ")
402 ));
403 }
404
405 eprintln!("Warning: {} files could not be processed:", errors.len());
407 for error in &errors {
408 eprintln!(" {error}");
409 }
410 }
411
412 let files: Vec<FileInfo> = successes.into_iter().flatten().collect();
414 Ok(files)
415}
416
417fn process_file(path: &Path, root: &Path, options: &WalkOptions) -> Result<Option<FileInfo>> {
419 let metadata = match std::fs::metadata(path) {
421 Ok(meta) => meta,
422 Err(_) => return Ok(None), };
424
425 let size = metadata.len();
426
427 if let Some(max_size) = options.max_file_size {
429 if size > max_size as u64 {
430 return Ok(None);
431 }
432 }
433
434 let relative_path = path.strip_prefix(root).unwrap_or(path).to_path_buf();
436
437 let file_type = FileType::from_path(path);
439
440 let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
442
443 Ok(Some(FileInfo {
444 path: path.to_path_buf(),
445 relative_path,
446 size,
447 file_type,
448 priority,
449 imports: Vec::new(), imported_by: Vec::new(), function_calls: Vec::new(), type_references: Vec::new(), }))
454}
455
456fn calculate_priority(
458 file_type: &FileType,
459 relative_path: &Path,
460 custom_priorities: &[CompiledPriority],
461) -> f32 {
462 let base_score = calculate_base_priority(file_type, relative_path);
464
465 for priority in custom_priorities {
467 if priority.matcher.matches_path(relative_path) {
468 return base_score + priority.weight;
469 }
470 }
471
472 base_score
474}
475
476fn calculate_base_priority(file_type: &FileType, relative_path: &Path) -> f32 {
478 let mut score: f32 = match file_type {
479 FileType::Rust => 1.0,
480 FileType::Python => 0.9,
481 FileType::JavaScript => 0.9,
482 FileType::TypeScript => 0.95,
483 FileType::Go => 0.9,
484 FileType::Java => 0.85,
485 FileType::Cpp => 0.85,
486 FileType::C => 0.8,
487 FileType::CSharp => 0.85,
488 FileType::Ruby => 0.8,
489 FileType::Php => 0.75,
490 FileType::Swift => 0.85,
491 FileType::Kotlin => 0.85,
492 FileType::Scala => 0.8,
493 FileType::Haskell => 0.75,
494 FileType::Dart => 0.85,
495 FileType::Lua => 0.7,
496 FileType::R => 0.75,
497 FileType::Julia => 0.8,
498 FileType::Elixir => 0.8,
499 FileType::Elm => 0.75,
500 FileType::Markdown => 0.6,
501 FileType::Json => 0.5,
502 FileType::Yaml => 0.5,
503 FileType::Toml => 0.5,
504 FileType::Xml => 0.4,
505 FileType::Html => 0.4,
506 FileType::Css => 0.4,
507 FileType::Text => 0.3,
508 FileType::Other => 0.2,
509 };
510
511 let path_str = relative_path.to_string_lossy().to_lowercase();
513 if path_str.contains("main") || path_str.contains("index") {
514 score *= 1.5;
515 }
516 if path_str.contains("lib") || path_str.contains("src") {
517 score *= 1.2;
518 }
519 if path_str.contains("test") || path_str.contains("spec") {
520 score *= 0.8;
521 }
522 if path_str.contains("example") || path_str.contains("sample") {
523 score *= 0.7;
524 }
525
526 if relative_path.parent().is_none() || relative_path.parent() == Some(Path::new("")) {
528 match file_type {
529 FileType::Toml | FileType::Yaml | FileType::Json => score *= 1.3,
530 _ => {}
531 }
532 }
533
534 score.min(2.0) }
536
/// Runs semantic analysis over `files` by delegating to
/// `crate::core::semantic_graph::perform_semantic_analysis_graph`.
///
/// `files` is passed mutably, so the delegate may update entries in place
/// (presumably the `imports`, `imported_by`, `function_calls` and
/// `type_references` fields — confirm against the delegate).
///
/// # Errors
/// Returns whatever error the delegate returns.
pub fn perform_semantic_analysis(
    files: &mut [FileInfo],
    config: &crate::cli::Config,
    cache: &crate::core::cache::FileCache,
) -> Result<()> {
    crate::core::semantic_graph::perform_semantic_analysis_graph(files, config, cache)
}
557
/// Returns `s` with its first character upper-cased and the rest unchanged.
///
/// An empty input gives an empty string. The upper-case form of one
/// character may be several characters long (for example `ß` becomes `SS`).
#[allow(dead_code)]
fn capitalize_first(s: &str) -> String {
    let mut remainder = s.chars();
    remainder.next().map_or_else(String::new, |head| {
        let mut capitalized: String = head.to_uppercase().collect();
        capitalized.push_str(remainder.as_str());
        capitalized
    })
}
567
568#[cfg(test)]
569mod tests {
570 use super::*;
571 use std::fs::{self, File};
572 use tempfile::TempDir;
573
574 #[test]
575 fn test_walk_directory_basic() {
576 let temp_dir = TempDir::new().unwrap();
577 let root = temp_dir.path();
578
579 File::create(root.join("main.rs")).unwrap();
581 File::create(root.join("lib.rs")).unwrap();
582 fs::create_dir(root.join("src")).unwrap();
583 File::create(root.join("src/utils.rs")).unwrap();
584
585 let options = WalkOptions::default();
586 let files = walk_directory(root, options).unwrap();
587
588 assert_eq!(files.len(), 3);
589 assert!(files
590 .iter()
591 .any(|f| f.relative_path == PathBuf::from("main.rs")));
592 assert!(files
593 .iter()
594 .any(|f| f.relative_path == PathBuf::from("lib.rs")));
595 assert!(files
596 .iter()
597 .any(|f| f.relative_path == PathBuf::from("src/utils.rs")));
598 }
599
600 #[test]
601 fn test_walk_with_contextignore() {
602 let temp_dir = TempDir::new().unwrap();
603 let root = temp_dir.path();
604
605 File::create(root.join("main.rs")).unwrap();
607 File::create(root.join("ignored.rs")).unwrap();
608
609 fs::write(root.join(".context-creator-ignore"), "ignored.rs").unwrap();
611
612 let options = WalkOptions::default();
613 let files = walk_directory(root, options).unwrap();
614
615 assert_eq!(files.len(), 1);
616 assert_eq!(files[0].relative_path, PathBuf::from("main.rs"));
617 }
618
619 #[test]
620 fn test_priority_calculation() {
621 let rust_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
622 let test_priority = calculate_priority(&FileType::Rust, Path::new("tests/test.rs"), &[]);
623 let doc_priority = calculate_priority(&FileType::Markdown, Path::new("README.md"), &[]);
624
625 assert!(rust_priority > doc_priority);
626 assert!(rust_priority > test_priority);
627 }
628
629 #[test]
630 fn test_file_size_limit() {
631 let temp_dir = TempDir::new().unwrap();
632 let root = temp_dir.path();
633
634 let large_file = root.join("large.txt");
636 let data = vec![0u8; 1024 * 1024]; fs::write(&large_file, &data).unwrap();
638
639 File::create(root.join("small.txt")).unwrap();
641
642 let options = WalkOptions {
643 max_file_size: Some(512 * 1024), ..Default::default()
645 };
646
647 let files = walk_directory(root, options).unwrap();
648
649 assert_eq!(files.len(), 1);
650 assert_eq!(files[0].relative_path, PathBuf::from("small.txt"));
651 }
652
653 #[test]
654 fn test_walk_empty_directory() {
655 let temp_dir = TempDir::new().unwrap();
656 let root = temp_dir.path();
657
658 let options = WalkOptions::default();
659 let files = walk_directory(root, options).unwrap();
660
661 assert_eq!(files.len(), 0);
662 }
663
664 #[test]
665 fn test_walk_options_from_config() {
666 use crate::cli::Config;
667 use tempfile::TempDir;
668
669 let temp_dir = TempDir::new().unwrap();
670 let config = Config {
671 paths: Some(vec![temp_dir.path().to_path_buf()]),
672 ..Config::default()
673 };
674
675 let options = WalkOptions::from_config(&config).unwrap();
676
677 assert_eq!(options.max_file_size, Some(10 * 1024 * 1024));
678 assert!(!options.follow_links);
679 assert!(!options.include_hidden);
680 assert!(options.parallel);
681 assert_eq!(options.ignore_file, ".context-creator-ignore");
682 }
683
    /// Walks a directory with an ignore pattern configured and checks that
    /// the Rust files are still returned.
    #[test]
    fn test_walk_with_custom_options() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        File::create(root.join("main.rs")).unwrap();
        File::create(root.join("test.rs")).unwrap();
        File::create(root.join("readme.md")).unwrap();

        let options = WalkOptions {
            ignore_patterns: vec!["*.md".to_string()],
            ..Default::default()
        };

        let files = walk_directory(root, options).unwrap();

        // NOTE(review): `>= 2` passes whether or not readme.md is excluded, so
        // this test does not verify that the ignore pattern takes effect.
        assert!(files.len() >= 2);
        assert!(files
            .iter()
            .any(|f| f.relative_path == PathBuf::from("main.rs")));
        assert!(files
            .iter()
            .any(|f| f.relative_path == PathBuf::from("test.rs")));
    }
710
    /// Walks a directory with an include pattern configured and checks that
    /// the matching Rust files are returned.
    #[test]
    fn test_walk_with_include_patterns() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        File::create(root.join("main.rs")).unwrap();
        File::create(root.join("lib.rs")).unwrap();
        File::create(root.join("README.md")).unwrap();

        let options = WalkOptions {
            include_patterns: vec!["*.rs".to_string()],
            ..Default::default()
        };

        let files = walk_directory(root, options).unwrap();

        // NOTE(review): `>= 2` does not check that README.md was filtered
        // out; only the presence of the two .rs files is verified.
        assert!(files.len() >= 2);
        assert!(files
            .iter()
            .any(|f| f.relative_path == PathBuf::from("main.rs")));
        assert!(files
            .iter()
            .any(|f| f.relative_path == PathBuf::from("lib.rs")));
    }
737
738 #[test]
739 fn test_walk_subdirectories() {
740 let temp_dir = TempDir::new().unwrap();
741 let root = temp_dir.path();
742
743 fs::create_dir(root.join("src")).unwrap();
745 fs::create_dir(root.join("src").join("utils")).unwrap();
746 File::create(root.join("main.rs")).unwrap();
747 File::create(root.join("src").join("lib.rs")).unwrap();
748 File::create(root.join("src").join("utils").join("helpers.rs")).unwrap();
749
750 let options = WalkOptions::default();
751 let files = walk_directory(root, options).unwrap();
752
753 assert_eq!(files.len(), 3);
754 assert!(files
755 .iter()
756 .any(|f| f.relative_path == PathBuf::from("main.rs")));
757 assert!(files
758 .iter()
759 .any(|f| f.relative_path == PathBuf::from("src/lib.rs")));
760 assert!(files
761 .iter()
762 .any(|f| f.relative_path == PathBuf::from("src/utils/helpers.rs")));
763 }
764
765 #[test]
766 fn test_priority_edge_cases() {
767 let main_priority = calculate_priority(&FileType::Rust, Path::new("main.rs"), &[]);
769 let lib_priority = calculate_priority(&FileType::Rust, Path::new("lib.rs"), &[]);
770 let nested_main_priority =
771 calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
772
773 assert!(main_priority > lib_priority);
774 assert!(nested_main_priority > lib_priority);
775
776 let toml_priority = calculate_priority(&FileType::Toml, Path::new("Cargo.toml"), &[]);
778 let nested_toml_priority =
779 calculate_priority(&FileType::Toml, Path::new("config/app.toml"), &[]);
780
781 assert!(toml_priority > nested_toml_priority);
782 }
783
784 #[test]
787 fn test_custom_priority_no_match_returns_base_priority() {
788 let custom_priorities = [CompiledPriority::new("docs/*.md", 5.0).unwrap()];
791
792 let priority = calculate_priority(
794 &FileType::Rust,
795 Path::new("src/main.rs"),
796 &custom_priorities,
797 );
798
799 let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
801 assert_eq!(priority, expected_base);
802 }
803
804 #[test]
805 fn test_custom_priority_single_match_adds_weight() {
806 let custom_priorities = [CompiledPriority::new("src/core/mod.rs", 10.0).unwrap()];
808
809 let priority = calculate_priority(
811 &FileType::Rust,
812 Path::new("src/core/mod.rs"),
813 &custom_priorities,
814 );
815
816 let base_priority = calculate_priority(&FileType::Rust, Path::new("src/core/mod.rs"), &[]);
818 let expected = base_priority + 10.0;
819 assert_eq!(priority, expected);
820 }
821
822 #[test]
823 fn test_custom_priority_glob_pattern_match() {
824 let custom_priorities = [CompiledPriority::new("src/**/*.rs", 2.5).unwrap()];
826
827 let priority = calculate_priority(
829 &FileType::Rust,
830 Path::new("src/api/handlers.rs"),
831 &custom_priorities,
832 );
833
834 let base_priority =
836 calculate_priority(&FileType::Rust, Path::new("src/api/handlers.rs"), &[]);
837 let expected = base_priority + 2.5;
838 assert_eq!(priority, expected);
839 }
840
841 #[test]
842 fn test_custom_priority_negative_weight() {
843 let custom_priorities = [CompiledPriority::new("tests/*", -0.5).unwrap()];
845
846 let priority = calculate_priority(
848 &FileType::Rust,
849 Path::new("tests/test_utils.rs"),
850 &custom_priorities,
851 );
852
853 let base_priority =
855 calculate_priority(&FileType::Rust, Path::new("tests/test_utils.rs"), &[]);
856 let expected = base_priority - 0.5;
857 assert_eq!(priority, expected);
858 }
859
860 #[test]
861 fn test_custom_priority_first_match_wins() {
862 let custom_priorities = [
864 CompiledPriority::new("src/**/*.rs", 5.0).unwrap(),
865 CompiledPriority::new("src/main.rs", 100.0).unwrap(),
866 ];
867
868 let priority = calculate_priority(
870 &FileType::Rust,
871 Path::new("src/main.rs"),
872 &custom_priorities,
873 );
874
875 let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
877 let expected = base_priority + 5.0;
878 assert_eq!(priority, expected);
879 }
880
881 #[test]
882 fn test_custom_priority_zero_weight() {
883 let custom_priorities = [CompiledPriority::new("*.rs", 0.0).unwrap()];
885
886 let priority = calculate_priority(
888 &FileType::Rust,
889 Path::new("src/main.rs"),
890 &custom_priorities,
891 );
892
893 let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
895 assert_eq!(priority, base_priority);
896 }
897
898 #[test]
899 fn test_custom_priority_empty_list() {
900 let custom_priorities: &[CompiledPriority] = &[];
902
903 let priority =
905 calculate_priority(&FileType::Rust, Path::new("src/main.rs"), custom_priorities);
906
907 let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
909 assert_eq!(priority, expected_base);
910 }
911
    /// End-to-end check: priorities declared in a `ConfigFile` are applied to
    /// the CLI config, compiled by `WalkOptions::from_config`, and reflected
    /// in the priorities of the files returned by `walk_directory`.
    #[test]
    fn test_config_to_walker_data_flow() {
        use crate::config::{ConfigFile, Priority};
        use std::fs::{self, File};
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // One file per case: boosted, untouched, and penalised.
        File::create(root.join("high_priority.rs")).unwrap();
        File::create(root.join("normal.txt")).unwrap();
        fs::create_dir(root.join("logs")).unwrap();
        File::create(root.join("logs/app.log")).unwrap();

        let config_file = ConfigFile {
            priorities: vec![
                Priority {
                    pattern: "*.rs".to_string(),
                    weight: 10.0,
                },
                Priority {
                    pattern: "logs/*.log".to_string(),
                    weight: -5.0,
                },
            ],
            ..Default::default()
        };

        // CLI config with no custom priorities of its own.
        let mut config = crate::cli::Config {
            prompt: None,
            paths: Some(vec![root.to_path_buf()]),
            include: None,
            ignore: None,
            repo: None,
            read_stdin: false,
            output_file: None,
            max_tokens: None,
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            copy: false,
            enhanced_context: false,
            trace_imports: false,
            include_callers: false,
            include_types: false,
            semantic_depth: 3,
            custom_priorities: vec![],
            config_token_limits: None,
            config_defaults_max_tokens: None,
        };
        config_file.apply_to_cli_config(&mut config);

        let walk_options = WalkOptions::from_config(&config).unwrap();

        let files = walk_directory(root, walk_options).unwrap();

        let rs_file = files
            .iter()
            .find(|f| {
                f.relative_path
                    .to_string_lossy()
                    .contains("high_priority.rs")
            })
            .unwrap();
        let log_file = files
            .iter()
            .find(|f| f.relative_path.to_string_lossy().contains("app.log"))
            .unwrap();
        let txt_file = files
            .iter()
            .find(|f| f.relative_path.to_string_lossy().contains("normal.txt"))
            .unwrap();

        // Expected values are expressed relative to the base score of each file.
        let base_rs = calculate_base_priority(&rs_file.file_type, &rs_file.relative_path);
        let base_txt = calculate_base_priority(&txt_file.file_type, &txt_file.relative_path);
        let base_log = calculate_base_priority(&log_file.file_type, &log_file.relative_path);

        // "*.rs" rule adds 10.
        assert_eq!(rs_file.priority, base_rs + 10.0);

        // "logs/*.log" rule subtracts 5.
        assert_eq!(log_file.priority, base_log - 5.0);

        // No rule matches the text file.
        assert_eq!(txt_file.priority, base_txt);
    }
1010
    /// A priority with malformed glob syntax makes `WalkOptions::from_config`
    /// fail with a message that identifies the problem.
    #[test]
    fn test_invalid_glob_pattern_in_config() {
        use crate::config::{ConfigFile, Priority};
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();

        // Unclosed character class: not a valid glob.
        let config_file = ConfigFile {
            priorities: vec![Priority {
                pattern: "[invalid_glob".to_string(),
                weight: 5.0,
            }],
            ..Default::default()
        };

        let mut config = crate::cli::Config {
            prompt: None,
            paths: Some(vec![temp_dir.path().to_path_buf()]),
            include: None,
            ignore: None,
            repo: None,
            read_stdin: false,
            output_file: None,
            max_tokens: None,
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            copy: false,
            enhanced_context: false,
            trace_imports: false,
            include_callers: false,
            include_types: false,
            semantic_depth: 3,
            custom_priorities: vec![],
            config_token_limits: None,
            config_defaults_max_tokens: None,
        };
        config_file.apply_to_cli_config(&mut config);

        let result = WalkOptions::from_config(&config);
        assert!(result.is_err());

        // Either the offending pattern or the word "Invalid" must appear.
        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("invalid_glob") || error_msg.contains("Invalid"));
    }
1061
    /// A config file without priorities still produces usable walk options.
    #[test]
    fn test_empty_custom_priorities_config() {
        use crate::config::ConfigFile;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();

        let config_file = ConfigFile {
            priorities: vec![],
            ..Default::default()
        };

        let mut config = crate::cli::Config {
            prompt: None,
            paths: Some(vec![temp_dir.path().to_path_buf()]),
            include: None,
            ignore: None,
            repo: None,
            read_stdin: false,
            output_file: None,
            max_tokens: None,
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            copy: false,
            enhanced_context: false,
            trace_imports: false,
            include_callers: false,
            include_types: false,
            semantic_depth: 3,
            custom_priorities: vec![],
            config_token_limits: None,
            config_defaults_max_tokens: None,
        };
        config_file.apply_to_cli_config(&mut config);

        let walk_options = WalkOptions::from_config(&config).unwrap();

        // The walk itself must also succeed with these options.
        assert!(walk_directory(temp_dir.path(), walk_options).is_ok());
    }
1108
    /// An empty priority pattern is accepted and kept as a compiled rule.
    #[test]
    fn test_empty_pattern_in_config() {
        use crate::config::{ConfigFile, Priority};
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();

        let config_file = ConfigFile {
            priorities: vec![Priority {
                pattern: "".to_string(),
                weight: 5.0,
            }],
            ..Default::default()
        };

        let mut config = crate::cli::Config {
            prompt: None,
            paths: Some(vec![temp_dir.path().to_path_buf()]),
            include: None,
            ignore: None,
            repo: None,
            read_stdin: false,
            output_file: None,
            max_tokens: None,
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            copy: false,
            enhanced_context: false,
            trace_imports: false,
            include_callers: false,
            include_types: false,
            semantic_depth: 3,
            custom_priorities: vec![],
            config_token_limits: None,
            config_defaults_max_tokens: None,
        };
        config_file.apply_to_cli_config(&mut config);

        let result = WalkOptions::from_config(&config);
        assert!(result.is_ok());

        // The empty pattern is compiled rather than filtered out.
        let walk_options = result.unwrap();
        assert_eq!(walk_options.custom_priorities.len(), 1);
    }
1159
    /// Extreme and non-finite weights are accepted by `WalkOptions::from_config`.
    #[test]
    fn test_extreme_weights_in_config() {
        use crate::config::{ConfigFile, Priority};
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();

        // Largest/smallest finite values and both infinities.
        let config_file = ConfigFile {
            priorities: vec![
                Priority {
                    pattern: "*.rs".to_string(),
                    weight: f32::MAX,
                },
                Priority {
                    pattern: "*.txt".to_string(),
                    weight: f32::MIN,
                },
                Priority {
                    pattern: "*.md".to_string(),
                    weight: f32::INFINITY,
                },
                Priority {
                    pattern: "*.log".to_string(),
                    weight: f32::NEG_INFINITY,
                },
            ],
            ..Default::default()
        };

        let mut config = crate::cli::Config {
            prompt: None,
            paths: Some(vec![temp_dir.path().to_path_buf()]),
            include: None,
            ignore: None,
            repo: None,
            read_stdin: false,
            output_file: None,
            max_tokens: None,
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            copy: false,
            enhanced_context: false,
            trace_imports: false,
            include_callers: false,
            include_types: false,
            semantic_depth: 3,
            custom_priorities: vec![],
            config_token_limits: None,
            config_defaults_max_tokens: None,
        };
        config_file.apply_to_cli_config(&mut config);

        let result = WalkOptions::from_config(&config);
        assert!(result.is_ok());

        // All four rules survive compilation.
        let walk_options = result.unwrap();
        assert_eq!(walk_options.custom_priorities.len(), 4);
    }
1223
1224 #[test]
1225 fn test_file_info_file_type_display() {
1226 let file_info = FileInfo {
1227 path: PathBuf::from("test.rs"),
1228 relative_path: PathBuf::from("test.rs"),
1229 size: 1000,
1230 file_type: FileType::Rust,
1231 priority: 1.0,
1232 imports: Vec::new(),
1233 imported_by: Vec::new(),
1234 function_calls: Vec::new(),
1235 type_references: Vec::new(),
1236 };
1237
1238 assert_eq!(file_info.file_type_display(), "Rust");
1239
1240 let file_info_md = FileInfo {
1241 path: PathBuf::from("README.md"),
1242 relative_path: PathBuf::from("README.md"),
1243 size: 500,
1244 file_type: FileType::Markdown,
1245 priority: 0.6,
1246 imports: Vec::new(),
1247 imported_by: Vec::new(),
1248 function_calls: Vec::new(),
1249 type_references: Vec::new(),
1250 };
1251
1252 assert_eq!(file_info_md.file_type_display(), "Markdown");
1253 }
1254
    /// Include patterns from the CLI config are copied into the walk options
    /// unchanged and in order.
    #[test]
    fn test_walk_options_from_config_with_include_patterns() {
        let config = crate::cli::Config {
            prompt: None,
            paths: None,
            include: Some(vec!["**/*.rs".to_string(), "**/test[0-9].py".to_string()]),
            ignore: None,
            repo: None,
            read_stdin: false,
            output_file: None,
            max_tokens: None,
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            copy: false,
            enhanced_context: false,
            trace_imports: false,
            include_callers: false,
            include_types: false,
            semantic_depth: 3,
            custom_priorities: vec![],
            config_token_limits: None,
            config_defaults_max_tokens: None,
        };

        let options = WalkOptions::from_config(&config).unwrap();

        assert_eq!(options.include_patterns, vec!["**/*.rs", "**/test[0-9].py"]);
    }
1290
    /// With no include patterns configured, the walk options hold an empty list.
    #[test]
    fn test_walk_options_from_config_empty_include_patterns() {
        let config = crate::cli::Config {
            prompt: None,
            paths: None,
            include: None,
            ignore: None,
            repo: None,
            read_stdin: false,
            output_file: None,
            max_tokens: None,
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            copy: false,
            enhanced_context: false,
            trace_imports: false,
            include_callers: false,
            include_types: false,
            semantic_depth: 3,
            custom_priorities: vec![],
            config_token_limits: None,
            config_defaults_max_tokens: None,
        };

        let options = WalkOptions::from_config(&config).unwrap();
        assert_eq!(options.include_patterns, Vec::<String>::new());
    }
1322
    /// Empty and whitespace-only include patterns are dropped by `from_config`.
    #[test]
    fn test_walk_options_filters_empty_patterns() {
        let config = crate::cli::Config {
            prompt: None,
            paths: None,
            // Two real patterns interleaved with blank entries.
            include: Some(vec![
                "**/*.rs".to_string(),
                "".to_string(),
                " ".to_string(),
                "*.py".to_string(),
            ]),
            ignore: None,
            repo: None,
            read_stdin: false,
            output_file: None,
            max_tokens: None,
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            copy: false,
            enhanced_context: false,
            trace_imports: false,
            include_callers: false,
            include_types: false,
            semantic_depth: 3,
            custom_priorities: vec![],
            config_token_limits: None,
            config_defaults_max_tokens: None,
        };

        let options = WalkOptions::from_config(&config).unwrap();

        // Only the non-blank patterns remain, in their original order.
        assert_eq!(options.include_patterns, vec!["**/*.rs", "*.py"]);
    }
1361
#[test]
/// Ordinary glob patterns must pass `sanitize_pattern` and come back
/// unmodified.
fn test_sanitize_pattern_valid_patterns() {
    // A mix of plain names, recursive globs, brace alternation and
    // character classes.
    for candidate in [
        "*.py",
        "**/*.rs",
        "src/**/*.{js,ts}",
        "test[0-9].py",
        "**/*{model,service}*.py",
        "**/db/**",
        "some-file.txt",
        "dir/subdir/*.md",
    ] {
        let outcome = sanitize_pattern(candidate);
        assert!(outcome.is_ok(), "Pattern '{candidate}' should be valid");
        // The accepted pattern is expected to be returned as given.
        assert_eq!(outcome.unwrap(), candidate);
    }
}
1384
#[test]
/// Exercises the length boundary: patterns of 999 and 1000 characters are
/// expected to be accepted, while 1001 characters must be rejected with a
/// "Pattern too long" error.
fn test_sanitize_pattern_length_limit() {
    let pattern_of_len = |len: usize| "a".repeat(len);

    // Just under the boundary, and exactly on it.
    for accepted_len in [999, 1000] {
        assert!(sanitize_pattern(&pattern_of_len(accepted_len)).is_ok());
    }

    // One character past the boundary.
    let rejected = sanitize_pattern(&pattern_of_len(1001));
    assert!(rejected.is_err());
    let message = rejected.unwrap_err().to_string();
    assert!(message.contains("Pattern too long"));
}
1399
#[test]
/// A NUL byte anywhere in the pattern (middle, start, end, or inside a path
/// segment) must be rejected with an "invalid characters" error.
fn test_sanitize_pattern_null_bytes() {
    for candidate in ["test\0.py", "\0*.rs", "**/*.js\0", "dir/\0file.txt"] {
        let outcome = sanitize_pattern(candidate);
        assert!(
            outcome.is_err(),
            "Pattern with null byte should be rejected: {candidate:?}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("invalid characters"));
    }
}
1417
#[test]
/// ASCII control characters embedded in a pattern must be rejected with an
/// "invalid characters" error.
fn test_sanitize_pattern_control_characters() {
    for candidate in [
        "test\x01.py",  // U+0001 (start of heading)
        "file\x08.txt", // U+0008 (backspace)
        "dir\x0c/*.rs", // U+000C (form feed)
        "test\x1f.md",  // U+001F (unit separator)
        "*.py\x7f",     // U+007F (delete)
    ] {
        let outcome = sanitize_pattern(candidate);
        assert!(
            outcome.is_err(),
            "Pattern with control char should be rejected: {candidate:?}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("invalid characters"));
    }
}
1441
#[test]
/// Patterns that begin with a path separator, either `/` or `\`, must be
/// rejected with an "Absolute paths not allowed" error.
fn test_sanitize_pattern_absolute_paths() {
    for candidate in [
        // Leading forward slash.
        "/etc/passwd",
        "/usr/bin/*.sh",
        "/home/user/file.txt",
        // Leading backslash.
        "\\Windows\\System32\\*.dll",
        "\\Program Files\\*",
    ] {
        let outcome = sanitize_pattern(candidate);
        assert!(
            outcome.is_err(),
            "Absolute path should be rejected: {candidate}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Absolute paths not allowed"));
    }
}
1465
#[test]
/// Patterns containing a `..` path component, whether leading, embedded or
/// following a glob, must be rejected with a "Directory traversal" error.
fn test_sanitize_pattern_directory_traversal() {
    for candidate in [
        "../../../etc/passwd",
        "dir/../../../file.txt",
        "**/../secret/*",
        "test/../../*.py",
        "../config.toml",
        "subdir/../../other.rs",
    ] {
        let outcome = sanitize_pattern(candidate);
        assert!(
            outcome.is_err(),
            "Directory traversal should be rejected: {candidate}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Directory traversal"));
    }
}
1490
#[test]
/// Edge cases for `sanitize_pattern`: empty, whitespace-only, non-ASCII and
/// complex glob inputs are expected to pass, while embedded newlines and
/// tabs are expected to fail.
fn test_sanitize_pattern_edge_cases() {
    // (input, message shown if the input is unexpectedly rejected)
    let accepted = [
        ("", "Empty string should be allowed"),
        ("   ", "Whitespace-only should be allowed"),
        ("файл*.txt", "Unicode should be allowed"),
        ("file[!abc]*.{py,rs}", "Complex glob patterns should be allowed"),
    ];
    for (input, reason) in accepted {
        assert!(sanitize_pattern(input).is_ok(), "{reason}");
    }

    // (input, message shown if the input is unexpectedly accepted)
    let rejected = [
        ("file\nname.txt", "Newlines should be rejected"),
        ("file\tname.txt", "Tabs should be rejected"),
    ];
    for (input, reason) in rejected {
        assert!(sanitize_pattern(input).is_err(), "{reason}");
    }
}
1518
#[test]
/// Boundary conditions around `..`: the test expects any occurrence of `..`
/// to be rejected, even inside a file name, while a plain recursive glob
/// remains valid.
fn test_sanitize_pattern_boundary_conditions() {
    // (input, whether it should be accepted, message shown on mismatch)
    let cases = [
        (
            "file..name.txt",
            false,
            "Any '..' should be rejected for safety",
        ),
        ("**/*.py", true, "Recursive glob should be allowed"),
        (
            "valid/*.py/../invalid",
            false,
            "Mixed pattern should be rejected",
        ),
    ];

    for (input, should_pass, reason) in cases {
        let passed = sanitize_pattern(input).is_ok();
        assert!(passed == should_pass, "{reason}");
    }
}
1535
#[test]
/// Attempts to sneak forbidden content past `sanitize_pattern`.
/// Percent-encoded sequences are expected to be taken literally, and so
/// accepted. Escaped-but-literal dots and a trailing NUL must still be
/// rejected.
fn test_sanitize_pattern_security_bypass_attempts() {
    let is_accepted = |candidate: &str| sanitize_pattern(candidate).is_ok();

    // `%00` and `%2e%2e` are plain text here; nothing should decode them.
    assert!(
        is_accepted("file%00.txt"),
        "URL encoding should not be decoded"
    );
    assert!(
        is_accepted("file%2e%2e/secret"),
        "Double encoding should not be decoded"
    );

    // `\u{002e}` is just `.`, so this literal is "file../secret".
    assert!(
        !is_accepted("file\u{002e}\u{002e}/secret"),
        "Unicode dots should be treated as '..'"
    );

    assert!(
        !is_accepted("legitimate-pattern\0"),
        "Trailing null should be caught"
    );
}
1556
#[test]
/// Checks the rendered text of two `ContextCreatorError` variants: the
/// file-processing error must include its underlying error text, and the
/// invalid-configuration error must include the word "Invalid".
fn test_error_handling_classification() {
    use crate::utils::error::ContextCreatorError;

    let file_error = ContextCreatorError::FileProcessingError {
        path: "test.txt".to_string(),
        error: "Permission denied".to_string(),
    };
    let config_error = ContextCreatorError::InvalidConfiguration("Invalid pattern".to_string());

    let rendered_file_error = file_error.to_string();
    assert!(rendered_file_error.contains("Permission denied"));

    let rendered_config_error = config_error.to_string();
    assert!(rendered_config_error.contains("Invalid"));
}
1581
#[test]
/// End-to-end check that `build_walker` refuses to build a walker when an
/// include pattern attempts directory traversal.
fn test_pattern_sanitization_integration() {
    use tempfile::TempDir;

    // Keep the guard bound so the directory exists while the walker is built.
    let scratch_dir = TempDir::new().unwrap();

    let options = WalkOptions {
        // The malicious include pattern is the only input under test.
        include_patterns: vec!["../../../etc/passwd".to_string()],
        ignore_patterns: vec![],
        custom_priorities: vec![],
        ignore_file: ".context-creator-ignore".to_string(),
        max_file_size: Some(1024),
        follow_links: false,
        include_hidden: false,
        parallel: false,
        filter_binary_files: false,
    };

    match build_walker(scratch_dir.path(), &options) {
        Ok(_) => panic!("Directory traversal pattern should be rejected by sanitization"),
        Err(e) => {
            let error_msg = e.to_string();
            assert!(error_msg.contains("Directory traversal") || error_msg.contains("Invalid"));
        }
    }
}
1615
#[test]
/// When the config carries a prompt, the derived `WalkOptions` is expected
/// to have `filter_binary_files` switched on.
fn test_walk_options_filters_binary_files_with_prompt() {
    let cfg = crate::cli::Config {
        // Field under test: a prompt is present.
        prompt: Some("test prompt".to_string()),
        // The remaining fields are not what this test exercises.
        paths: Some(vec![PathBuf::from(".")]),
        include: None,
        ignore: None,
        repo: None,
        read_stdin: false,
        output_file: None,
        max_tokens: None,
        llm_tool: crate::cli::LlmTool::Gemini,
        quiet: false,
        verbose: false,
        config: None,
        progress: false,
        copy: false,
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        custom_priorities: vec![],
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    let walk_options = WalkOptions::from_config(&cfg).unwrap();
    assert!(walk_options.filter_binary_files);
}
1648
#[test]
/// Without a prompt in the config, the derived `WalkOptions` is expected to
/// leave `filter_binary_files` switched off.
fn test_walk_options_no_binary_filter_without_prompt() {
    let cfg = crate::cli::Config {
        // Field under test: no prompt supplied.
        prompt: None,
        // The remaining fields are not what this test exercises.
        paths: Some(vec![PathBuf::from(".")]),
        include: None,
        ignore: None,
        repo: None,
        read_stdin: false,
        output_file: None,
        max_tokens: None,
        llm_tool: crate::cli::LlmTool::Gemini,
        quiet: false,
        verbose: false,
        config: None,
        progress: false,
        copy: false,
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        custom_priorities: vec![],
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    let walk_options = WalkOptions::from_config(&cfg).unwrap();
    assert!(!walk_options.filter_binary_files);
}
1681}