1use crate::utils::error::ContextCreatorError;
4use crate::utils::file_ext::FileType;
5use anyhow::Result;
6use glob::Pattern;
7use ignore::{Walk, WalkBuilder};
8use rayon::prelude::*;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11use tracing::warn;
12
/// A custom priority rule whose glob pattern has already been compiled.
///
/// Instances are created with [`CompiledPriority::new`]; `calculate_priority`
/// tests each file's relative path against `matcher` and adds `weight` to the
/// base score on the first rule that matches.
#[derive(Debug, Clone)]
pub struct CompiledPriority {
    /// Compiled form of `original_pattern`.
    pub matcher: Pattern,
    /// Value added to a file's base priority when `matcher` matches its path.
    pub weight: f32,
    /// The pattern text exactly as it was passed to [`CompiledPriority::new`].
    pub original_pattern: String,
}
34
35impl CompiledPriority {
36 pub fn new(pattern: &str, weight: f32) -> Result<Self, glob::PatternError> {
38 let matcher = Pattern::new(pattern)?;
39 Ok(Self {
40 matcher,
41 weight,
42 original_pattern: pattern.to_string(),
43 })
44 }
45
46 pub fn try_from_config_priority(
48 priority: &crate::config::Priority,
49 ) -> Result<Self, glob::PatternError> {
50 Self::new(&priority.pattern, priority.weight)
51 }
52}
53
/// Settings that control how `walk_directory` traverses a tree and which
/// files it reports.
#[derive(Debug, Clone)]
pub struct WalkOptions {
    /// Files whose metadata length exceeds this many bytes are skipped by
    /// `process_file`; `None` disables the check.
    pub max_file_size: Option<usize>,
    /// Forwarded to `WalkBuilder::follow_links`.
    pub follow_links: bool,
    /// When `false`, the walker is built with `hidden(true)`.
    pub include_hidden: bool,
    /// Selects `walk_parallel` (`true`) or `walk_sequential` (`false`).
    pub parallel: bool,
    /// File name registered with the walker via `add_custom_ignore_filename`.
    pub ignore_file: String,
    /// Globs added to the walker overrides with a leading `!`.
    pub ignore_patterns: Vec<String>,
    /// Globs added to the walker overrides as-is.
    pub include_patterns: Vec<String>,
    /// Priority rules checked in order by `calculate_priority`; the first
    /// matching rule's weight is added to the base score.
    pub custom_priorities: Vec<CompiledPriority>,
    /// Set by `from_config` when the configuration carries a prompt. It is
    /// not read by the walking code visible in this part of the file.
    pub filter_binary_files: bool,
}
76
77impl WalkOptions {
78 pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
80 let mut custom_priorities = Vec::new();
82 for priority in &config.custom_priorities {
83 match CompiledPriority::try_from_config_priority(priority) {
84 Ok(compiled) => custom_priorities.push(compiled),
85 Err(e) => {
86 return Err(ContextCreatorError::ConfigError(format!(
87 "Invalid glob pattern '{}' in custom priorities: {e}",
88 priority.pattern
89 ))
90 .into());
91 }
92 }
93 }
94
95 let include_patterns = config
97 .get_include_patterns()
98 .into_iter()
99 .filter(|pattern| !pattern.trim().is_empty())
100 .collect();
101
102 let ignore_patterns = config
104 .get_ignore_patterns()
105 .into_iter()
106 .filter(|pattern| !pattern.trim().is_empty())
107 .collect();
108
109 Ok(WalkOptions {
110 max_file_size: Some(10 * 1024 * 1024), follow_links: false,
112 include_hidden: false,
113 parallel: true,
114 ignore_file: ".context-creator-ignore".to_string(),
115 ignore_patterns,
116 include_patterns,
117 custom_priorities,
118 filter_binary_files: config.get_prompt().is_some(),
119 })
120 }
121}
122
123impl Default for WalkOptions {
124 fn default() -> Self {
125 WalkOptions {
126 max_file_size: Some(10 * 1024 * 1024), follow_links: false,
128 include_hidden: false,
129 parallel: true,
130 ignore_file: ".context-creator-ignore".to_string(),
131 ignore_patterns: vec![],
132 include_patterns: vec![],
133 custom_priorities: vec![],
134 filter_binary_files: false,
135 }
136 }
137}
138
/// One file discovered by `walk_directory`, plus slots for relationship data.
///
/// The walker in this file fills `path`, `relative_path`, `size`, `file_type`
/// and `priority`; the remaining vectors are always created empty here.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path of the file as produced by the walker (or the root itself when
    /// the root is a single file).
    pub path: PathBuf,
    /// `path` with the walk root stripped; for a single-file root, just the
    /// file name.
    pub relative_path: PathBuf,
    /// Length reported by the file's metadata, in bytes.
    pub size: u64,
    /// Result of `FileType::from_path` for this file.
    pub file_type: FileType,
    /// Score computed by `calculate_priority`.
    pub priority: f32,
    /// Created empty by the walker; presumably filled by semantic analysis —
    /// confirm against `perform_semantic_analysis_graph`.
    pub imports: Vec<PathBuf>,
    /// Created empty by the walker (see the note on `imports`).
    pub imported_by: Vec<PathBuf>,
    /// Created empty by the walker (see the note on `imports`).
    pub function_calls: Vec<crate::core::semantic::analyzer::FunctionCall>,
    /// Created empty by the walker (see the note on `imports`).
    pub type_references: Vec<crate::core::semantic::analyzer::TypeReference>,
    /// Created empty by the walker (see the note on `imports`).
    pub exported_functions: Vec<crate::core::semantic::analyzer::FunctionDefinition>,
}
163
164impl FileInfo {
165 pub fn file_type_display(&self) -> &'static str {
167 use crate::utils::file_ext::FileType;
168 match self.file_type {
169 FileType::Rust => "Rust",
170 FileType::Python => "Python",
171 FileType::JavaScript => "JavaScript",
172 FileType::TypeScript => "TypeScript",
173 FileType::Go => "Go",
174 FileType::Java => "Java",
175 FileType::Cpp => "C++",
176 FileType::C => "C",
177 FileType::CSharp => "C#",
178 FileType::Ruby => "Ruby",
179 FileType::Php => "PHP",
180 FileType::Swift => "Swift",
181 FileType::Kotlin => "Kotlin",
182 FileType::Scala => "Scala",
183 FileType::Haskell => "Haskell",
184 FileType::Dart => "Dart",
185 FileType::Lua => "Lua",
186 FileType::R => "R",
187 FileType::Julia => "Julia",
188 FileType::Elixir => "Elixir",
189 FileType::Elm => "Elm",
190 FileType::Markdown => "Markdown",
191 FileType::Json => "JSON",
192 FileType::Yaml => "YAML",
193 FileType::Toml => "TOML",
194 FileType::Xml => "XML",
195 FileType::Html => "HTML",
196 FileType::Css => "CSS",
197 FileType::Text => "Text",
198 FileType::Other => "Other",
199 }
200 }
201}
202
203pub fn walk_directory(root: &Path, options: WalkOptions) -> Result<Vec<FileInfo>> {
205 if !root.exists() {
206 return Err(ContextCreatorError::InvalidPath(format!(
207 "Path does not exist: {}",
208 root.display()
209 ))
210 .into());
211 }
212
213 if root.is_file() {
215 let metadata = root.metadata()?;
216 let file_type = FileType::from_path(root);
217 let relative_path = PathBuf::from(
218 root.file_name()
219 .ok_or_else(|| anyhow::anyhow!("Invalid file name"))?,
220 );
221 let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
222
223 let file_info = FileInfo {
224 path: root.to_path_buf(),
225 relative_path,
226 size: metadata.len(),
227 file_type,
228 priority,
229 imports: Vec::new(),
230 imported_by: Vec::new(),
231 function_calls: Vec::new(),
232 type_references: Vec::new(),
233 exported_functions: Vec::new(),
234 };
235 return Ok(vec![file_info]);
236 }
237
238 if !root.is_dir() {
239 return Err(ContextCreatorError::InvalidPath(format!(
240 "Path is neither a file nor a directory: {}",
241 root.display()
242 ))
243 .into());
244 }
245
246 let root = root.canonicalize()?;
247 let walker = build_walker(&root, &options)?;
248
249 if options.parallel {
250 walk_parallel(walker, &root, &options)
251 } else {
252 walk_sequential(walker, &root, &options)
253 }
254}
255
256pub fn sanitize_pattern(pattern: &str) -> Result<String> {
258 if pattern.len() > 1000 {
260 return Err(ContextCreatorError::InvalidConfiguration(
261 "Pattern too long (max 1000 characters)".to_string(),
262 )
263 .into());
264 }
265
266 if pattern.contains('\0')
268 || pattern.chars().any(|c| {
269 c.is_control() ||
270 c == '\u{2028}' || c == '\u{2029}' || c == '\u{FEFF}' })
274 {
275 return Err(ContextCreatorError::InvalidConfiguration(
276 "Pattern contains invalid characters (null bytes or control characters)".to_string(),
277 )
278 .into());
279 }
280
281 if pattern.starts_with('/') || pattern.starts_with('\\') {
283 return Err(ContextCreatorError::InvalidConfiguration(
284 "Absolute paths not allowed in patterns".to_string(),
285 )
286 .into());
287 }
288
289 if pattern.contains("..") {
291 return Err(ContextCreatorError::InvalidConfiguration(
292 "Directory traversal (..) not allowed in patterns".to_string(),
293 )
294 .into());
295 }
296
297 Ok(pattern.to_string())
298}
299
300fn build_walker(root: &Path, options: &WalkOptions) -> Result<Walk> {
302 let mut builder = WalkBuilder::new(root);
303
304 builder
306 .follow_links(options.follow_links)
307 .hidden(!options.include_hidden)
308 .git_ignore(true)
309 .git_global(true)
310 .git_exclude(true)
311 .ignore(true)
312 .parents(true)
313 .add_custom_ignore_filename(&options.ignore_file);
314
315 if !options.include_patterns.is_empty() || !options.ignore_patterns.is_empty() {
317 let mut override_builder = ignore::overrides::OverrideBuilder::new(root);
318
319 if options.include_patterns.is_empty() && !options.ignore_patterns.is_empty() {
321 override_builder.add("**/*").map_err(|e| {
323 ContextCreatorError::InvalidConfiguration(format!(
324 "Failed to add include-all pattern: {e}"
325 ))
326 })?;
327 }
328
329 for pattern in &options.include_patterns {
331 if !pattern.trim().is_empty() {
332 let sanitized_pattern = sanitize_pattern(pattern)?;
334
335 override_builder.add(&sanitized_pattern).map_err(|e| {
337 ContextCreatorError::InvalidConfiguration(format!(
338 "Invalid include pattern '{pattern}': {e}"
339 ))
340 })?;
341 }
342 }
343
344 for pattern in &options.ignore_patterns {
347 if !pattern.trim().is_empty() {
348 let sanitized_pattern = sanitize_pattern(pattern)?;
350
351 let ignore_pattern = format!("!{sanitized_pattern}");
353 override_builder.add(&ignore_pattern).map_err(|e| {
354 ContextCreatorError::InvalidConfiguration(format!(
355 "Invalid ignore pattern '{pattern}': {e}"
356 ))
357 })?;
358 }
359 }
360
361 let overrides = override_builder.build().map_err(|e| {
362 ContextCreatorError::InvalidConfiguration(format!(
363 "Failed to build pattern overrides: {e}"
364 ))
365 })?;
366
367 builder.overrides(overrides);
368 }
369
370 Ok(builder.build())
371}
372
373fn walk_sequential(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
375 let mut files = Vec::new();
376
377 for entry in walker {
378 let entry = entry?;
379 let path = entry.path();
380
381 if path.is_dir() {
383 continue;
384 }
385
386 if let Some(file_info) = process_file(path, root, options)? {
388 files.push(file_info);
389 }
390 }
391
392 Ok(files)
393}
394
395fn walk_parallel(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
397 use itertools::Itertools;
398
399 let root = Arc::new(root.to_path_buf());
400 let options = Arc::new(options.clone());
401
402 let entries: Vec<_> = walker
404 .filter_map(|e| e.ok())
405 .filter(|e| !e.path().is_dir())
406 .collect();
407
408 let results: Vec<Result<Option<FileInfo>, ContextCreatorError>> = entries
410 .into_par_iter()
411 .map(|entry| {
412 let path = entry.path();
413 match process_file(path, &root, &options) {
414 Ok(file_info) => Ok(file_info),
415 Err(e) => Err(ContextCreatorError::FileProcessingError {
416 path: path.display().to_string(),
417 error: e.to_string(),
418 }),
419 }
420 })
421 .collect();
422
423 let (successes, errors): (Vec<_>, Vec<_>) = results.into_iter().partition_result();
425
426 if !errors.is_empty() {
428 let critical_errors: Vec<_> = errors
429 .iter()
430 .filter(|e| {
431 e.to_string().contains("Permission denied") || e.to_string().contains("Invalid")
432 })
433 .collect();
434
435 if !critical_errors.is_empty() {
436 let error_summary: Vec<String> =
438 critical_errors.iter().map(|e| e.to_string()).collect();
439 return Err(anyhow::anyhow!(
440 "Critical file processing errors encountered: {}",
441 error_summary.join(", ")
442 ));
443 }
444
445 warn!("Warning: {} files could not be processed:", errors.len());
447 for error in &errors {
448 warn!(" {}", error);
449 }
450 }
451
452 let files: Vec<FileInfo> = successes.into_iter().flatten().collect();
454 Ok(files)
455}
456
457fn process_file(path: &Path, root: &Path, options: &WalkOptions) -> Result<Option<FileInfo>> {
459 let metadata = match std::fs::metadata(path) {
461 Ok(meta) => meta,
462 Err(_) => return Ok(None), };
464
465 let size = metadata.len();
466
467 if let Some(max_size) = options.max_file_size {
469 if size > max_size as u64 {
470 return Ok(None);
471 }
472 }
473
474 let relative_path = path.strip_prefix(root).unwrap_or(path).to_path_buf();
476
477 let file_type = FileType::from_path(path);
479
480 let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);
482
483 Ok(Some(FileInfo {
484 path: path.to_path_buf(),
485 relative_path,
486 size,
487 file_type,
488 priority,
489 imports: Vec::new(), imported_by: Vec::new(), function_calls: Vec::new(), type_references: Vec::new(), exported_functions: Vec::new(), }))
495}
496
497fn calculate_priority(
499 file_type: &FileType,
500 relative_path: &Path,
501 custom_priorities: &[CompiledPriority],
502) -> f32 {
503 let base_score = calculate_base_priority(file_type, relative_path);
505
506 for priority in custom_priorities {
508 if priority.matcher.matches_path(relative_path) {
509 return base_score + priority.weight;
510 }
511 }
512
513 base_score
515}
516
517fn calculate_base_priority(file_type: &FileType, relative_path: &Path) -> f32 {
519 let mut score: f32 = match file_type {
520 FileType::Rust => 1.0,
521 FileType::Python => 0.9,
522 FileType::JavaScript => 0.9,
523 FileType::TypeScript => 0.95,
524 FileType::Go => 0.9,
525 FileType::Java => 0.85,
526 FileType::Cpp => 0.85,
527 FileType::C => 0.8,
528 FileType::CSharp => 0.85,
529 FileType::Ruby => 0.8,
530 FileType::Php => 0.75,
531 FileType::Swift => 0.85,
532 FileType::Kotlin => 0.85,
533 FileType::Scala => 0.8,
534 FileType::Haskell => 0.75,
535 FileType::Dart => 0.85,
536 FileType::Lua => 0.7,
537 FileType::R => 0.75,
538 FileType::Julia => 0.8,
539 FileType::Elixir => 0.8,
540 FileType::Elm => 0.75,
541 FileType::Markdown => 0.6,
542 FileType::Json => 0.5,
543 FileType::Yaml => 0.5,
544 FileType::Toml => 0.5,
545 FileType::Xml => 0.4,
546 FileType::Html => 0.4,
547 FileType::Css => 0.4,
548 FileType::Text => 0.3,
549 FileType::Other => 0.2,
550 };
551
552 let path_str = relative_path.to_string_lossy().to_lowercase();
554 if path_str.contains("main") || path_str.contains("index") {
555 score *= 1.5;
556 }
557 if path_str.contains("lib") || path_str.contains("src") {
558 score *= 1.2;
559 }
560 if path_str.contains("test") || path_str.contains("spec") {
561 score *= 0.8;
562 }
563 if path_str.contains("example") || path_str.contains("sample") {
564 score *= 0.7;
565 }
566
567 if relative_path.parent().is_none() || relative_path.parent() == Some(Path::new("")) {
569 match file_type {
570 FileType::Toml | FileType::Yaml | FileType::Json => score *= 1.3,
571 _ => {}
572 }
573 }
574
575 score.min(2.0) }
577
/// Runs semantic analysis over `files` by delegating to
/// `crate::core::semantic_graph::perform_semantic_analysis_graph`.
///
/// All three arguments are forwarded unchanged and the delegate's result is
/// returned as-is. `files` is passed mutably; presumably the delegate fills in
/// the relationship fields of each `FileInfo` — confirm in `semantic_graph`.
pub fn perform_semantic_analysis(
    files: &mut [FileInfo],
    config: &crate::cli::Config,
    cache: &crate::core::cache::FileCache,
) -> Result<()> {
    crate::core::semantic_graph::perform_semantic_analysis_graph(files, config, cache)
}
598
/// Returns `s` with its first character uppercased and the rest unchanged.
///
/// Uppercasing follows `char::to_uppercase`, so a single character may expand
/// to several (for example `ß` becomes `SS`). An empty input yields an empty
/// string.
#[allow(dead_code)]
fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        let mut out = String::with_capacity(s.len());
        out.extend(first.to_uppercase());
        // `rest` now starts just after the first character.
        out.push_str(rest.as_str());
        out
    } else {
        String::new()
    }
}
608
609#[cfg(test)]
610mod tests {
611 use super::*;
612 use std::fs::{self, File};
613 use tempfile::TempDir;
614
615 #[test]
616 fn test_walk_directory_basic() {
617 let temp_dir = TempDir::new().unwrap();
618 let root = temp_dir.path();
619
620 File::create(root.join("main.rs")).unwrap();
622 File::create(root.join("lib.rs")).unwrap();
623 fs::create_dir(root.join("src")).unwrap();
624 File::create(root.join("src/utils.rs")).unwrap();
625
626 let options = WalkOptions::default();
627 let files = walk_directory(root, options).unwrap();
628
629 assert_eq!(files.len(), 3);
630 assert!(files
631 .iter()
632 .any(|f| f.relative_path == PathBuf::from("main.rs")));
633 assert!(files
634 .iter()
635 .any(|f| f.relative_path == PathBuf::from("lib.rs")));
636 assert!(files
637 .iter()
638 .any(|f| f.relative_path == PathBuf::from("src/utils.rs")));
639 }
640
641 #[test]
642 fn test_walk_with_contextignore() {
643 let temp_dir = TempDir::new().unwrap();
644 let root = temp_dir.path();
645
646 File::create(root.join("main.rs")).unwrap();
648 File::create(root.join("ignored.rs")).unwrap();
649
650 fs::write(root.join(".context-creator-ignore"), "ignored.rs").unwrap();
652
653 let options = WalkOptions::default();
654 let files = walk_directory(root, options).unwrap();
655
656 assert_eq!(files.len(), 1);
657 assert_eq!(files[0].relative_path, PathBuf::from("main.rs"));
658 }
659
660 #[test]
661 fn test_priority_calculation() {
662 let rust_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
663 let test_priority = calculate_priority(&FileType::Rust, Path::new("tests/test.rs"), &[]);
664 let doc_priority = calculate_priority(&FileType::Markdown, Path::new("README.md"), &[]);
665
666 assert!(rust_priority > doc_priority);
667 assert!(rust_priority > test_priority);
668 }
669
670 #[test]
671 fn test_file_size_limit() {
672 let temp_dir = TempDir::new().unwrap();
673 let root = temp_dir.path();
674
675 let large_file = root.join("large.txt");
677 let data = vec![0u8; 1024 * 1024]; fs::write(&large_file, &data).unwrap();
679
680 File::create(root.join("small.txt")).unwrap();
682
683 let options = WalkOptions {
684 max_file_size: Some(512 * 1024), ..Default::default()
686 };
687
688 let files = walk_directory(root, options).unwrap();
689
690 assert_eq!(files.len(), 1);
691 assert_eq!(files[0].relative_path, PathBuf::from("small.txt"));
692 }
693
694 #[test]
695 fn test_walk_empty_directory() {
696 let temp_dir = TempDir::new().unwrap();
697 let root = temp_dir.path();
698
699 let options = WalkOptions::default();
700 let files = walk_directory(root, options).unwrap();
701
702 assert_eq!(files.len(), 0);
703 }
704
705 #[test]
706 fn test_walk_options_from_config() {
707 use crate::cli::Config;
708 use tempfile::TempDir;
709
710 let temp_dir = TempDir::new().unwrap();
711 let config = Config {
712 paths: Some(vec![temp_dir.path().to_path_buf()]),
713 ..Config::default()
714 };
715
716 let options = WalkOptions::from_config(&config).unwrap();
717
718 assert_eq!(options.max_file_size, Some(10 * 1024 * 1024));
719 assert!(!options.follow_links);
720 assert!(!options.include_hidden);
721 assert!(options.parallel);
722 assert_eq!(options.ignore_file, ".context-creator-ignore");
723 }
724
725 #[test]
726 fn test_walk_with_custom_options() {
727 let temp_dir = TempDir::new().unwrap();
728 let root = temp_dir.path();
729
730 File::create(root.join("main.rs")).unwrap();
732 File::create(root.join("test.rs")).unwrap();
733 File::create(root.join("readme.md")).unwrap();
734
735 let options = WalkOptions {
736 ignore_patterns: vec!["*.md".to_string()],
737 ..Default::default()
738 };
739
740 let files = walk_directory(root, options).unwrap();
741
742 assert!(files.len() >= 2);
744 assert!(files
745 .iter()
746 .any(|f| f.relative_path == PathBuf::from("main.rs")));
747 assert!(files
748 .iter()
749 .any(|f| f.relative_path == PathBuf::from("test.rs")));
750 }
751
752 #[test]
753 fn test_walk_with_include_patterns() {
754 let temp_dir = TempDir::new().unwrap();
755 let root = temp_dir.path();
756
757 File::create(root.join("main.rs")).unwrap();
759 File::create(root.join("lib.rs")).unwrap();
760 File::create(root.join("README.md")).unwrap();
761
762 let options = WalkOptions {
763 include_patterns: vec!["*.rs".to_string()],
764 ..Default::default()
765 };
766
767 let files = walk_directory(root, options).unwrap();
768
769 assert!(files.len() >= 2);
771 assert!(files
772 .iter()
773 .any(|f| f.relative_path == PathBuf::from("main.rs")));
774 assert!(files
775 .iter()
776 .any(|f| f.relative_path == PathBuf::from("lib.rs")));
777 }
778
779 #[test]
780 fn test_walk_subdirectories() {
781 let temp_dir = TempDir::new().unwrap();
782 let root = temp_dir.path();
783
784 fs::create_dir(root.join("src")).unwrap();
786 fs::create_dir(root.join("src").join("utils")).unwrap();
787 File::create(root.join("main.rs")).unwrap();
788 File::create(root.join("src").join("lib.rs")).unwrap();
789 File::create(root.join("src").join("utils").join("helpers.rs")).unwrap();
790
791 let options = WalkOptions::default();
792 let files = walk_directory(root, options).unwrap();
793
794 assert_eq!(files.len(), 3);
795 assert!(files
796 .iter()
797 .any(|f| f.relative_path == PathBuf::from("main.rs")));
798 assert!(files
799 .iter()
800 .any(|f| f.relative_path == PathBuf::from("src/lib.rs")));
801 assert!(files
802 .iter()
803 .any(|f| f.relative_path == PathBuf::from("src/utils/helpers.rs")));
804 }
805
806 #[test]
807 fn test_priority_edge_cases() {
808 let main_priority = calculate_priority(&FileType::Rust, Path::new("main.rs"), &[]);
810 let lib_priority = calculate_priority(&FileType::Rust, Path::new("lib.rs"), &[]);
811 let nested_main_priority =
812 calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
813
814 assert!(main_priority > lib_priority);
815 assert!(nested_main_priority > lib_priority);
816
817 let toml_priority = calculate_priority(&FileType::Toml, Path::new("Cargo.toml"), &[]);
819 let nested_toml_priority =
820 calculate_priority(&FileType::Toml, Path::new("config/app.toml"), &[]);
821
822 assert!(toml_priority > nested_toml_priority);
823 }
824
825 #[test]
828 fn test_custom_priority_no_match_returns_base_priority() {
829 let custom_priorities = [CompiledPriority::new("docs/*.md", 5.0).unwrap()];
832
833 let priority = calculate_priority(
835 &FileType::Rust,
836 Path::new("src/main.rs"),
837 &custom_priorities,
838 );
839
840 let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
842 assert_eq!(priority, expected_base);
843 }
844
845 #[test]
846 fn test_custom_priority_single_match_adds_weight() {
847 let custom_priorities = [CompiledPriority::new("src/core/mod.rs", 10.0).unwrap()];
849
850 let priority = calculate_priority(
852 &FileType::Rust,
853 Path::new("src/core/mod.rs"),
854 &custom_priorities,
855 );
856
857 let base_priority = calculate_priority(&FileType::Rust, Path::new("src/core/mod.rs"), &[]);
859 let expected = base_priority + 10.0;
860 assert_eq!(priority, expected);
861 }
862
863 #[test]
864 fn test_custom_priority_glob_pattern_match() {
865 let custom_priorities = [CompiledPriority::new("src/**/*.rs", 2.5).unwrap()];
867
868 let priority = calculate_priority(
870 &FileType::Rust,
871 Path::new("src/api/handlers.rs"),
872 &custom_priorities,
873 );
874
875 let base_priority =
877 calculate_priority(&FileType::Rust, Path::new("src/api/handlers.rs"), &[]);
878 let expected = base_priority + 2.5;
879 assert_eq!(priority, expected);
880 }
881
882 #[test]
883 fn test_custom_priority_negative_weight() {
884 let custom_priorities = [CompiledPriority::new("tests/*", -0.5).unwrap()];
886
887 let priority = calculate_priority(
889 &FileType::Rust,
890 Path::new("tests/test_utils.rs"),
891 &custom_priorities,
892 );
893
894 let base_priority =
896 calculate_priority(&FileType::Rust, Path::new("tests/test_utils.rs"), &[]);
897 let expected = base_priority - 0.5;
898 assert_eq!(priority, expected);
899 }
900
901 #[test]
902 fn test_custom_priority_first_match_wins() {
903 let custom_priorities = [
905 CompiledPriority::new("src/**/*.rs", 5.0).unwrap(),
906 CompiledPriority::new("src/main.rs", 100.0).unwrap(),
907 ];
908
909 let priority = calculate_priority(
911 &FileType::Rust,
912 Path::new("src/main.rs"),
913 &custom_priorities,
914 );
915
916 let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
918 let expected = base_priority + 5.0;
919 assert_eq!(priority, expected);
920 }
921
922 #[test]
923 fn test_custom_priority_zero_weight() {
924 let custom_priorities = [CompiledPriority::new("*.rs", 0.0).unwrap()];
926
927 let priority = calculate_priority(
929 &FileType::Rust,
930 Path::new("src/main.rs"),
931 &custom_priorities,
932 );
933
934 let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
936 assert_eq!(priority, base_priority);
937 }
938
939 #[test]
940 fn test_custom_priority_empty_list() {
941 let custom_priorities: &[CompiledPriority] = &[];
943
944 let priority =
946 calculate_priority(&FileType::Rust, Path::new("src/main.rs"), custom_priorities);
947
948 let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
950 assert_eq!(priority, expected_base);
951 }
952
953 #[test]
956 fn test_config_to_walker_data_flow() {
957 use crate::config::{ConfigFile, Priority};
958 use std::fs::{self, File};
959 use tempfile::TempDir;
960
961 let temp_dir = TempDir::new().unwrap();
963 let root = temp_dir.path();
964
965 File::create(root.join("high_priority.rs")).unwrap();
967 File::create(root.join("normal.txt")).unwrap();
968 fs::create_dir(root.join("logs")).unwrap();
969 File::create(root.join("logs/app.log")).unwrap();
970
971 let config_file = ConfigFile {
973 priorities: vec![
974 Priority {
975 pattern: "*.rs".to_string(),
976 weight: 10.0,
977 },
978 Priority {
979 pattern: "logs/*.log".to_string(),
980 weight: -5.0,
981 },
982 ],
983 ..Default::default()
984 };
985
986 let mut config = crate::cli::Config {
988 prompt: None,
989 paths: Some(vec![root.to_path_buf()]),
990 include: None,
991 ignore: None,
992 remote: None,
993 read_stdin: false,
994 output_file: None,
995 max_tokens: None,
996 llm_tool: crate::cli::LlmTool::default(),
997 quiet: false,
998 verbose: 0,
999 log_format: crate::cli::LogFormat::default(),
1000 config: None,
1001 progress: false,
1002 copy: false,
1003 enhanced_context: false,
1004 trace_imports: false,
1005 include_callers: false,
1006 include_types: false,
1007 semantic_depth: 3,
1008 custom_priorities: vec![],
1009 config_token_limits: None,
1010 config_defaults_max_tokens: None,
1011 };
1012 config_file.apply_to_cli_config(&mut config);
1013
1014 let walk_options = WalkOptions::from_config(&config).unwrap();
1016
1017 let files = walk_directory(root, walk_options).unwrap();
1019
1020 let rs_file = files
1022 .iter()
1023 .find(|f| {
1024 f.relative_path
1025 .to_string_lossy()
1026 .contains("high_priority.rs")
1027 })
1028 .unwrap();
1029 let log_file = files
1030 .iter()
1031 .find(|f| f.relative_path.to_string_lossy().contains("app.log"))
1032 .unwrap();
1033 let txt_file = files
1034 .iter()
1035 .find(|f| f.relative_path.to_string_lossy().contains("normal.txt"))
1036 .unwrap();
1037
1038 let base_rs = calculate_base_priority(&rs_file.file_type, &rs_file.relative_path);
1040 let base_txt = calculate_base_priority(&txt_file.file_type, &txt_file.relative_path);
1041 let base_log = calculate_base_priority(&log_file.file_type, &log_file.relative_path);
1042
1043 assert_eq!(rs_file.priority, base_rs + 10.0);
1045
1046 assert_eq!(log_file.priority, base_log - 5.0);
1048
1049 assert_eq!(txt_file.priority, base_txt);
1051 }
1052
1053 #[test]
1054 fn test_invalid_glob_pattern_in_config() {
1055 use crate::config::{ConfigFile, Priority};
1056 use tempfile::TempDir;
1057
1058 let temp_dir = TempDir::new().unwrap();
1059
1060 let config_file = ConfigFile {
1062 priorities: vec![Priority {
1063 pattern: "[invalid_glob".to_string(),
1064 weight: 5.0,
1065 }],
1066 ..Default::default()
1067 };
1068
1069 let mut config = crate::cli::Config {
1070 prompt: None,
1071 paths: Some(vec![temp_dir.path().to_path_buf()]),
1072 include: None,
1073 ignore: None,
1074 remote: None,
1075 read_stdin: false,
1076 output_file: None,
1077 max_tokens: None,
1078 llm_tool: crate::cli::LlmTool::default(),
1079 quiet: false,
1080 verbose: 0,
1081 log_format: crate::cli::LogFormat::default(),
1082 config: None,
1083 progress: false,
1084 copy: false,
1085 enhanced_context: false,
1086 trace_imports: false,
1087 include_callers: false,
1088 include_types: false,
1089 semantic_depth: 3,
1090 custom_priorities: vec![],
1091 config_token_limits: None,
1092 config_defaults_max_tokens: None,
1093 };
1094 config_file.apply_to_cli_config(&mut config);
1095
1096 let result = WalkOptions::from_config(&config);
1098 assert!(result.is_err());
1099
1100 let error_msg = result.unwrap_err().to_string();
1102 assert!(error_msg.contains("invalid_glob") || error_msg.contains("Invalid"));
1103 }
1104
1105 #[test]
1106 fn test_empty_custom_priorities_config() {
1107 use crate::config::ConfigFile;
1108 use tempfile::TempDir;
1109
1110 let temp_dir = TempDir::new().unwrap();
1111
1112 let config_file = ConfigFile {
1114 priorities: vec![], ..Default::default()
1116 };
1117
1118 let mut config = crate::cli::Config {
1119 prompt: None,
1120 paths: Some(vec![temp_dir.path().to_path_buf()]),
1121 include: None,
1122 ignore: None,
1123 remote: None,
1124 read_stdin: false,
1125 output_file: None,
1126 max_tokens: None,
1127 llm_tool: crate::cli::LlmTool::default(),
1128 quiet: false,
1129 verbose: 0,
1130 log_format: crate::cli::LogFormat::default(),
1131 config: None,
1132 progress: false,
1133 copy: false,
1134 enhanced_context: false,
1135 trace_imports: false,
1136 include_callers: false,
1137 include_types: false,
1138 semantic_depth: 3,
1139 custom_priorities: vec![],
1140 config_token_limits: None,
1141 config_defaults_max_tokens: None,
1142 };
1143 config_file.apply_to_cli_config(&mut config);
1144
1145 let walk_options = WalkOptions::from_config(&config).unwrap();
1147
1148 assert!(walk_directory(temp_dir.path(), walk_options).is_ok());
1151 }
1152
1153 #[test]
1154 fn test_empty_pattern_in_config() {
1155 use crate::config::{ConfigFile, Priority};
1156 use tempfile::TempDir;
1157
1158 let temp_dir = TempDir::new().unwrap();
1159
1160 let config_file = ConfigFile {
1162 priorities: vec![Priority {
1163 pattern: "".to_string(),
1164 weight: 5.0,
1165 }],
1166 ..Default::default()
1167 };
1168
1169 let mut config = crate::cli::Config {
1170 prompt: None,
1171 paths: Some(vec![temp_dir.path().to_path_buf()]),
1172 include: None,
1173 ignore: None,
1174 remote: None,
1175 read_stdin: false,
1176 output_file: None,
1177 max_tokens: None,
1178 llm_tool: crate::cli::LlmTool::default(),
1179 quiet: false,
1180 verbose: 0,
1181 log_format: crate::cli::LogFormat::default(),
1182 config: None,
1183 progress: false,
1184 copy: false,
1185 enhanced_context: false,
1186 trace_imports: false,
1187 include_callers: false,
1188 include_types: false,
1189 semantic_depth: 3,
1190 custom_priorities: vec![],
1191 config_token_limits: None,
1192 config_defaults_max_tokens: None,
1193 };
1194 config_file.apply_to_cli_config(&mut config);
1195
1196 let result = WalkOptions::from_config(&config);
1198 assert!(result.is_ok());
1199
1200 let walk_options = result.unwrap();
1202 assert_eq!(walk_options.custom_priorities.len(), 1);
1203 }
1204
1205 #[test]
1206 fn test_extreme_weights_in_config() {
1207 use crate::config::{ConfigFile, Priority};
1208 use tempfile::TempDir;
1209
1210 let temp_dir = TempDir::new().unwrap();
1211
1212 let config_file = ConfigFile {
1214 priorities: vec![
1215 Priority {
1216 pattern: "*.rs".to_string(),
1217 weight: f32::MAX,
1218 },
1219 Priority {
1220 pattern: "*.txt".to_string(),
1221 weight: f32::MIN,
1222 },
1223 Priority {
1224 pattern: "*.md".to_string(),
1225 weight: f32::INFINITY,
1226 },
1227 Priority {
1228 pattern: "*.log".to_string(),
1229 weight: f32::NEG_INFINITY,
1230 },
1231 ],
1232 ..Default::default()
1233 };
1234
1235 let mut config = crate::cli::Config {
1236 prompt: None,
1237 paths: Some(vec![temp_dir.path().to_path_buf()]),
1238 include: None,
1239 ignore: None,
1240 remote: None,
1241 read_stdin: false,
1242 output_file: None,
1243 max_tokens: None,
1244 llm_tool: crate::cli::LlmTool::default(),
1245 quiet: false,
1246 verbose: 0,
1247 log_format: crate::cli::LogFormat::default(),
1248 config: None,
1249 progress: false,
1250 copy: false,
1251 enhanced_context: false,
1252 trace_imports: false,
1253 include_callers: false,
1254 include_types: false,
1255 semantic_depth: 3,
1256 custom_priorities: vec![],
1257 config_token_limits: None,
1258 config_defaults_max_tokens: None,
1259 };
1260 config_file.apply_to_cli_config(&mut config);
1261
1262 let result = WalkOptions::from_config(&config);
1264 assert!(result.is_ok());
1265
1266 let walk_options = result.unwrap();
1267 assert_eq!(walk_options.custom_priorities.len(), 4);
1268 }
1269
1270 #[test]
1271 fn test_file_info_file_type_display() {
1272 let file_info = FileInfo {
1273 path: PathBuf::from("test.rs"),
1274 relative_path: PathBuf::from("test.rs"),
1275 size: 1000,
1276 file_type: FileType::Rust,
1277 priority: 1.0,
1278 imports: Vec::new(),
1279 imported_by: Vec::new(),
1280 function_calls: Vec::new(),
1281 type_references: Vec::new(),
1282 exported_functions: Vec::new(),
1283 };
1284
1285 assert_eq!(file_info.file_type_display(), "Rust");
1286
1287 let file_info_md = FileInfo {
1288 path: PathBuf::from("README.md"),
1289 relative_path: PathBuf::from("README.md"),
1290 size: 500,
1291 file_type: FileType::Markdown,
1292 priority: 0.6,
1293 imports: Vec::new(),
1294 imported_by: Vec::new(),
1295 function_calls: Vec::new(),
1296 type_references: Vec::new(),
1297 exported_functions: Vec::new(),
1298 };
1299
1300 assert_eq!(file_info_md.file_type_display(), "Markdown");
1301 }
1302
#[test]
/// Include patterns supplied on the CLI config are expected to appear
/// unchanged, and in the same order, in the resulting `WalkOptions`.
fn test_walk_options_from_config_with_include_patterns() {
    use crate::cli::{Config, LlmTool, LogFormat};

    // Single source of truth for both the input and the expected output.
    let expected = ["**/*.rs", "**/test[0-9].py"];

    let cli_config = Config {
        prompt: None,
        paths: None,
        include: Some(expected.iter().map(|p| p.to_string()).collect()),
        ignore: None,
        remote: None,
        read_stdin: false,
        output_file: None,
        max_tokens: None,
        llm_tool: LlmTool::default(),
        quiet: false,
        verbose: 0,
        log_format: LogFormat::default(),
        config: None,
        progress: false,
        copy: false,
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        custom_priorities: Vec::new(),
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    let walk_options = WalkOptions::from_config(&cli_config).unwrap();

    assert_eq!(walk_options.include_patterns, expected.to_vec());
}
1339
#[test]
/// With no `include` entries on the config, the resulting `WalkOptions` is
/// expected to carry an empty include-pattern list.
fn test_walk_options_from_config_empty_include_patterns() {
    use crate::cli::{Config, LlmTool, LogFormat};

    let cli_config = Config {
        prompt: None,
        paths: None,
        include: None,
        ignore: None,
        remote: None,
        read_stdin: false,
        output_file: None,
        max_tokens: None,
        llm_tool: LlmTool::default(),
        quiet: false,
        verbose: 0,
        log_format: LogFormat::default(),
        config: None,
        progress: false,
        copy: false,
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        custom_priorities: Vec::new(),
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    let walk_options = WalkOptions::from_config(&cli_config).unwrap();

    let expected: Vec<String> = Vec::new();
    assert_eq!(walk_options.include_patterns, expected);
}
1372
#[test]
/// Empty and whitespace-only include patterns are expected to be dropped by
/// `WalkOptions::from_config`, leaving only the meaningful ones.
fn test_walk_options_filters_empty_patterns() {
    use crate::cli::{Config, LlmTool, LogFormat};

    // Two real patterns interleaved with an empty and a blank entry.
    let raw_patterns: Vec<String> = ["**/*.rs", "", " ", "*.py"]
        .iter()
        .map(|p| p.to_string())
        .collect();

    let cli_config = Config {
        prompt: None,
        paths: None,
        include: Some(raw_patterns),
        ignore: None,
        remote: None,
        read_stdin: false,
        output_file: None,
        max_tokens: None,
        llm_tool: LlmTool::default(),
        quiet: false,
        verbose: 0,
        log_format: LogFormat::default(),
        config: None,
        progress: false,
        copy: false,
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        custom_priorities: Vec::new(),
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    let walk_options = WalkOptions::from_config(&cli_config).unwrap();

    let surviving = vec!["**/*.rs", "*.py"];
    assert_eq!(walk_options.include_patterns, surviving);
}
1412
#[test]
/// Well-formed glob patterns must pass `sanitize_pattern` and come back
/// unchanged.
fn test_sanitize_pattern_valid_patterns() {
    let accepted: &[&str] = &[
        "*.py",
        "**/*.rs",
        "src/**/*.{js,ts}",
        "test[0-9].py",
        "**/*{model,service}*.py",
        "**/db/**",
        "some-file.txt",
        "dir/subdir/*.md",
    ];

    for &pattern in accepted {
        let outcome = sanitize_pattern(pattern);
        assert!(outcome.is_ok(), "Pattern '{pattern}' should be valid");
        // The sanitized output is expected to equal the input.
        assert_eq!(outcome.unwrap(), pattern);
    }
}
1435
#[test]
/// Patterns of 999 and 1000 characters are accepted; a 1001-character
/// pattern is rejected with a "Pattern too long" error.
fn test_sanitize_pattern_length_limit() {
    // Build all inputs up front: just under, exactly at, and just over 1000.
    let lengths = [999usize, 1000, 1001];
    let [below_limit, at_limit, over_limit] = lengths.map(|len| "a".repeat(len));

    assert!(sanitize_pattern(&below_limit).is_ok());
    assert!(sanitize_pattern(&at_limit).is_ok());

    let rejection = sanitize_pattern(&over_limit);
    assert!(rejection.is_err());
    let message = rejection.unwrap_err().to_string();
    assert!(message.contains("Pattern too long"));
}
1450
#[test]
/// A NUL byte anywhere in a pattern (start, middle, or end) must cause
/// rejection with an "invalid characters" error.
fn test_sanitize_pattern_null_bytes() {
    let nul_cases: &[&str] = &["test\0.py", "\0*.rs", "**/*.js\0", "dir/\0file.txt"];

    for &pattern in nul_cases {
        let outcome = sanitize_pattern(pattern);
        assert!(
            outcome.is_err(),
            "Pattern with null byte should be rejected: {pattern:?}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("invalid characters"));
    }
}
1468
#[test]
/// Patterns containing the control characters 0x01, 0x08, 0x0c, 0x1f, or 0x7f
/// must be rejected with an "invalid characters" error.
fn test_sanitize_pattern_control_characters() {
    let control_cases: &[&str] = &[
        "test\x01.py",
        "file\x08.txt",
        "dir\x0c/*.rs",
        "test\x1f.md",
        "*.py\x7f",
    ];

    for &pattern in control_cases {
        let outcome = sanitize_pattern(pattern);
        assert!(
            outcome.is_err(),
            "Pattern with control char should be rejected: {pattern:?}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("invalid characters"));
    }
}
1492
#[test]
/// Patterns that begin with `/` or `\` must be rejected with an
/// "Absolute paths not allowed" error.
fn test_sanitize_pattern_absolute_paths() {
    let rooted_cases: &[&str] = &[
        // Leading forward slash.
        "/etc/passwd",
        "/usr/bin/*.sh",
        "/home/user/file.txt",
        // Leading backslash.
        "\\Windows\\System32\\*.dll",
        "\\Program Files\\*",
    ];

    for &pattern in rooted_cases {
        let outcome = sanitize_pattern(pattern);
        assert!(
            outcome.is_err(),
            "Absolute path should be rejected: {pattern}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Absolute paths not allowed"));
    }
}
1516
#[test]
/// Patterns containing a `..` path component must be rejected with a
/// "Directory traversal" error, wherever the component appears.
fn test_sanitize_pattern_directory_traversal() {
    let escaping_cases: &[&str] = &[
        "../../../etc/passwd",
        "dir/../../../file.txt",
        "**/../secret/*",
        "test/../../*.py",
        "../config.toml",
        "subdir/../../other.rs",
    ];

    for &pattern in escaping_cases {
        let outcome = sanitize_pattern(pattern);
        assert!(
            outcome.is_err(),
            "Directory traversal should be rejected: {pattern}"
        );

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Directory traversal"));
    }
}
1541
#[test]
/// Edge cases for `sanitize_pattern`: empty, blank, non-ASCII, and complex
/// glob inputs are accepted, while embedded newlines and tabs are rejected.
fn test_sanitize_pattern_edge_cases() {
    // (input, failure message) pairs that must be accepted.
    let must_accept = [
        ("", "Empty string should be allowed"),
        (" ", "Whitespace-only should be allowed"),
        ("файл*.txt", "Unicode should be allowed"),
        (
            "file[!abc]*.{py,rs}",
            "Complex glob patterns should be allowed",
        ),
    ];
    for (input, why) in must_accept {
        assert!(sanitize_pattern(input).is_ok(), "{why}");
    }

    // (input, failure message) pairs that must be rejected.
    let must_reject = [
        ("file\nname.txt", "Newlines should be rejected"),
        ("file\tname.txt", "Tabs should be rejected"),
    ];
    for (input, why) in must_reject {
        assert!(sanitize_pattern(input).is_err(), "{why}");
    }
}
1569
#[test]
/// Boundary behaviour: any `..` substring is rejected (even inside a file
/// name), while a plain recursive glob is accepted.
fn test_sanitize_pattern_boundary_conditions() {
    // `..` that is not a path component is still refused.
    assert!(
        sanitize_pattern("file..name.txt").is_err(),
        "Any '..' should be rejected for safety"
    );

    assert!(
        sanitize_pattern("**/*.py").is_ok(),
        "Recursive glob should be allowed"
    );

    // A valid prefix does not excuse a traversal component later on.
    assert!(
        sanitize_pattern("valid/*.py/../invalid").is_err(),
        "Mixed pattern should be rejected"
    );
}
1586
#[test]
/// Bypass attempts: percent-encoded sequences are treated as literal text
/// (accepted), while escaped dots and a trailing NUL are still rejected.
fn test_sanitize_pattern_security_bypass_attempts() {
    // Percent-encoded input is expected to be accepted as-is.
    let literal_percent_sequences = [
        ("file%00.txt", "URL encoding should not be decoded"),
        ("file%2e%2e/secret", "Double encoding should not be decoded"),
    ];
    for (input, why) in literal_percent_sequences {
        assert!(sanitize_pattern(input).is_ok(), "{why}");
    }

    // `\u{002e}` is an ordinary '.', so this spells "file../secret".
    let still_rejected = [
        (
            "file\u{002e}\u{002e}/secret",
            "Unicode dots should be treated as '..'",
        ),
        ("legitimate-pattern\0", "Trailing null should be caught"),
    ];
    for (input, why) in still_rejected {
        assert!(sanitize_pattern(input).is_err(), "{why}");
    }
}
1607
#[test]
/// The rendered text of a `FileProcessingError` includes its `error` field,
/// and an `InvalidConfiguration` error renders text containing "Invalid".
fn test_error_handling_classification() {
    use crate::utils::error::ContextCreatorError;

    // Construct both errors before inspecting either one.
    let processing_failure = ContextCreatorError::FileProcessingError {
        path: "test.txt".to_string(),
        error: "Permission denied".to_string(),
    };
    let configuration_failure =
        ContextCreatorError::InvalidConfiguration("Invalid pattern".to_string());

    let rendered = processing_failure.to_string();
    assert!(rendered.contains("Permission denied"));

    let rendered = configuration_failure.to_string();
    assert!(rendered.contains("Invalid"));
}
1632
#[test]
/// End-to-end check: `build_walker` must fail when the walk options carry a
/// directory-traversal include pattern.
fn test_pattern_sanitization_integration() {
    use tempfile::TempDir;

    let scratch_dir = TempDir::new().unwrap();

    let traversal_options = WalkOptions {
        max_file_size: Some(1024),
        follow_links: false,
        include_hidden: false,
        parallel: false,
        ignore_file: ".context-creator-ignore".to_string(),
        ignore_patterns: Vec::new(),
        // The malicious pattern under test.
        include_patterns: vec!["../../../etc/passwd".to_string()],
        custom_priorities: Vec::new(),
        filter_binary_files: false,
    };

    let outcome = build_walker(scratch_dir.path(), &traversal_options);
    assert!(
        outcome.is_err(),
        "Directory traversal pattern should be rejected by sanitization"
    );

    // Either wording is accepted for the reported failure.
    if let Some(message) = outcome.err().map(|e| e.to_string()) {
        assert!(message.contains("Directory traversal") || message.contains("Invalid"));
    }
}
1666
#[test]
/// When the config carries a prompt, the derived `WalkOptions` is expected to
/// have `filter_binary_files` switched on.
fn test_walk_options_filters_binary_files_with_prompt() {
    use crate::cli::{Config, LlmTool, LogFormat};

    let prompted_config = Config {
        prompt: Some(String::from("test prompt")),
        paths: Some(vec![PathBuf::from(".")]),
        include: None,
        ignore: None,
        remote: None,
        read_stdin: false,
        output_file: None,
        max_tokens: None,
        llm_tool: LlmTool::Gemini,
        quiet: false,
        verbose: 0,
        log_format: LogFormat::default(),
        config: None,
        progress: false,
        copy: false,
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        custom_priorities: Vec::new(),
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    let walk_options = WalkOptions::from_config(&prompted_config).unwrap();
    assert!(walk_options.filter_binary_files);
}
1700
#[test]
/// Without a prompt on the config, the derived `WalkOptions` is expected to
/// leave `filter_binary_files` switched off.
fn test_walk_options_no_binary_filter_without_prompt() {
    use crate::cli::{Config, LlmTool, LogFormat};

    let promptless_config = Config {
        prompt: None,
        paths: Some(vec![PathBuf::from(".")]),
        include: None,
        ignore: None,
        remote: None,
        read_stdin: false,
        output_file: None,
        max_tokens: None,
        llm_tool: LlmTool::Gemini,
        quiet: false,
        verbose: 0,
        log_format: LogFormat::default(),
        config: None,
        progress: false,
        copy: false,
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        custom_priorities: Vec::new(),
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    let walk_options = WalkOptions::from_config(&promptless_config).unwrap();
    assert!(!walk_options.filter_binary_files);
}
1734}