1use crate::utils::error::CodeDigestError;
4use crate::utils::file_ext::FileType;
5use anyhow::Result;
6use ignore::{Walk, WalkBuilder};
7use rayon::prelude::*;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10
/// Settings that control how a directory tree is walked.
#[derive(Clone, Debug)]
pub struct WalkOptions {
    /// Largest file size, in bytes, that is still reported; `None` disables the limit.
    pub max_file_size: Option<usize>,
    /// Whether the walker follows symbolic links.
    pub follow_links: bool,
    /// Whether hidden entries are visited.
    pub include_hidden: bool,
    /// Whether files are processed in parallel instead of one at a time.
    pub parallel: bool,
    /// Name of the project-specific ignore file looked up while walking.
    pub ignore_file: String,
    /// Glob-style patterns (for example `*.md`) describing entries to leave out.
    pub ignore_patterns: Vec<String>,
    /// Glob-style patterns (for example `*.rs`) describing entries to keep.
    pub include_patterns: Vec<String>,
}
29
30impl WalkOptions {
31 pub fn from_config(_config: &crate::cli::Config) -> Result<Self> {
33 Ok(WalkOptions {
34 max_file_size: Some(10 * 1024 * 1024), follow_links: false,
36 include_hidden: false,
37 parallel: true,
38 ignore_file: ".digestignore".to_string(),
39 ignore_patterns: vec![],
40 include_patterns: vec![],
41 })
42 }
43}
44
45impl Default for WalkOptions {
46 fn default() -> Self {
47 WalkOptions {
48 max_file_size: Some(10 * 1024 * 1024), follow_links: false,
50 include_hidden: false,
51 parallel: true,
52 ignore_file: ".digestignore".to_string(),
53 ignore_patterns: vec![],
54 include_patterns: vec![],
55 }
56 }
57}
58
59#[derive(Debug, Clone)]
61pub struct FileInfo {
62 pub path: PathBuf,
64 pub relative_path: PathBuf,
66 pub size: u64,
68 pub file_type: FileType,
70 pub priority: f32,
72}
73
74impl FileInfo {
75 pub fn file_type_display(&self) -> &'static str {
77 use crate::utils::file_ext::FileType;
78 match self.file_type {
79 FileType::Rust => "Rust",
80 FileType::Python => "Python",
81 FileType::JavaScript => "JavaScript",
82 FileType::TypeScript => "TypeScript",
83 FileType::Go => "Go",
84 FileType::Java => "Java",
85 FileType::Cpp => "C++",
86 FileType::C => "C",
87 FileType::CSharp => "C#",
88 FileType::Ruby => "Ruby",
89 FileType::Php => "PHP",
90 FileType::Swift => "Swift",
91 FileType::Kotlin => "Kotlin",
92 FileType::Scala => "Scala",
93 FileType::Haskell => "Haskell",
94 FileType::Markdown => "Markdown",
95 FileType::Json => "JSON",
96 FileType::Yaml => "YAML",
97 FileType::Toml => "TOML",
98 FileType::Xml => "XML",
99 FileType::Html => "HTML",
100 FileType::Css => "CSS",
101 FileType::Text => "Text",
102 FileType::Other => "Other",
103 }
104 }
105}
106
107pub fn walk_directory(root: &Path, options: WalkOptions) -> Result<Vec<FileInfo>> {
109 if !root.exists() {
110 return Err(CodeDigestError::InvalidPath(format!(
111 "Directory does not exist: {}",
112 root.display()
113 ))
114 .into());
115 }
116
117 if !root.is_dir() {
118 return Err(CodeDigestError::InvalidPath(format!(
119 "Path is not a directory: {}",
120 root.display()
121 ))
122 .into());
123 }
124
125 let root = root.canonicalize()?;
126 let walker = build_walker(&root, &options);
127
128 if options.parallel {
129 walk_parallel(walker, &root, &options)
130 } else {
131 walk_sequential(walker, &root, &options)
132 }
133}
134
135fn build_walker(root: &Path, options: &WalkOptions) -> Walk {
137 let mut builder = WalkBuilder::new(root);
138
139 builder
141 .follow_links(options.follow_links)
142 .hidden(!options.include_hidden)
143 .git_ignore(true)
144 .git_global(true)
145 .git_exclude(true)
146 .ignore(true)
147 .parents(true)
148 .add_custom_ignore_filename(&options.ignore_file);
149
150 for pattern in &options.ignore_patterns {
152 let _ = builder.add_ignore(pattern);
153 }
154
155 for pattern in &options.include_patterns {
157 let _ = builder.add_ignore(format!("!{pattern}"));
158 }
159
160 builder.build()
161}
162
163fn walk_sequential(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
165 let mut files = Vec::new();
166
167 for entry in walker {
168 let entry = entry?;
169 let path = entry.path();
170
171 if path.is_dir() {
173 continue;
174 }
175
176 if let Some(file_info) = process_file(path, root, options)? {
178 files.push(file_info);
179 }
180 }
181
182 Ok(files)
183}
184
185fn walk_parallel(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
187 let root = Arc::new(root.to_path_buf());
188 let options = Arc::new(options.clone());
189
190 let entries: Vec<_> = walker.filter_map(|e| e.ok()).filter(|e| !e.path().is_dir()).collect();
192
193 let files: Vec<_> = entries
195 .into_par_iter()
196 .filter_map(|entry| {
197 let path = entry.path();
198 process_file(path, &root, &options).ok().flatten()
199 })
200 .collect();
201
202 Ok(files)
203}
204
205fn process_file(path: &Path, root: &Path, options: &WalkOptions) -> Result<Option<FileInfo>> {
207 let metadata = match std::fs::metadata(path) {
209 Ok(meta) => meta,
210 Err(_) => return Ok(None), };
212
213 let size = metadata.len();
214
215 if let Some(max_size) = options.max_file_size {
217 if size > max_size as u64 {
218 return Ok(None);
219 }
220 }
221
222 let relative_path = path.strip_prefix(root).unwrap_or(path).to_path_buf();
224
225 let file_type = FileType::from_path(path);
227
228 let priority = calculate_priority(&file_type, &relative_path);
230
231 Ok(Some(FileInfo { path: path.to_path_buf(), relative_path, size, file_type, priority }))
232}
233
234fn calculate_priority(file_type: &FileType, relative_path: &Path) -> f32 {
236 let mut score: f32 = match file_type {
237 FileType::Rust => 1.0,
238 FileType::Python => 0.9,
239 FileType::JavaScript => 0.9,
240 FileType::TypeScript => 0.95,
241 FileType::Go => 0.9,
242 FileType::Java => 0.85,
243 FileType::Cpp => 0.85,
244 FileType::C => 0.8,
245 FileType::CSharp => 0.85,
246 FileType::Ruby => 0.8,
247 FileType::Php => 0.75,
248 FileType::Swift => 0.85,
249 FileType::Kotlin => 0.85,
250 FileType::Scala => 0.8,
251 FileType::Haskell => 0.75,
252 FileType::Markdown => 0.6,
253 FileType::Json => 0.5,
254 FileType::Yaml => 0.5,
255 FileType::Toml => 0.5,
256 FileType::Xml => 0.4,
257 FileType::Html => 0.4,
258 FileType::Css => 0.4,
259 FileType::Text => 0.3,
260 FileType::Other => 0.2,
261 };
262
263 let path_str = relative_path.to_string_lossy().to_lowercase();
265 if path_str.contains("main") || path_str.contains("index") {
266 score *= 1.5;
267 }
268 if path_str.contains("lib") || path_str.contains("src") {
269 score *= 1.2;
270 }
271 if path_str.contains("test") || path_str.contains("spec") {
272 score *= 0.8;
273 }
274 if path_str.contains("example") || path_str.contains("sample") {
275 score *= 0.7;
276 }
277
278 if relative_path.parent().is_none() || relative_path.parent() == Some(Path::new("")) {
280 match file_type {
281 FileType::Toml | FileType::Yaml | FileType::Json => score *= 1.3,
282 _ => {}
283 }
284 }
285
286 score.min(2.0) }
288
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::{self, File};
    use tempfile::TempDir;

    /// Creates an empty file at `path`, panicking if that fails.
    fn touch(path: impl AsRef<Path>) {
        File::create(path).unwrap();
    }

    /// Reports whether any walked file has exactly this relative path.
    fn has_relative_path(files: &[FileInfo], relative: &str) -> bool {
        files
            .iter()
            .any(|info| info.relative_path == PathBuf::from(relative))
    }

    #[test]
    fn test_walk_directory_basic() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        touch(root.join("main.rs"));
        touch(root.join("lib.rs"));
        fs::create_dir(root.join("src")).unwrap();
        touch(root.join("src/utils.rs"));

        let files = walk_directory(root, WalkOptions::default()).unwrap();

        assert_eq!(files.len(), 3);
        assert!(has_relative_path(&files, "main.rs"));
        assert!(has_relative_path(&files, "lib.rs"));
        assert!(has_relative_path(&files, "src/utils.rs"));
    }

    #[test]
    fn test_walk_with_digestignore() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        touch(root.join("main.rs"));
        touch(root.join("ignored.rs"));

        // The custom ignore file excludes one of the two sources.
        fs::write(root.join(".digestignore"), "ignored.rs").unwrap();

        let files = walk_directory(root, WalkOptions::default()).unwrap();

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].relative_path, PathBuf::from("main.rs"));
    }

    #[test]
    fn test_priority_calculation() {
        let score_of = |kind: FileType, path: &str| calculate_priority(&kind, Path::new(path));

        let rust_priority = score_of(FileType::Rust, "src/main.rs");
        let test_priority = score_of(FileType::Rust, "tests/test.rs");
        let doc_priority = score_of(FileType::Markdown, "README.md");

        assert!(rust_priority > doc_priority);
        assert!(rust_priority > test_priority);
    }

    #[test]
    fn test_file_size_limit() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // 1 MiB file, above the 512 KiB limit configured below.
        let oversized = root.join("large.txt");
        fs::write(&oversized, vec![0u8; 1024 * 1024]).unwrap();

        touch(root.join("small.txt"));

        let options = WalkOptions {
            max_file_size: Some(512 * 1024),
            ..Default::default()
        };

        let files = walk_directory(root, options).unwrap();

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].relative_path, PathBuf::from("small.txt"));
    }

    #[test]
    fn test_walk_empty_directory() {
        let temp_dir = TempDir::new().unwrap();

        let files = walk_directory(temp_dir.path(), WalkOptions::default()).unwrap();

        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_walk_options_from_config() {
        use crate::cli::Config;

        let temp_dir = TempDir::new().unwrap();
        let config = Config {
            prompt: None,
            directories: vec![temp_dir.path().to_path_buf()],
            output_file: None,
            max_tokens: None,
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            repo: None,
        };

        let options = WalkOptions::from_config(&config).unwrap();

        assert_eq!(options.max_file_size, Some(10 * 1024 * 1024));
        assert!(!options.follow_links);
        assert!(!options.include_hidden);
        assert!(options.parallel);
        assert_eq!(options.ignore_file, ".digestignore");
    }

    #[test]
    fn test_walk_with_custom_options() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        touch(root.join("main.rs"));
        touch(root.join("test.rs"));
        touch(root.join("readme.md"));

        let options = WalkOptions {
            ignore_patterns: vec!["*.md".to_string()],
            ..Default::default()
        };

        let files = walk_directory(root, options).unwrap();

        // Lower bound only: this does not check that `readme.md` is excluded.
        assert!(files.len() >= 2);
        assert!(has_relative_path(&files, "main.rs"));
        assert!(has_relative_path(&files, "test.rs"));
    }

    #[test]
    fn test_walk_with_include_patterns() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        touch(root.join("main.rs"));
        touch(root.join("lib.rs"));
        touch(root.join("README.md"));

        let options = WalkOptions {
            include_patterns: vec!["*.rs".to_string()],
            ..Default::default()
        };

        let files = walk_directory(root, options).unwrap();

        // Lower bound only: this does not check what happens to `README.md`.
        assert!(files.len() >= 2);
        assert!(has_relative_path(&files, "main.rs"));
        assert!(has_relative_path(&files, "lib.rs"));
    }

    #[test]
    fn test_walk_subdirectories() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        let src_dir = root.join("src");
        let utils_dir = src_dir.join("utils");
        fs::create_dir(&src_dir).unwrap();
        fs::create_dir(&utils_dir).unwrap();

        touch(root.join("main.rs"));
        touch(src_dir.join("lib.rs"));
        touch(utils_dir.join("helpers.rs"));

        let files = walk_directory(root, WalkOptions::default()).unwrap();

        assert_eq!(files.len(), 3);
        assert!(has_relative_path(&files, "main.rs"));
        assert!(has_relative_path(&files, "src/lib.rs"));
        assert!(has_relative_path(&files, "src/utils/helpers.rs"));
    }

    #[test]
    fn test_priority_edge_cases() {
        let score_of = |kind: FileType, path: &str| calculate_priority(&kind, Path::new(path));

        // Entry-point names outrank a plain library file.
        let main_priority = score_of(FileType::Rust, "main.rs");
        let lib_priority = score_of(FileType::Rust, "lib.rs");
        let nested_main_priority = score_of(FileType::Rust, "src/main.rs");

        assert!(main_priority > lib_priority);
        assert!(nested_main_priority > lib_priority);

        // Configuration files in the root outrank nested ones.
        let toml_priority = score_of(FileType::Toml, "Cargo.toml");
        let nested_toml_priority = score_of(FileType::Toml, "config/app.toml");

        assert!(toml_priority > nested_toml_priority);
    }

    #[test]
    fn test_file_info_file_type_display() {
        let rust_file = FileInfo {
            path: PathBuf::from("test.rs"),
            relative_path: PathBuf::from("test.rs"),
            size: 1000,
            file_type: FileType::Rust,
            priority: 1.0,
        };

        assert_eq!(rust_file.file_type_display(), "Rust");

        let markdown_file = FileInfo {
            path: PathBuf::from("README.md"),
            relative_path: PathBuf::from("README.md"),
            size: 500,
            file_type: FileType::Markdown,
            priority: 0.6,
        };

        assert_eq!(markdown_file.file_type_display(), "Markdown");
    }
}