use crate::utils::error::CodeDigestError;
use crate::utils::file_ext::FileType;
use anyhow::Result;
use glob::Pattern;
use ignore::{Walk, WalkBuilder};
use rayon::prelude::*;
use std::path::{Path, PathBuf};
use std::sync::Arc;
/// A custom priority rule whose glob pattern has already been compiled.
#[derive(Debug, Clone)]
pub struct CompiledPriority {
/// Compiled glob used to test a file's relative path.
pub matcher: Pattern,
/// Amount added to a matching file's base priority score.
pub weight: f32,
/// The pattern text exactly as it was supplied when the rule was built.
pub original_pattern: String,
}
impl CompiledPriority {
    /// Compiles `pattern` into a glob matcher and pairs it with `weight`.
    ///
    /// # Errors
    /// Returns the `glob::PatternError` reported when `pattern` is not a valid glob.
    pub fn new(pattern: &str, weight: f32) -> Result<Self, glob::PatternError> {
        Pattern::new(pattern).map(|matcher| Self {
            matcher,
            weight,
            original_pattern: pattern.to_owned(),
        })
    }

    /// Builds a compiled rule from a configuration `Priority` entry by
    /// forwarding its pattern and weight to [`CompiledPriority::new`].
    ///
    /// # Errors
    /// Returns the `glob::PatternError` reported for an invalid pattern.
    pub fn try_from_config_priority(
        priority: &crate::config::Priority,
    ) -> Result<Self, glob::PatternError> {
        let glob_text = priority.pattern.as_str();
        Self::new(glob_text, priority.weight)
    }
}
/// Settings that control how a directory tree is walked and how files are scored.
#[derive(Debug, Clone)]
pub struct WalkOptions {
/// Files larger than this many bytes are skipped; `None` disables the size check.
pub max_file_size: Option<usize>,
/// Forwarded to the walker's `follow_links` setting.
pub follow_links: bool,
/// When `false`, the walker's hidden-file filter is enabled.
pub include_hidden: bool,
/// Selects the parallel file-processing path instead of the sequential one.
pub parallel: bool,
/// Name of the custom ignore file registered with the walker.
pub ignore_file: String,
/// Patterns for paths to exclude; consumed when the walker is built.
pub ignore_patterns: Vec<String>,
/// Patterns for paths to include; consumed when the walker is built.
pub include_patterns: Vec<String>,
/// Custom priority rules, checked in order; the first matching rule adds its weight.
pub custom_priorities: Vec<CompiledPriority>,
}
impl WalkOptions {
pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
let mut custom_priorities = Vec::new();
for priority in &config.custom_priorities {
match CompiledPriority::try_from_config_priority(priority) {
Ok(compiled) => custom_priorities.push(compiled),
Err(e) => {
return Err(CodeDigestError::ConfigError(format!(
"Invalid glob pattern '{}' in custom priorities: {}",
priority.pattern, e
))
.into());
}
}
}
Ok(WalkOptions {
max_file_size: Some(10 * 1024 * 1024), follow_links: false,
include_hidden: false,
parallel: true,
ignore_file: ".digestignore".to_string(),
ignore_patterns: vec![],
include_patterns: vec![],
custom_priorities,
})
}
}
impl Default for WalkOptions {
fn default() -> Self {
WalkOptions {
max_file_size: Some(10 * 1024 * 1024), follow_links: false,
include_hidden: false,
parallel: true,
ignore_file: ".digestignore".to_string(),
ignore_patterns: vec![],
include_patterns: vec![],
custom_priorities: vec![],
}
}
}
/// Metadata recorded for one file discovered during a walk.
#[derive(Debug, Clone)]
pub struct FileInfo {
/// Path of the file as yielded by the walker.
pub path: PathBuf,
/// `path` with the walk root stripped; falls back to `path` when stripping fails.
pub relative_path: PathBuf,
/// File length in bytes as reported by the file's metadata.
pub size: u64,
/// File type derived from the path.
pub file_type: FileType,
/// Priority score: the base score plus the weight of the first matching custom rule.
pub priority: f32,
}
impl FileInfo {
pub fn file_type_display(&self) -> &'static str {
use crate::utils::file_ext::FileType;
match self.file_type {
FileType::Rust => "Rust",
FileType::Python => "Python",
FileType::JavaScript => "JavaScript",
FileType::TypeScript => "TypeScript",
FileType::Go => "Go",
FileType::Java => "Java",
FileType::Cpp => "C++",
FileType::C => "C",
FileType::CSharp => "C#",
FileType::Ruby => "Ruby",
FileType::Php => "PHP",
FileType::Swift => "Swift",
FileType::Kotlin => "Kotlin",
FileType::Scala => "Scala",
FileType::Haskell => "Haskell",
FileType::Markdown => "Markdown",
FileType::Json => "JSON",
FileType::Yaml => "YAML",
FileType::Toml => "TOML",
FileType::Xml => "XML",
FileType::Html => "HTML",
FileType::Css => "CSS",
FileType::Text => "Text",
FileType::Other => "Other",
}
}
}
/// Walks `root` and returns a [`FileInfo`] for every file that passes the filters.
///
/// The root is canonicalized before walking, and `options.parallel` selects
/// between the parallel and sequential processing paths.
///
/// # Errors
/// Returns `CodeDigestError::InvalidPath` when `root` does not exist or is not a
/// directory, and propagates failures from canonicalization or from the
/// selected walk routine.
pub fn walk_directory(root: &Path, options: WalkOptions) -> Result<Vec<FileInfo>> {
    // Validate the root first so callers get a descriptive message.
    let problem = if !root.exists() {
        Some(format!("Directory does not exist: {}", root.display()))
    } else if !root.is_dir() {
        Some(format!("Path is not a directory: {}", root.display()))
    } else {
        None
    };
    if let Some(message) = problem {
        return Err(CodeDigestError::InvalidPath(message).into());
    }

    let canonical_root = root.canonicalize()?;
    let walker = build_walker(&canonical_root, &options);

    if !options.parallel {
        return walk_sequential(walker, &canonical_root, &options);
    }
    walk_parallel(walker, &canonical_root, &options)
}
fn build_walker(root: &Path, options: &WalkOptions) -> Walk {
let mut builder = WalkBuilder::new(root);
builder
.follow_links(options.follow_links)
.hidden(!options.include_hidden)
.git_ignore(true)
.git_global(true)
.git_exclude(true)
.ignore(true)
.parents(true)
.add_custom_ignore_filename(&options.ignore_file);
for pattern in &options.ignore_patterns {
let _ = builder.add_ignore(pattern);
}
for pattern in &options.include_patterns {
let _ = builder.add_ignore(format!("!{pattern}"));
}
builder.build()
}
/// Processes walker entries one at a time on the calling thread.
///
/// Directories are skipped, and files that `process_file` filters out are
/// omitted from the result.
///
/// # Errors
/// Stops at, and returns, the first error yielded by the walker or by
/// `process_file`.
fn walk_sequential(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
    let mut collected = Vec::new();
    for outcome in walker {
        let dir_entry = outcome?;
        let candidate = dir_entry.path();
        if !candidate.is_dir() {
            // `Option` is iterable, so `extend` pushes only a `Some` value.
            collected.extend(process_file(candidate, root, options)?);
        }
    }
    Ok(collected)
}
/// Collects the walker's entries, then processes the files in parallel.
///
/// Entries the walker fails to yield are dropped silently, and directories are
/// skipped. Per-file processing failures do not abort the walk: they are listed
/// on stderr and the successfully processed files are returned.
fn walk_parallel(walker: Walk, root: &Path, options: &WalkOptions) -> Result<Vec<FileInfo>> {
    use itertools::Itertools;

    let shared_root = Arc::new(root.to_path_buf());
    let shared_options = Arc::new(options.clone());

    // The walk itself is sequential; only per-file processing is parallel.
    let mut candidates = Vec::new();
    for dir_entry in walker.flatten() {
        if !dir_entry.path().is_dir() {
            candidates.push(dir_entry);
        }
    }

    let outcomes: Vec<Result<Option<FileInfo>, CodeDigestError>> = candidates
        .into_par_iter()
        .map(|dir_entry| {
            let file_path = dir_entry.path();
            process_file(file_path, &shared_root, &shared_options).map_err(|e| {
                CodeDigestError::FileProcessingError {
                    path: file_path.display().to_string(),
                    error: e.to_string(),
                }
            })
        })
        .collect();

    let (processed, failures): (Vec<_>, Vec<_>) = outcomes.into_iter().partition_result();

    if !failures.is_empty() {
        eprintln!("Warning: {} files could not be processed:", failures.len());
        for error in &failures {
            eprintln!(" {error}");
        }
    }

    Ok(processed.into_iter().flatten().collect())
}
/// Builds the [`FileInfo`] for `path`, or returns `Ok(None)` when the file
/// should be skipped.
///
/// A file is skipped when its metadata cannot be read or when its size exceeds
/// `options.max_file_size`. The relative path is `path` with `root` stripped,
/// falling back to `path` itself when `root` is not a prefix.
fn process_file(path: &Path, root: &Path, options: &WalkOptions) -> Result<Option<FileInfo>> {
    // Unreadable metadata is treated as "skip this file", not as an error.
    let size = match std::fs::metadata(path) {
        Ok(metadata) => metadata.len(),
        Err(_) => return Ok(None),
    };

    let over_limit = options.max_file_size.map_or(false, |limit| size > limit as u64);
    if over_limit {
        return Ok(None);
    }

    let relative_path = match path.strip_prefix(root) {
        Ok(stripped) => stripped.to_path_buf(),
        Err(_) => path.to_path_buf(),
    };
    let file_type = FileType::from_path(path);
    let priority = calculate_priority(&file_type, &relative_path, &options.custom_priorities);

    let info = FileInfo { path: path.to_path_buf(), relative_path, size, file_type, priority };
    Ok(Some(info))
}
/// Returns the priority score for a file.
///
/// The score is the base priority plus the weight of the first rule in
/// `custom_priorities` whose glob matches `relative_path`; later matching
/// rules are ignored. With no match the base priority is returned unchanged.
fn calculate_priority(
    file_type: &FileType,
    relative_path: &Path,
    custom_priorities: &[CompiledPriority],
) -> f32 {
    let base_score = calculate_base_priority(file_type, relative_path);
    custom_priorities
        .iter()
        .find(|rule| rule.matcher.matches_path(relative_path))
        .map_or(base_score, |rule| base_score + rule.weight)
}
/// Computes the built-in priority score for a file, capped at 2.0.
///
/// The score starts from a per-file-type value and is then multiplied by a
/// factor for each keyword group found anywhere in the lower-cased relative
/// path. Top-level TOML, YAML and JSON files receive an additional 1.3 boost.
fn calculate_base_priority(file_type: &FileType, relative_path: &Path) -> f32 {
    // Keyword groups and their multipliers, applied in this order.
    const KEYWORD_FACTORS: [([&str; 2], f32); 4] = [
        (["main", "index"], 1.5),
        (["lib", "src"], 1.2),
        (["test", "spec"], 0.8),
        (["example", "sample"], 0.7),
    ];

    let type_score: f32 = match file_type {
        FileType::Rust => 1.0,
        FileType::TypeScript => 0.95,
        FileType::Python | FileType::JavaScript | FileType::Go => 0.9,
        FileType::Java
        | FileType::Cpp
        | FileType::CSharp
        | FileType::Swift
        | FileType::Kotlin => 0.85,
        FileType::C | FileType::Ruby | FileType::Scala => 0.8,
        FileType::Php | FileType::Haskell => 0.75,
        FileType::Markdown => 0.6,
        FileType::Json | FileType::Yaml | FileType::Toml => 0.5,
        FileType::Xml | FileType::Html | FileType::Css => 0.4,
        FileType::Text => 0.3,
        FileType::Other => 0.2,
    };

    let lowered = relative_path.to_string_lossy().to_lowercase();
    let mut score = type_score;
    for (keywords, factor) in &KEYWORD_FACTORS {
        if keywords.iter().any(|keyword| lowered.contains(*keyword)) {
            score *= *factor;
        }
    }

    // A path with no parent, or an empty parent, sits directly in the walk root.
    let at_root = match relative_path.parent() {
        None => true,
        Some(parent) => parent == Path::new(""),
    };
    if at_root && matches!(file_type, FileType::Toml | FileType::Yaml | FileType::Json) {
        score *= 1.3;
    }

    score.min(2.0)
}
// Unit tests for directory walking, size/ignore filtering and priority scoring.
#[cfg(test)]
mod tests {
use super::*;
use std::fs::{self, File};
use tempfile::TempDir;
// Three files (two at the root, one under src/) are all returned with
// root-relative paths.
#[test]
fn test_walk_directory_basic() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path();
File::create(root.join("main.rs")).unwrap();
File::create(root.join("lib.rs")).unwrap();
fs::create_dir(root.join("src")).unwrap();
File::create(root.join("src/utils.rs")).unwrap();
let options = WalkOptions::default();
let files = walk_directory(root, options).unwrap();
assert_eq!(files.len(), 3);
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("main.rs")));
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("lib.rs")));
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("src/utils.rs")));
}
// A .digestignore entry removes the listed file, leaving only main.rs.
#[test]
fn test_walk_with_digestignore() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path();
File::create(root.join("main.rs")).unwrap();
File::create(root.join("ignored.rs")).unwrap();
fs::write(root.join(".digestignore"), "ignored.rs").unwrap();
let options = WalkOptions::default();
let files = walk_directory(root, options).unwrap();
assert_eq!(files.len(), 1);
assert_eq!(files[0].relative_path, PathBuf::from("main.rs"));
}
// src/main.rs must outrank both a README and a file under tests/.
#[test]
fn test_priority_calculation() {
let rust_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
let test_priority = calculate_priority(&FileType::Rust, Path::new("tests/test.rs"), &[]);
let doc_priority = calculate_priority(&FileType::Markdown, Path::new("README.md"), &[]);
assert!(rust_priority > doc_priority);
assert!(rust_priority > test_priority);
}
// A 1 MiB file is excluded when the limit is 512 KiB; the empty file remains.
#[test]
fn test_file_size_limit() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path();
let large_file = root.join("large.txt");
let data = vec![0u8; 1024 * 1024]; fs::write(&large_file, &data).unwrap();
File::create(root.join("small.txt")).unwrap();
let options = WalkOptions {
max_file_size: Some(512 * 1024), ..Default::default()
};
let files = walk_directory(root, options).unwrap();
assert_eq!(files.len(), 1);
assert_eq!(files[0].relative_path, PathBuf::from("small.txt"));
}
// Walking an empty directory yields no files.
#[test]
fn test_walk_empty_directory() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path();
let options = WalkOptions::default();
let files = walk_directory(root, options).unwrap();
assert_eq!(files.len(), 0);
}
// from_config with no custom priorities produces the expected default values.
#[test]
fn test_walk_options_from_config() {
use crate::cli::Config;
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let config = Config {
prompt: None,
paths: Some(vec![temp_dir.path().to_path_buf()]),
output_file: None,
max_tokens: None,
llm_tool: crate::cli::LlmTool::default(),
quiet: false,
verbose: false,
config: None,
progress: false,
repo: None,
read_stdin: false,
copy: false,
enhanced_context: false,
custom_priorities: vec![],
};
let options = WalkOptions::from_config(&config).unwrap();
assert_eq!(options.max_file_size, Some(10 * 1024 * 1024));
assert!(!options.follow_links);
assert!(!options.include_hidden);
assert!(options.parallel);
assert_eq!(options.ignore_file, ".digestignore");
}
// Walks with an ignore pattern of "*.md".
// NOTE(review): the assertions only require at least two files and the presence
// of both .rs files; they do not verify that readme.md is actually excluded.
#[test]
fn test_walk_with_custom_options() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path();
File::create(root.join("main.rs")).unwrap();
File::create(root.join("test.rs")).unwrap();
File::create(root.join("readme.md")).unwrap();
let options =
WalkOptions { ignore_patterns: vec!["*.md".to_string()], ..Default::default() };
let files = walk_directory(root, options).unwrap();
assert!(files.len() >= 2);
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("main.rs")));
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("test.rs")));
}
// Walks with an include pattern of "*.rs".
// NOTE(review): the assertions only require at least two files and the presence
// of both .rs files; they do not verify that README.md is filtered out.
#[test]
fn test_walk_with_include_patterns() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path();
File::create(root.join("main.rs")).unwrap();
File::create(root.join("lib.rs")).unwrap();
File::create(root.join("README.md")).unwrap();
let options =
WalkOptions { include_patterns: vec!["*.rs".to_string()], ..Default::default() };
let files = walk_directory(root, options).unwrap();
assert!(files.len() >= 2);
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("main.rs")));
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("lib.rs")));
}
// Files nested two directory levels deep are found with correct relative paths.
#[test]
fn test_walk_subdirectories() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path();
fs::create_dir(root.join("src")).unwrap();
fs::create_dir(root.join("src").join("utils")).unwrap();
File::create(root.join("main.rs")).unwrap();
File::create(root.join("src").join("lib.rs")).unwrap();
File::create(root.join("src").join("utils").join("helpers.rs")).unwrap();
let options = WalkOptions::default();
let files = walk_directory(root, options).unwrap();
assert_eq!(files.len(), 3);
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("main.rs")));
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("src/lib.rs")));
assert!(files.iter().any(|f| f.relative_path == PathBuf::from("src/utils/helpers.rs")));
}
// main.rs outranks lib.rs, and a root-level TOML file outranks a nested one.
#[test]
fn test_priority_edge_cases() {
let main_priority = calculate_priority(&FileType::Rust, Path::new("main.rs"), &[]);
let lib_priority = calculate_priority(&FileType::Rust, Path::new("lib.rs"), &[]);
let nested_main_priority =
calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
assert!(main_priority > lib_priority);
assert!(nested_main_priority > lib_priority);
let toml_priority = calculate_priority(&FileType::Toml, Path::new("Cargo.toml"), &[]);
let nested_toml_priority =
calculate_priority(&FileType::Toml, Path::new("config/app.toml"), &[]);
assert!(toml_priority > nested_toml_priority);
}
// A custom rule that does not match leaves the base priority untouched.
#[test]
fn test_custom_priority_no_match_returns_base_priority() {
let custom_priorities = [CompiledPriority::new("docs/*.md", 5.0).unwrap()];
let priority =
calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &custom_priorities);
let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
assert_eq!(priority, expected_base);
}
// An exact-path rule adds its weight to the base priority.
#[test]
fn test_custom_priority_single_match_adds_weight() {
let custom_priorities = [CompiledPriority::new("src/core/mod.rs", 10.0).unwrap()];
let priority =
calculate_priority(&FileType::Rust, Path::new("src/core/mod.rs"), &custom_priorities);
let base_priority = calculate_priority(&FileType::Rust, Path::new("src/core/mod.rs"), &[]);
let expected = base_priority + 10.0;
assert_eq!(priority, expected);
}
// A recursive glob ("src/**/*.rs") matches a nested file and adds its weight.
#[test]
fn test_custom_priority_glob_pattern_match() {
let custom_priorities = [CompiledPriority::new("src/**/*.rs", 2.5).unwrap()];
let priority = calculate_priority(
&FileType::Rust,
Path::new("src/api/handlers.rs"),
&custom_priorities,
);
let base_priority =
calculate_priority(&FileType::Rust, Path::new("src/api/handlers.rs"), &[]);
let expected = base_priority + 2.5;
assert_eq!(priority, expected);
}
// A negative weight lowers the priority below the base score.
#[test]
fn test_custom_priority_negative_weight() {
let custom_priorities = [CompiledPriority::new("tests/*", -0.5).unwrap()];
let priority = calculate_priority(
&FileType::Rust,
Path::new("tests/test_utils.rs"),
&custom_priorities,
);
let base_priority =
calculate_priority(&FileType::Rust, Path::new("tests/test_utils.rs"), &[]);
let expected = base_priority - 0.5;
assert_eq!(priority, expected);
}
// When several rules match, only the first one in the list is applied.
#[test]
fn test_custom_priority_first_match_wins() {
let custom_priorities = [
CompiledPriority::new("src/**/*.rs", 5.0).unwrap(),
CompiledPriority::new("src/main.rs", 100.0).unwrap(),
];
let priority =
calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &custom_priorities);
let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
let expected = base_priority + 5.0;
assert_eq!(priority, expected);
}
// A zero-weight rule yields exactly the base priority.
#[test]
fn test_custom_priority_zero_weight() {
let custom_priorities = [CompiledPriority::new("*.rs", 0.0).unwrap()];
let priority =
calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &custom_priorities);
let base_priority = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
assert_eq!(priority, base_priority);
}
// An empty rule list yields exactly the base priority.
#[test]
fn test_custom_priority_empty_list() {
let custom_priorities: &[CompiledPriority] = &[];
let priority =
calculate_priority(&FileType::Rust, Path::new("src/main.rs"), custom_priorities);
let expected_base = calculate_priority(&FileType::Rust, Path::new("src/main.rs"), &[]);
assert_eq!(priority, expected_base);
}
// End-to-end: priorities from a ConfigFile flow through the CLI config and
// WalkOptions into the priorities of the walked files.
#[test]
fn test_config_to_walker_data_flow() {
use crate::config::{ConfigFile, Priority};
use std::fs::{self, File};
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path();
File::create(root.join("high_priority.rs")).unwrap();
File::create(root.join("normal.txt")).unwrap();
fs::create_dir(root.join("logs")).unwrap();
File::create(root.join("logs/app.log")).unwrap();
let config_file = ConfigFile {
priorities: vec![
Priority { pattern: "*.rs".to_string(), weight: 10.0 },
Priority { pattern: "logs/*.log".to_string(), weight: -5.0 },
],
..Default::default()
};
let mut config = crate::cli::Config {
prompt: None,
paths: Some(vec![root.to_path_buf()]),
repo: None,
read_stdin: false,
output_file: None,
max_tokens: None,
llm_tool: crate::cli::LlmTool::default(),
quiet: false,
verbose: false,
config: None,
progress: false,
copy: false,
enhanced_context: false,
custom_priorities: vec![],
};
config_file.apply_to_cli_config(&mut config);
let walk_options = WalkOptions::from_config(&config).unwrap();
let files = walk_directory(root, walk_options).unwrap();
let rs_file = files
.iter()
.find(|f| f.relative_path.to_string_lossy().contains("high_priority.rs"))
.unwrap();
let log_file =
files.iter().find(|f| f.relative_path.to_string_lossy().contains("app.log")).unwrap();
let txt_file = files
.iter()
.find(|f| f.relative_path.to_string_lossy().contains("normal.txt"))
.unwrap();
let base_rs = calculate_base_priority(&rs_file.file_type, &rs_file.relative_path);
let base_txt = calculate_base_priority(&txt_file.file_type, &txt_file.relative_path);
let base_log = calculate_base_priority(&log_file.file_type, &log_file.relative_path);
assert_eq!(rs_file.priority, base_rs + 10.0);
assert_eq!(log_file.priority, base_log - 5.0);
assert_eq!(txt_file.priority, base_txt);
}
// An unclosed character class in a configured pattern makes from_config fail
// with a message mentioning the pattern or the word "Invalid".
#[test]
fn test_invalid_glob_pattern_in_config() {
use crate::config::{ConfigFile, Priority};
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let config_file = ConfigFile {
priorities: vec![Priority { pattern: "[invalid_glob".to_string(), weight: 5.0 }],
..Default::default()
};
let mut config = crate::cli::Config {
prompt: None,
paths: Some(vec![temp_dir.path().to_path_buf()]),
repo: None,
read_stdin: false,
output_file: None,
max_tokens: None,
llm_tool: crate::cli::LlmTool::default(),
quiet: false,
verbose: false,
config: None,
progress: false,
copy: false,
enhanced_context: false,
custom_priorities: vec![],
};
config_file.apply_to_cli_config(&mut config);
let result = WalkOptions::from_config(&config);
assert!(result.is_err());
let error_msg = result.unwrap_err().to_string();
assert!(error_msg.contains("invalid_glob") || error_msg.contains("Invalid"));
}
// A config file with no priorities still produces options that walk successfully.
#[test]
fn test_empty_custom_priorities_config() {
use crate::config::ConfigFile;
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let config_file = ConfigFile {
priorities: vec![], ..Default::default()
};
let mut config = crate::cli::Config {
prompt: None,
paths: Some(vec![temp_dir.path().to_path_buf()]),
repo: None,
read_stdin: false,
output_file: None,
max_tokens: None,
llm_tool: crate::cli::LlmTool::default(),
quiet: false,
verbose: false,
config: None,
progress: false,
copy: false,
enhanced_context: false,
custom_priorities: vec![],
};
config_file.apply_to_cli_config(&mut config);
let walk_options = WalkOptions::from_config(&config).unwrap();
assert!(walk_directory(temp_dir.path(), walk_options).is_ok());
}
// An empty pattern string is accepted and compiled into a single rule.
#[test]
fn test_empty_pattern_in_config() {
use crate::config::{ConfigFile, Priority};
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let config_file = ConfigFile {
priorities: vec![Priority { pattern: "".to_string(), weight: 5.0 }],
..Default::default()
};
let mut config = crate::cli::Config {
prompt: None,
paths: Some(vec![temp_dir.path().to_path_buf()]),
repo: None,
read_stdin: false,
output_file: None,
max_tokens: None,
llm_tool: crate::cli::LlmTool::default(),
quiet: false,
verbose: false,
config: None,
progress: false,
copy: false,
enhanced_context: false,
custom_priorities: vec![],
};
config_file.apply_to_cli_config(&mut config);
let result = WalkOptions::from_config(&config);
assert!(result.is_ok());
let walk_options = result.unwrap();
assert_eq!(walk_options.custom_priorities.len(), 1);
}
// Extreme and infinite weights are accepted by from_config without error.
#[test]
fn test_extreme_weights_in_config() {
use crate::config::{ConfigFile, Priority};
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let config_file = ConfigFile {
priorities: vec![
Priority { pattern: "*.rs".to_string(), weight: f32::MAX },
Priority { pattern: "*.txt".to_string(), weight: f32::MIN },
Priority { pattern: "*.md".to_string(), weight: f32::INFINITY },
Priority { pattern: "*.log".to_string(), weight: f32::NEG_INFINITY },
],
..Default::default()
};
let mut config = crate::cli::Config {
prompt: None,
paths: Some(vec![temp_dir.path().to_path_buf()]),
repo: None,
read_stdin: false,
output_file: None,
max_tokens: None,
llm_tool: crate::cli::LlmTool::default(),
quiet: false,
verbose: false,
config: None,
progress: false,
copy: false,
enhanced_context: false,
custom_priorities: vec![],
};
config_file.apply_to_cli_config(&mut config);
let result = WalkOptions::from_config(&config);
assert!(result.is_ok());
let walk_options = result.unwrap();
assert_eq!(walk_options.custom_priorities.len(), 4);
}
// file_type_display returns the expected labels for Rust and Markdown files.
#[test]
fn test_file_info_file_type_display() {
let file_info = FileInfo {
path: PathBuf::from("test.rs"),
relative_path: PathBuf::from("test.rs"),
size: 1000,
file_type: FileType::Rust,
priority: 1.0,
};
assert_eq!(file_info.file_type_display(), "Rust");
let file_info_md = FileInfo {
path: PathBuf::from("README.md"),
relative_path: PathBuf::from("README.md"),
size: 500,
file_type: FileType::Markdown,
priority: 0.6,
};
assert_eq!(file_info_md.file_type_display(), "Markdown");
}
}