use crate::config::Config;
use crate::core_types::FileInfo;
use crate::errors::AppError;
use crate::filtering::{
check_process_last, is_file_type, is_lockfile, passes_extension_filters, passes_size_filter,
};
use anyhow::Result;
use ignore::DirEntry;
use log::{debug, trace, warn};
use std::path::{Path, PathBuf};
use tracing::instrument;
pub(crate) fn process_direntry(
entry_result: Result<DirEntry, ignore::Error>,
config: &Config,
) -> Result<Option<FileInfo>, AppError> {
let entry = match entry_result {
Ok(entry) => entry,
Err(ignore_error) => {
warn!("Walker error: {}", ignore_error);
return Ok(None); }
};
let absolute_path = entry.path().to_path_buf();
trace!("Processing entry: {}", absolute_path.display());
let relative_path = if config.input_is_file {
absolute_path
.file_name()
.map(PathBuf::from)
.unwrap_or_else(|| {
warn!(
"Could not get filename for file input: {}",
absolute_path.display()
);
absolute_path.clone()
})
} else {
absolute_path
.strip_prefix(&config.input_path)
.map(|p| p.to_path_buf())
.unwrap_or_else(|err| {
warn!(
"Failed to strip prefix '{}' from '{}': {}. Using absolute path.",
config.input_path.display(),
absolute_path.display(),
err
);
absolute_path.clone()
})
};
trace!("Calculated relative path: {}", relative_path.display());
let metadata = match entry.metadata() {
Ok(md) => md,
Err(e) => {
warn!(
"Skipping entry '{}' due to metadata error: {}",
absolute_path.display(),
e
);
return Ok(None);
}
};
if !is_file_type(&metadata) {
trace!("Skipping non-file entry: {}", absolute_path.display());
return Ok(None);
}
trace!("Entry is a file: {}", absolute_path.display());
if config.skip_lockfiles && is_lockfile(&absolute_path) {
debug!(
"Skipping lockfile due to --no-lockfiles flag: {}",
absolute_path.display()
);
return Ok(None);
}
trace!("File passed lockfile filter: {}", absolute_path.display());
if !passes_size_filter(&metadata, config) {
debug!(
"Skipping file due to size constraint: {} (Size: {} bytes)",
absolute_path.display(),
metadata.len()
);
return Ok(None);
}
trace!("File passed size filter: {}", absolute_path.display());
if !passes_extension_filters(&absolute_path, config) {
debug!(
"Skipping file due to extension filter: {}",
absolute_path.display()
);
return Ok(None);
}
trace!("File passed extension filter: {}", absolute_path.display());
if !passes_regex_filters(&absolute_path, &relative_path, config)? {
debug!(
"Skipping file due to regex filter: {}",
absolute_path.display()
);
return Ok(None);
}
trace!("File passed regex filters: {}", absolute_path.display());
let (is_last, last_order) =
check_process_last(&relative_path, absolute_path.file_name(), config);
if is_last {
trace!(
"File marked as 'process last' (order {:?}): {}",
last_order,
relative_path.display()
);
}
let file_info = FileInfo {
absolute_path,
relative_path,
size: metadata.len(),
processed_content: None, counts: None, is_process_last: is_last,
process_last_order: last_order,
is_binary: false, };
debug!(
"Entry passed metadata filters: {}",
file_info.relative_path.display()
);
Ok(Some(file_info))
}
/// Checks a file against the optional path and filename regex filters in
/// `config`.
///
/// * `config.path_regex`, when set, is matched against `relative_path` after
///   every backslash has been replaced with a forward slash; at least one
///   pattern must match.
/// * `config.filename_regex`, when set, is matched against the final component
///   of `path`; at least one pattern must match, and a path without a filename
///   component fails the filter.
///
/// A filter that is `None` is not applied. The result is `Ok(true)` only when
/// every configured filter accepts the file. As written, this function never
/// returns `Err`.
#[instrument(level = "debug", skip(config), fields(relative_path = %relative_path.display(), filename = ?path.file_name()))]
fn passes_regex_filters(
    path: &Path,
    relative_path: &Path,
    config: &Config,
) -> Result<bool, AppError> {
    if let Some(path_patterns) = config.path_regex.as_ref() {
        // Normalise separators so patterns can always be written with '/'.
        let normalized = relative_path.to_string_lossy().replace('\\', "/");
        let any_hit = path_patterns
            .iter()
            .any(|pattern| pattern.is_match(&normalized));
        debug!(
            "Checking path regex vector against relative path: regexes={:?}, path={}",
            path_patterns, normalized,
        );
        if !any_hit {
            debug!("Path regex vector did not match relative path");
            return Ok(false);
        }
        debug!("Path regex vector matched relative path");
    }

    // Without filename patterns there is nothing left to check.
    let name_patterns = match config.filename_regex.as_ref() {
        Some(patterns) => patterns,
        None => return Ok(true),
    };

    let name = match path.file_name() {
        Some(os_name) => os_name.to_string_lossy(),
        None => {
            debug!("Path has no filename component, failing filename regex match");
            return Ok(false);
        }
    };

    let any_hit = name_patterns
        .iter()
        .any(|pattern| pattern.is_match(&name));
    debug!(
        "Checking filename regex vector: regexes={:?}, filename={}",
        name_patterns, name,
    );
    if any_hit {
        debug!("Filename regex vector matched");
        Ok(true)
    } else {
        debug!("Filename regex vector did not match");
        Ok(false)
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `passes_regex_filters`.

    use super::*;
    use crate::config::Config;
    use regex::Regex;
    use std::fs::{self, File};
    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Builds a test `Config` whose path and filename regex filters are compiled
    /// from the given pattern lists. `None` leaves the corresponding filter unset.
    ///
    /// Panics if a pattern is not a valid regex, which is acceptable in tests.
    fn create_config_with_regex(
        path_patterns: Option<Vec<&str>>,
        filename_patterns: Option<Vec<&str>>,
    ) -> Config {
        let compile = |patterns: Vec<&str>| {
            patterns
                .iter()
                .map(|p| Regex::new(p).unwrap())
                .collect::<Vec<_>>()
        };
        let mut config = Config::new_for_test();
        config.path_regex = path_patterns.map(compile);
        config.filename_regex = filename_patterns.map(compile);
        config
    }

    /// Returns `(absolute, relative)` paths for `relative` placed under `base`.
    fn create_paths(base: &Path, relative: &str) -> (PathBuf, PathBuf) {
        (base.join(relative), PathBuf::from(relative))
    }

    #[test]
    fn test_regex_no_filters() -> Result<(), AppError> {
        let dir = tempdir().unwrap();
        let (abs_path, rel_path) = create_paths(dir.path(), "test_file.txt");
        File::create(&abs_path).unwrap();

        // With neither filter configured, every file passes.
        let config = create_config_with_regex(None, None);
        assert!(passes_regex_filters(&abs_path, &rel_path, &config)?);
        Ok(())
    }

    #[test]
    fn test_regex_path_match() -> Result<(), AppError> {
        let dir = tempdir().unwrap();
        let (abs_path1, rel_path1) = create_paths(dir.path(), "subdir/match_file.txt");
        let (abs_path2, rel_path2) = create_paths(dir.path(), "no_match_file.txt");
        fs::create_dir_all(abs_path1.parent().unwrap()).unwrap();
        File::create(&abs_path1).unwrap();
        File::create(&abs_path2).unwrap();

        let config = create_config_with_regex(Some(vec!["^subdir/"]), None);
        assert!(passes_regex_filters(&abs_path1, &rel_path1, &config)?);
        assert!(!passes_regex_filters(&abs_path2, &rel_path2, &config)?);
        Ok(())
    }

    #[test]
    fn test_regex_path_match_windows_style_relative() -> Result<(), AppError> {
        let dir = tempdir().unwrap();
        let (abs_path, rel_path) = create_paths(dir.path(), "subdir\\match_file.txt");
        fs::create_dir_all(abs_path.parent().unwrap()).unwrap();
        File::create(&abs_path).unwrap();

        let config_fwd = create_config_with_regex(Some(vec!["^subdir/match"]), None);
        let config_bwd = create_config_with_regex(Some(vec![r"^subdir\\match"]), None);

        // Backslashes in the relative path are normalised to '/', so only the
        // forward-slash pattern can match.
        assert!(passes_regex_filters(&abs_path, &rel_path, &config_fwd)?);
        assert!(!passes_regex_filters(&abs_path, &rel_path, &config_bwd)?);
        Ok(())
    }

    #[test]
    fn test_regex_filename_match() -> Result<(), AppError> {
        let dir = tempdir().unwrap();
        let (abs_path1, rel_path1) = create_paths(dir.path(), "match_this.log");
        let (abs_path2, rel_path2) = create_paths(dir.path(), "ignore_this.txt");
        File::create(&abs_path1).unwrap();
        File::create(&abs_path2).unwrap();

        let config = create_config_with_regex(None, Some(vec![r"^match_.*\.log$"]));
        assert!(passes_regex_filters(&abs_path1, &rel_path1, &config)?);
        assert!(!passes_regex_filters(&abs_path2, &rel_path2, &config)?);
        Ok(())
    }

    #[test]
    fn test_regex_path_and_filename_match() -> Result<(), AppError> {
        let dir = tempdir().unwrap();
        let (abs_path1, rel_path1) = create_paths(dir.path(), "target_dir/target_file.rs");
        let (abs_path2, rel_path2) = create_paths(dir.path(), "target_dir/other_file.rs");
        let (abs_path3, rel_path3) = create_paths(dir.path(), "other_dir/target_file.rs");
        let (abs_path4, rel_path4) = create_paths(dir.path(), "other_dir/another_file.txt");
        fs::create_dir_all(abs_path1.parent().unwrap()).unwrap();
        fs::create_dir_all(abs_path3.parent().unwrap()).unwrap();
        File::create(&abs_path1).unwrap();
        File::create(&abs_path2).unwrap();
        File::create(&abs_path3).unwrap();
        File::create(&abs_path4).unwrap();

        let config = create_config_with_regex(
            Some(vec!["^target_dir/"]),
            Some(vec![r"^target_file\.rs$"]),
        );

        // Both filters must accept the file.
        assert!(passes_regex_filters(&abs_path1, &rel_path1, &config)?); // path and filename match
        assert!(!passes_regex_filters(&abs_path2, &rel_path2, &config)?); // filename differs
        assert!(!passes_regex_filters(&abs_path3, &rel_path3, &config)?); // path differs
        assert!(!passes_regex_filters(&abs_path4, &rel_path4, &config)?); // neither matches
        Ok(())
    }

    #[test]
    fn test_regex_no_filename() -> Result<(), AppError> {
        // "." has no filename component (`Path::file_name` returns `None`).
        let current_dir_abs = PathBuf::from(".");
        let current_dir_rel = PathBuf::from(".");

        // A filename filter cannot be satisfied by a path without a filename.
        let config_filename = create_config_with_regex(None, Some(vec!["anything"]));
        assert!(!passes_regex_filters(
            &current_dir_abs,
            &current_dir_rel,
            &config_filename
        )?);

        // A path filter alone still works, because it only looks at the relative path.
        let config_path = create_config_with_regex(Some(vec![r"^\.$"]), None);
        assert!(passes_regex_filters(
            &current_dir_abs,
            &current_dir_rel,
            &config_path
        )?);
        Ok(())
    }
}