pub mod git;
pub mod walker;
use std::path::PathBuf;
use crate::config::Config;
use crate::error::PctxError;
use crate::filter::patterns::PatternMatcher;
pub struct ScanResult {
pub files: Vec<PathBuf>,
pub errors: Vec<(PathBuf, PctxError)>,
}
pub struct Scanner<'a> {
config: &'a Config,
user_pattern_matcher: PatternMatcher,
base_paths: Vec<PathBuf>,
}
impl<'a> Scanner<'a> {
pub fn new(config: &'a Config) -> Self {
let user_pattern_matcher =
PatternMatcher::new(&config.exclude_patterns, &config.include_patterns);
let base_paths: Vec<PathBuf> = config
.paths
.iter()
.filter_map(|p| dunce::canonicalize(p).ok())
.collect();
Self {
config,
user_pattern_matcher,
base_paths,
}
}
pub fn scan(&self) -> Result<ScanResult, PctxError> {
let mut all_files = Vec::new();
let mut errors = Vec::new();
for path in &self.config.paths {
if !path.exists() {
let err = PctxError::FileNotFound(path.clone());
if self.config.verbose {
eprintln!("Warning: {}", err);
}
errors.push((path.clone(), err));
continue;
}
let canonical = dunce::canonicalize(path).unwrap_or_else(|_| path.clone());
if canonical.is_file() {
if self.should_include(&canonical) {
all_files.push(canonical);
}
} else if canonical.is_dir() {
let files = if self.config.use_gitignore && git::is_inside_git_repo(&canonical) {
match git::scan_git_repo(&canonical, self.config) {
Ok(files) => files,
Err(e) => {
if self.config.verbose {
eprintln!(
"Warning: git scan failed ({}), falling back to directory walk",
e
);
}
walker::scan_directory(&canonical, self.config)?
}
}
} else {
walker::scan_directory(&canonical, self.config)?
};
for file in files {
if self.should_include(&file) {
all_files.push(file);
}
}
}
}
all_files.sort();
all_files.dedup();
Ok(ScanResult {
files: all_files,
errors,
})
}
pub fn scan_paths(&self, paths: Vec<PathBuf>) -> Result<ScanResult, PctxError> {
let mut all_files = Vec::new();
let mut errors = Vec::new();
for path in paths {
if !path.exists() {
if self.config.verbose {
eprintln!("Warning: file not found, skipping: {}", path.display());
}
errors.push((path.clone(), PctxError::FileNotFound(path.clone())));
continue;
}
let canonical = dunce::canonicalize(&path).unwrap_or_else(|_| path.clone());
if canonical.is_file() {
if self.should_include(&canonical) {
all_files.push(canonical);
}
} else if canonical.is_dir() {
let files = if self.config.use_gitignore && git::is_inside_git_repo(&canonical) {
match git::scan_git_repo(&canonical, self.config) {
Ok(files) => files,
Err(e) => {
if self.config.verbose {
eprintln!(
"Warning: git scan failed ({}), falling back to directory walk",
e
);
}
walker::scan_directory(&canonical, self.config)?
}
}
} else {
walker::scan_directory(&canonical, self.config)?
};
for file in files {
if self.should_include(&file) {
all_files.push(file);
}
}
}
}
all_files.sort();
all_files.dedup();
Ok(ScanResult {
files: all_files,
errors,
})
}
fn should_include(&self, path: &PathBuf) -> bool {
let relative = self
.base_paths
.iter()
.find_map(|base| path.strip_prefix(base).ok().map(|p| p.to_path_buf()))
.or_else(|| {
std::env::current_dir()
.ok()
.and_then(|cwd| path.strip_prefix(&cwd).ok().map(|p| p.to_path_buf()))
})
.unwrap_or_else(|| path.clone());
if self.user_pattern_matcher.is_excluded(&relative) {
return false;
}
if !self.user_pattern_matcher.is_included(&relative) {
return false;
}
if let Ok(metadata) = std::fs::metadata(path) {
if metadata.len() > self.config.max_file_size {
return false;
}
}
true
}
}