use std::collections::HashSet;
use std::fmt;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use ignore::overrides::OverrideBuilder;
use ignore::WalkBuilder;
use rayon::prelude::*;
use tracing::{debug, warn};
use crate::error::{Result, BrrrError};
use crate::lang::LanguageRegistry;
/// Minimum number of matched files before `scan_with_config` collects file
/// metadata with rayon instead of sequentially (only when `ScanConfig::parallel` is set).
const MIN_FILES_FOR_PARALLEL: usize = 15;
/// Broad category assigned to an error reported while walking the project tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScanErrorKind {
    /// The underlying I/O error was a permission failure.
    PermissionDenied,
    /// The entry could not be found, or the error text mentions a link.
    BrokenSymlink,
    /// Any other I/O error.
    IoError,
    /// The error text mentions a loop or cycle.
    DirectoryLoop,
    /// Anything that fits none of the categories above.
    Other,
}

impl fmt::Display for ScanErrorKind {
    /// Writes the lowercase, human-readable label of the category.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            Self::PermissionDenied => "permission denied",
            Self::BrokenSymlink => "broken symlink",
            Self::IoError => "I/O error",
            Self::DirectoryLoop => "directory loop",
            Self::Other => "other error",
        };
        f.write_str(label)
    }
}
/// A single problem encountered while walking the project tree.
#[derive(Debug, Clone)]
pub struct ScanError {
/// Path associated with the error, when the underlying walker error carries one.
pub path: Option<PathBuf>,
/// Text of the underlying error.
pub message: String,
/// Category assigned to the error.
pub kind: ScanErrorKind,
}
impl ScanError {
fn from_ignore_error(err: &ignore::Error) -> Self {
let message = err.to_string();
let path = Self::extract_path(err);
let kind = if let Some(io_err) = err.io_error() {
match io_err.kind() {
std::io::ErrorKind::PermissionDenied => ScanErrorKind::PermissionDenied,
std::io::ErrorKind::NotFound => ScanErrorKind::BrokenSymlink,
_ => ScanErrorKind::IoError,
}
} else {
Self::classify_from_message(&message)
};
Self {
path,
message,
kind,
}
}
fn extract_path(err: &ignore::Error) -> Option<PathBuf> {
match err {
ignore::Error::WithPath { path, .. } => Some(path.clone()),
ignore::Error::WithDepth { err: inner, .. } => Self::extract_path(inner),
ignore::Error::Loop { child, .. } => Some(child.clone()),
_ => None,
}
}
fn classify_from_message(message: &str) -> ScanErrorKind {
let msg_lower = message.to_lowercase();
if msg_lower.contains("loop") || msg_lower.contains("cycle") {
ScanErrorKind::DirectoryLoop
} else if msg_lower.contains("symlink") || msg_lower.contains("link") {
ScanErrorKind::BrokenSymlink
} else if msg_lower.contains("permission") {
ScanErrorKind::PermissionDenied
} else {
ScanErrorKind::Other
}
}
}
impl fmt::Display for ScanError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(ref path) = self.path {
write!(f, "{}: {} ({})", path.display(), self.message, self.kind)
} else {
write!(f, "{} ({})", self.message, self.kind)
}
}
}
/// Policy applied by `ProjectScanner::scan_with_config` when the directory
/// walker reports an error.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ErrorHandling {
/// Log the error, record it in `ScanResult::errors`, and keep scanning (the default).
#[default]
CollectAndContinue,
/// Abort the scan and return an `Err` on the first walker error.
#[allow(dead_code)]
FailFast,
/// Log the error and keep scanning without recording it in the result.
#[allow(dead_code)]
LogOnly,
}
/// Size and language information about one scanned file.
#[derive(Debug, Clone)]
pub struct FileMetadata {
    /// Path of the file.
    pub path: PathBuf,
    /// File length in bytes as reported by the filesystem.
    pub size: u64,
    /// Language name supplied by the caller, if any.
    pub language: Option<String>,
}

impl FileMetadata {
    /// Reads filesystem metadata for `path` and pairs it with `cached_language`.
    ///
    /// Returns `None` when the metadata cannot be read or `path` is not a
    /// regular file.
    fn from_path_with_language(path: PathBuf, cached_language: Option<String>) -> Option<Self> {
        match fs::metadata(&path) {
            Ok(stat) if stat.is_file() => Some(Self {
                size: stat.len(),
                path,
                language: cached_language,
            }),
            _ => None,
        }
    }
}
/// A path that passed the scan filters, together with the language name
/// detected for it (if any) so it does not have to be detected again.
#[derive(Debug, Clone)]
struct ScannedFile {
path: PathBuf,
language: Option<&'static str>,
}
/// Options controlling `ProjectScanner::scan_with_config`.
#[derive(Debug, Clone, Default)]
pub struct ScanConfig {
/// Language name to restrict the scan to; resolved through the language
/// registry, and the scan fails if the name is unknown.
pub language: Option<String>,
/// File extensions to keep, compared case-insensitively with leading dots
/// removed. Empty means no extension filtering.
pub extensions: Vec<String>,
/// Glob patterns added to the walker's override set exactly as given.
pub include_patterns: Vec<String>,
/// Glob patterns to exclude; a leading `!` is prepended when missing before
/// they are added to the walker's override set.
pub exclude_patterns: Vec<String>,
/// Passed to the walker's `follow_links` setting.
pub follow_symlinks: bool,
/// Maximum directory depth for the walk; `None` leaves the walker's default.
pub max_depth: Option<usize>,
/// When true, the scan also fills `ScanResult::metadata`, `total_bytes`
/// and `by_language`.
pub collect_metadata: bool,
/// Allows metadata collection to run on rayon once at least
/// `MIN_FILES_FOR_PARALLEL` files matched; also selects the walker's thread setting.
pub parallel: bool,
/// Skips the built-in exclude globs (node_modules, __pycache__, virtualenvs,
/// target/debug, target/release, .git).
pub disable_default_excludes: bool,
/// What to do when the walker reports an error.
pub error_handling: ErrorHandling,
/// Turns off hidden-file, parent, and gitignore-style filtering in the
/// walker, and also skips the built-in exclude globs.
pub no_ignore: bool,
}
impl ScanConfig {
    /// Default configuration restricted to the language named `lang`.
    pub fn for_language(lang: &str) -> Self {
        Self {
            language: Some(String::from(lang)),
            ..Self::default()
        }
    }

    /// Default configuration restricted to the given file extensions.
    #[allow(dead_code)]
    pub fn for_extensions(exts: &[&str]) -> Self {
        Self {
            extensions: exts.iter().map(|&ext| String::from(ext)).collect(),
            ..Self::default()
        }
    }

    /// Replaces the include patterns with `patterns`.
    #[allow(dead_code)]
    pub fn with_includes(self, patterns: &[&str]) -> Self {
        Self {
            include_patterns: patterns.iter().map(|&glob| String::from(glob)).collect(),
            ..self
        }
    }

    /// Replaces the exclude patterns with `patterns`.
    #[allow(dead_code)]
    pub fn with_excludes(self, patterns: &[&str]) -> Self {
        Self {
            exclude_patterns: patterns.iter().map(|&glob| String::from(glob)).collect(),
            ..self
        }
    }

    /// Enables metadata collection.
    #[allow(dead_code)]
    pub fn with_metadata(self) -> Self {
        Self {
            collect_metadata: true,
            ..self
        }
    }

    /// Limits the walk to `depth` directory levels.
    #[allow(dead_code)]
    pub fn with_max_depth(self, depth: usize) -> Self {
        Self {
            max_depth: Some(depth),
            ..self
        }
    }

    /// Turns off the built-in exclude globs.
    #[allow(dead_code)]
    pub fn with_default_excludes_disabled(self) -> Self {
        Self {
            disable_default_excludes: true,
            ..self
        }
    }

    /// Sets the error-handling policy.
    #[allow(dead_code)]
    pub fn with_error_handling(self, handling: ErrorHandling) -> Self {
        Self {
            error_handling: handling,
            ..self
        }
    }

    /// Shorthand for selecting [`ErrorHandling::FailFast`].
    #[allow(dead_code)]
    pub fn fail_on_error(self) -> Self {
        self.with_error_handling(ErrorHandling::FailFast)
    }

    /// Sets whether ignore files and hidden-file filtering are bypassed.
    #[allow(dead_code)]
    pub fn with_no_ignore(self, no_ignore: bool) -> Self {
        Self { no_ignore, ..self }
    }
}
/// Optional allow-list of file extensions.
///
/// `None` means "no filtering"; `Some` holds the accepted extensions, which
/// are compared against the lowercased extension of each path.
struct ExtensionFilter {
    extensions: Option<HashSet<String>>,
}

impl ExtensionFilter {
    /// Creates a filter; an empty set disables filtering entirely.
    fn new(extensions: HashSet<String>) -> Self {
        let is_active = !extensions.is_empty();
        Self {
            extensions: is_active.then_some(extensions),
        }
    }

    /// Returns true when `path` passes the filter.
    ///
    /// Without an allow-list every path passes. With one, the path must have
    /// a UTF-8 extension whose lowercase form is in the set.
    #[inline]
    fn matches(&self, path: &Path) -> bool {
        let Some(allowed) = &self.extensions else {
            return true;
        };
        path.extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| allowed.contains(&ext.to_lowercase()))
    }

    /// Returns true when an allow-list is in effect.
    #[inline]
    fn is_filtering(&self) -> bool {
        self.extensions.is_some()
    }
}
/// Filters paths by detected language, optionally requiring one specific language.
struct LanguageFilter<'a> {
// Language name a file must be detected as; `None` means no specific language is required.
target_language: Option<&'a str>,
// Registry used to detect the language of each path.
registry: &'a LanguageRegistry,
}
/// Outcome of a language filter check, carrying the detected language so
/// callers can reuse it.
struct LanguageMatchResult {
// Whether the path passed the filter.
matches: bool,
// Name of the language detected for the path, if any.
language: Option<&'static str>,
}
impl<'a> LanguageFilter<'a> {
    /// Creates a filter for `resolved_name` (or for "any language" when `None`).
    fn new(resolved_name: Option<&'a str>, registry: &'a LanguageRegistry) -> Self {
        Self {
            registry,
            target_language: resolved_name,
        }
    }

    /// Detects the language of `path` and decides whether the path passes.
    ///
    /// * With a target language, the detected language must equal it.
    /// * Without one, the path passes if an extension filter is active or
    ///   if any language was detected.
    ///
    /// The detected language is returned alongside the verdict.
    #[inline]
    fn matches_with_cache(&self, path: &Path, ext_filter: &ExtensionFilter) -> LanguageMatchResult {
        let language = self.registry.detect_language(path).map(|lang| lang.name());
        let matches = match (self.target_language, language) {
            (Some(wanted), Some(found)) => found == wanted,
            (Some(_), None) => false,
            (None, found) => ext_filter.is_filtering() || found.is_some(),
        };
        LanguageMatchResult { matches, language }
    }
}
/// Everything produced by a scan: matched files plus any metadata, errors and warnings.
#[derive(Debug, Clone)]
pub struct ScanResult {
/// Paths of the files that matched the scan.
pub files: Vec<PathBuf>,
/// Per-file metadata; only filled when metadata collection was requested.
pub metadata: Vec<FileMetadata>,
/// Sum of the sizes of all entries in `metadata`.
pub total_bytes: u64,
/// Number of `metadata` entries per language name.
pub by_language: std::collections::HashMap<String, usize>,
/// Errors recorded while walking the tree.
pub errors: Vec<ScanError>,
/// Non-fatal messages, such as files whose metadata could not be read.
pub warnings: Vec<String>,
}
impl ScanResult {
    /// Creates an empty result.
    fn new() -> Self {
        Self {
            files: Vec::new(),
            metadata: Vec::new(),
            total_bytes: 0,
            by_language: std::collections::HashMap::new(),
            errors: Vec::new(),
            warnings: Vec::new(),
        }
    }

    /// Records a matched file.
    fn add_file(&mut self, path: PathBuf) {
        self.files.push(path);
    }

    /// Records file metadata and updates the byte total and per-language count.
    fn add_metadata(&mut self, meta: FileMetadata) {
        self.total_bytes += meta.size;
        if let Some(ref lang) = meta.language {
            *self.by_language.entry(lang.clone()).or_insert(0) += 1;
        }
        self.metadata.push(meta);
    }

    /// Records a walker error.
    fn add_error(&mut self, error: ScanError) {
        self.errors.push(error);
    }

    /// Records a non-fatal warning message.
    fn add_warning(&mut self, warning: String) {
        self.warnings.push(warning);
    }

    /// Returns true when at least one error was recorded.
    pub fn has_errors(&self) -> bool {
        !self.errors.is_empty()
    }

    /// Counts the recorded errors per [`ScanErrorKind`].
    pub fn error_counts(&self) -> std::collections::HashMap<ScanErrorKind, usize> {
        let mut counts = std::collections::HashMap::new();
        for error in &self.errors {
            *counts.entry(error.kind).or_insert(0) += 1;
        }
        counts
    }

    /// Returns a one-line summary such as
    /// `3 total errors (permission denied: 2, broken symlink: 1)`,
    /// or `No errors` when nothing was recorded.
    ///
    /// The per-kind breakdown is ordered by descending count, then by label,
    /// so the same set of errors always produces the same string.
    pub fn error_summary(&self) -> String {
        if self.errors.is_empty() {
            return String::from("No errors");
        }
        let mut tallies: Vec<(String, usize)> = self
            .error_counts()
            .into_iter()
            .map(|(kind, count)| (kind.to_string(), count))
            .collect();
        // HashMap iteration order is arbitrary; sort for a stable summary.
        tallies.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
        let parts: Vec<String> = tallies
            .iter()
            .map(|(label, count)| format!("{}: {}", label, count))
            .collect();
        format!(
            "{} total errors ({})",
            self.errors.len(),
            parts.join(", ")
        )
    }
}
/// Walks a project directory and collects the source files found in it.
pub struct ProjectScanner {
// Directory the scan starts from; checked to exist and be a directory in `new`.
root: PathBuf,
}
impl ProjectScanner {
pub fn new(path: &str) -> Result<Self> {
let root = PathBuf::from(path);
if !root.exists() {
return Err(BrrrError::Io(std::io::Error::new(
std::io::ErrorKind::NotFound,
format!("Project root does not exist: {}", path),
)));
}
if !root.is_dir() {
return Err(BrrrError::Io(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
format!("Project root is not a directory: {}", path),
)));
}
Ok(Self { root })
}
/// Returns the directory this scanner walks.
#[allow(dead_code)]
pub fn root(&self) -> &Path {
    self.root.as_path()
}
pub fn scan_files(&self) -> Result<Vec<PathBuf>> {
let result = self.scan_files_with_errors()?;
if result.has_errors() {
warn!(
"File scan completed with errors: {}",
result.error_summary()
);
for error in &result.errors {
debug!("Scan error: {}", error);
}
}
Ok(result.files)
}
/// Like `scan_files`, but returns the full [`ScanResult`] including the
/// walker errors that were encountered.
pub fn scan_files_with_errors(&self) -> Result<ScanResult> {
    let registry = LanguageRegistry::global();
    let mut result = ScanResult::new();
    for walked in self.build_walker(None)? {
        let entry = match walked {
            Ok(entry) => entry,
            Err(e) => {
                let scan_error = ScanError::from_ignore_error(&e);
                warn!("Failed to scan entry: {}", scan_error);
                debug!("Error details: {:?}", e);
                result.add_error(scan_error);
                continue;
            }
        };
        let path = entry.path();
        // Keep only regular files whose language the registry can detect.
        if path.is_file() && registry.detect_language(path).is_some() {
            result.add_file(path.to_path_buf());
        }
    }
    Ok(result)
}
#[allow(dead_code)]
pub fn scan_language(&self, lang_name: &str) -> Result<Vec<PathBuf>> {
let result = self.scan_language_with_errors(lang_name)?;
if result.has_errors() {
warn!(
"Language scan completed with errors: {}",
result.error_summary()
);
for error in &result.errors {
debug!("Scan error: {}", error);
}
}
Ok(result.files)
}
/// Like `scan_language`, but returns the full [`ScanResult`] including
/// the walker errors that were encountered.
///
/// # Errors
///
/// Returns `BrrrError::UnsupportedLanguage` when `lang_name` is not known
/// to the registry.
#[allow(dead_code)]
pub fn scan_language_with_errors(&self, lang_name: &str) -> Result<ScanResult> {
    let registry = LanguageRegistry::global();
    let Some(target_lang) = registry.get_by_name(lang_name) else {
        return Err(BrrrError::UnsupportedLanguage(lang_name.to_string()));
    };
    // Compare against the registry's name for the language, not the caller's spelling.
    let target_name = target_lang.name();
    let mut result = ScanResult::new();
    for walked in self.build_walker(None)? {
        let entry = match walked {
            Ok(entry) => entry,
            Err(e) => {
                let scan_error = ScanError::from_ignore_error(&e);
                warn!("Failed to scan entry: {}", scan_error);
                debug!("Error details: {:?}", e);
                result.add_error(scan_error);
                continue;
            }
        };
        let path = entry.path();
        let is_target = path.is_file()
            && registry
                .detect_language(path)
                .is_some_and(|lang| lang.name() == target_name);
        if is_target {
            result.add_file(path.to_path_buf());
        }
    }
    Ok(result)
}
#[allow(dead_code)]
pub fn scan_extensions(&self, extensions: &[&str]) -> Result<Vec<PathBuf>> {
let result = self.scan_extensions_with_errors(extensions)?;
if result.has_errors() {
warn!(
"Extension scan completed with errors: {}",
result.error_summary()
);
for error in &result.errors {
debug!("Scan error: {}", error);
}
}
Ok(result.files)
}
/// Like `scan_extensions`, but returns the full [`ScanResult`] including
/// the walker errors that were encountered.
#[allow(dead_code)]
pub fn scan_extensions_with_errors(&self, extensions: &[&str]) -> Result<ScanResult> {
    // Normalise the query: drop leading dots and lowercase.
    let mut wanted: std::collections::HashSet<String> = std::collections::HashSet::new();
    for raw in extensions {
        wanted.insert(raw.trim_start_matches('.').to_lowercase());
    }

    let mut result = ScanResult::new();
    for walked in self.build_walker(None)? {
        let entry = match walked {
            Ok(entry) => entry,
            Err(e) => {
                let scan_error = ScanError::from_ignore_error(&e);
                warn!("Failed to scan entry: {}", scan_error);
                debug!("Error details: {:?}", e);
                result.add_error(scan_error);
                continue;
            }
        };
        let path = entry.path();
        if !path.is_file() {
            continue;
        }
        // Files without a UTF-8 extension never match.
        let has_wanted_extension = match path.extension().and_then(|ext| ext.to_str()) {
            Some(ext) => wanted.contains(&ext.to_lowercase()),
            None => false,
        };
        if has_wanted_extension {
            result.add_file(path.to_path_buf());
        }
    }
    Ok(result)
}
pub fn scan_with_config(&self, config: &ScanConfig) -> Result<ScanResult> {
let registry = LanguageRegistry::global();
let resolved_lang_name: Option<&str> = match &config.language {
Some(lang) => {
let resolved = registry
.get_by_name(lang)
.ok_or_else(|| BrrrError::UnsupportedLanguage(lang.clone()))?;
Some(resolved.name())
}
None => None,
};
let ext_filter = ExtensionFilter::new(
config
.extensions
.iter()
.map(|e| e.trim_start_matches('.').to_lowercase())
.collect(),
);
let lang_filter = LanguageFilter::new(resolved_lang_name, registry);
let walker = self.build_walker_with_config(config)?;
let mut result = ScanResult::new();
let mut filtered: Vec<ScannedFile> = Vec::new();
for entry_result in walker {
match entry_result {
Ok(entry) => {
let path = entry.path();
if path.is_file() && ext_filter.matches(path) {
let match_result = lang_filter.matches_with_cache(path, &ext_filter);
if match_result.matches {
filtered.push(ScannedFile {
path: path.to_path_buf(),
language: match_result.language,
});
}
}
}
Err(e) => {
let scan_error = ScanError::from_ignore_error(&e);
match config.error_handling {
ErrorHandling::FailFast => {
return Err(BrrrError::Io(std::io::Error::new(
std::io::ErrorKind::Other,
format!("Scan failed: {}", scan_error),
)));
}
ErrorHandling::CollectAndContinue => {
warn!("Failed to scan entry: {}", scan_error);
debug!("Error details: {:?}", e);
result.add_error(scan_error);
}
ErrorHandling::LogOnly => {
warn!("Failed to scan entry: {}", scan_error);
debug!("Error details: {:?}", e);
}
}
}
}
}
if config.collect_metadata {
let use_parallel = config.parallel && filtered.len() >= MIN_FILES_FOR_PARALLEL;
if use_parallel {
let errors = Mutex::new(Vec::new());
let metadata: Vec<_> = filtered
.par_iter()
.filter_map(|scanned| {
let cached_lang = scanned.language.map(|s| s.to_string());
match FileMetadata::from_path_with_language(scanned.path.clone(), cached_lang) {
Some(meta) => Some(meta),
None => {
let warning = format!(
"Could not collect metadata for: {}",
scanned.path.display()
);
warn!("{}", warning);
if matches!(config.error_handling, ErrorHandling::CollectAndContinue)
{
errors.lock().unwrap().push(warning);
}
None
}
}
})
.collect();
for warning in errors.into_inner().unwrap() {
result.add_warning(warning);
}
for meta in metadata {
result.add_file(meta.path.clone());
result.add_metadata(meta);
}
} else {
for scanned in filtered {
let cached_lang = scanned.language.map(|s| s.to_string());
if let Some(meta) = FileMetadata::from_path_with_language(scanned.path.clone(), cached_lang) {
result.add_file(meta.path.clone());
result.add_metadata(meta);
} else {
let warning =
format!("Could not collect metadata for: {}", scanned.path.display());
warn!("{}", warning);
if matches!(config.error_handling, ErrorHandling::CollectAndContinue) {
result.add_warning(warning);
}
result.add_file(scanned.path);
}
}
}
} else {
result.files = filtered.into_iter().map(|f| f.path).collect();
}
if result.has_errors() {
warn!(
"Scan completed with errors: {}",
result.error_summary()
);
}
Ok(result)
}
#[allow(dead_code)]
pub fn scan_with_metadata(&self) -> Result<Vec<FileMetadata>> {
let config = ScanConfig {
collect_metadata: true,
parallel: true,
..Default::default()
};
Ok(self.scan_with_config(&config)?.metadata)
}
#[allow(dead_code)]
pub fn scan_language_with_metadata(&self, lang_name: &str) -> Result<Vec<FileMetadata>> {
let config = ScanConfig {
language: Some(lang_name.to_string()),
collect_metadata: true,
parallel: true,
..Default::default()
};
Ok(self.scan_with_config(&config)?.metadata)
}
/// Builds the default directory walker for the project root.
///
/// Hidden-file and gitignore-style filtering are enabled, `.brrrignore`
/// files are honoured, and a fixed list of exclude globs is applied.
/// `max_depth`, when given, limits how deep the walk goes.
fn build_walker(
    &self,
    max_depth: Option<usize>,
) -> Result<impl Iterator<Item = std::result::Result<ignore::DirEntry, ignore::Error>>> {
    // Globs that are always excluded by this walker.
    const DEFAULT_EXCLUDES: [&str; 11] = [
        "!**/node_modules/**",
        "!**/__pycache__/**",
        "!**/.venv/**",
        "!**/venv/**",
        "!**/target/debug/**",
        "!**/target/release/**",
        "!**/.git/**",
        "!**/dist/**",
        "!**/build/**",
        "!**/*.min.js",
        "!**/*.min.css",
    ];

    let mut builder = WalkBuilder::new(&self.root);
    builder.hidden(true);
    builder.parents(true);
    builder.git_ignore(true);
    builder.git_global(true);
    builder.git_exclude(true);
    builder.add_custom_ignore_filename(".brrrignore");
    if let Some(depth) = max_depth {
        builder.max_depth(Some(depth));
    }

    let mut overrides = OverrideBuilder::new(&self.root);
    for &glob in &DEFAULT_EXCLUDES {
        let _ = overrides.add(glob);
    }
    if let Ok(built) = overrides.build() {
        builder.overrides(built);
    }
    Ok(builder.build())
}
fn build_walker_with_config(
&self,
config: &ScanConfig,
) -> Result<impl Iterator<Item = std::result::Result<ignore::DirEntry, ignore::Error>>> {
let mut builder = WalkBuilder::new(&self.root);
if config.no_ignore {
builder
.hidden(false) .parents(false) .git_ignore(false) .git_global(false) .git_exclude(false) .ignore(false) .follow_links(config.follow_symlinks);
} else {
builder
.hidden(true)
.parents(true)
.git_ignore(true)
.git_global(true)
.git_exclude(true)
.follow_links(config.follow_symlinks)
.add_custom_ignore_filename(".brrrignore");
}
if let Some(depth) = config.max_depth {
builder.max_depth(Some(depth));
}
let mut overrides = OverrideBuilder::new(&self.root);
if !config.disable_default_excludes && !config.no_ignore {
let _ = overrides.add("!**/node_modules/**");
let _ = overrides.add("!**/__pycache__/**");
let _ = overrides.add("!**/.venv/**");
let _ = overrides.add("!**/venv/**");
let _ = overrides.add("!**/target/debug/**");
let _ = overrides.add("!**/target/release/**");
let _ = overrides.add("!**/.git/**");
}
for pattern in &config.exclude_patterns {
let exclude = if pattern.starts_with('!') {
pattern.clone()
} else {
format!("!{}", pattern)
};
let _ = overrides.add(&exclude);
}
for pattern in &config.include_patterns {
let _ = overrides.add(pattern);
}
if let Ok(built) = overrides.build() {
builder.overrides(built);
}
if config.parallel {
builder.threads(0); } else {
builder.threads(1);
}
Ok(builder.build())
}
/// Counts the files under the root whose language is recognised.
///
/// Walker errors are logged at `debug` level and skipped; when any
/// occurred, a `warn` notes that the count may be incomplete.
#[allow(dead_code)]
pub fn estimate_file_count(&self) -> Result<usize> {
    let registry = LanguageRegistry::global();
    let mut error_count = 0;
    let mut count: usize = 0;
    for walked in self.build_walker(None)? {
        match walked {
            Ok(entry) => {
                let path = entry.path();
                if path.is_file() && registry.detect_language(path).is_some() {
                    count += 1;
                }
            }
            Err(err) => {
                debug!("Error during file count: {:?}", err);
                error_count += 1;
            }
        }
    }
    if error_count > 0 {
        warn!(
            "File count encountered {} errors (count may be incomplete)",
            error_count
        );
    }
    Ok(count)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use tempfile::TempDir;

    /// Creates an empty file at `root/relative`.
    fn touch(root: &Path, relative: &str) {
        File::create(root.join(relative)).unwrap();
    }

    /// Builds a scanner rooted at `root`.
    fn scanner_for(root: &Path) -> ProjectScanner {
        ProjectScanner::new(root.to_str().unwrap()).unwrap()
    }

    /// Returns true when any path in `files` contains `needle` as a substring.
    fn any_path_contains(files: &[PathBuf], needle: &str) -> bool {
        files.iter().any(|p| p.to_str().unwrap().contains(needle))
    }

    /// Small mixed-language project with a `src` directory and a `node_modules` directory.
    fn create_test_project() -> TempDir {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        for name in ["main.py", "lib.py", "utils.rs", "app.ts"].iter() {
            touch(root, name);
        }
        std::fs::create_dir(root.join("src")).unwrap();
        touch(root, "src/module.py");
        touch(root, "src/helper.rs");
        std::fs::create_dir(root.join("node_modules")).unwrap();
        touch(root, "node_modules/dep.js");
        dir
    }

    #[test]
    fn test_scan_files() {
        let dir = create_test_project();
        let files = scanner_for(dir.path()).scan_files().unwrap();
        for expected in ["main.py", "utils.rs", "app.ts"].iter() {
            assert!(files.iter().any(|p| p.ends_with(expected)));
        }
        assert!(!any_path_contains(&files, "node_modules"));
    }

    #[test]
    fn test_scan_language() {
        let dir = create_test_project();
        let py_files = scanner_for(dir.path()).scan_language("python").unwrap();
        assert_eq!(py_files.len(), 3);
        assert!(py_files.iter().all(|p| p.extension().unwrap() == "py"));
    }

    #[test]
    fn test_scan_extensions() {
        let dir = create_test_project();
        let rs_files = scanner_for(dir.path()).scan_extensions(&[".rs"]).unwrap();
        assert_eq!(rs_files.len(), 2);
    }

    #[test]
    fn test_scan_with_metadata() {
        let dir = create_test_project();
        let metadata = scanner_for(dir.path()).scan_with_metadata().unwrap();
        assert!(!metadata.is_empty());
        assert!(metadata.iter().all(|m| m.language.is_some()));
    }

    #[test]
    fn test_scan_config() {
        let dir = create_test_project();
        let config = ScanConfig::for_language("python")
            .with_excludes(&["**/src/**"])
            .with_metadata();
        let result = scanner_for(dir.path()).scan_with_config(&config).unwrap();
        assert_eq!(result.files.len(), 2);
        assert!(result.by_language.contains_key("python"));
    }

    #[test]
    fn test_unsupported_language_error() {
        let dir = create_test_project();
        let result = scanner_for(dir.path()).scan_language("brainfuck");
        assert!(matches!(result, Err(BrrrError::UnsupportedLanguage(_))));
    }

    #[test]
    fn test_scan_language_javascript_alias() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        touch(root, "app.js");
        touch(root, "utils.mjs");
        touch(root, "config.cjs");
        std::fs::create_dir(root.join("src")).unwrap();
        touch(root, "src/helper.js");

        let scanner = scanner_for(root);
        let js_files = scanner.scan_language("javascript");
        assert!(
            js_files.is_ok(),
            "scan_language('javascript') should work: {:?}",
            js_files.err()
        );
        let files = js_files.unwrap();
        assert_eq!(files.len(), 4, "Should find all 4 JS files");
        assert!(
            scanner.scan_language("js").is_ok(),
            "scan_language('js') alias should work"
        );
    }

    #[test]
    fn test_nonexistent_path_error() {
        let result = ProjectScanner::new("/nonexistent/path/12345");
        assert!(matches!(result, Err(BrrrError::Io(_))));
    }

    #[test]
    fn test_disable_default_excludes() {
        let dir = create_test_project();
        let scanner = scanner_for(dir.path());

        let with_defaults = scanner.scan_with_config(&ScanConfig::default()).unwrap();
        assert!(
            !any_path_contains(&with_defaults.files, "node_modules"),
            "node_modules should be excluded by default"
        );

        let config_with_disabled = ScanConfig::default().with_default_excludes_disabled();
        let without_defaults = scanner.scan_with_config(&config_with_disabled).unwrap();
        assert!(
            any_path_contains(&without_defaults.files, "node_modules"),
            "node_modules should be included when default excludes are disabled"
        );
    }

    #[test]
    fn test_disable_default_excludes_with_include_pattern() {
        let dir = create_test_project();
        std::fs::create_dir_all(dir.path().join("node_modules/vendor")).unwrap();
        touch(dir.path(), "node_modules/vendor/lib.js");

        let config = ScanConfig::default()
            .with_default_excludes_disabled()
            .with_includes(&["**/node_modules/vendor/**"]);
        let result = scanner_for(dir.path()).scan_with_config(&config).unwrap();
        assert!(
            any_path_contains(&result.files, "node_modules/vendor"),
            "should find vendored files in node_modules when default excludes are disabled"
        );
    }

    #[test]
    fn test_scan_files_with_errors_returns_scan_result() {
        let dir = create_test_project();
        let result = scanner_for(dir.path()).scan_files_with_errors().unwrap();
        assert!(!result.files.is_empty());
        assert!(!result.has_errors());
        assert_eq!(result.error_summary(), "No errors");
    }

    #[test]
    fn test_scan_language_with_errors_returns_scan_result() {
        let dir = create_test_project();
        let result = scanner_for(dir.path())
            .scan_language_with_errors("python")
            .unwrap();
        assert_eq!(result.files.len(), 3);
        assert!(!result.has_errors());
    }

    #[test]
    fn test_scan_extensions_with_errors_returns_scan_result() {
        let dir = create_test_project();
        let result = scanner_for(dir.path())
            .scan_extensions_with_errors(&[".rs"])
            .unwrap();
        assert_eq!(result.files.len(), 2);
        assert!(!result.has_errors());
    }

    #[test]
    fn test_error_handling_config() {
        let explicit = ScanConfig::default().with_error_handling(ErrorHandling::FailFast);
        assert_eq!(explicit.error_handling, ErrorHandling::FailFast);

        let shorthand = ScanConfig::default().fail_on_error();
        assert_eq!(shorthand.error_handling, ErrorHandling::FailFast);

        let collect =
            ScanConfig::default().with_error_handling(ErrorHandling::CollectAndContinue);
        assert_eq!(collect.error_handling, ErrorHandling::CollectAndContinue);

        let log_only = ScanConfig::default().with_error_handling(ErrorHandling::LogOnly);
        assert_eq!(log_only.error_handling, ErrorHandling::LogOnly);
    }

    #[test]
    fn test_scan_error_kind_display() {
        let expected = [
            (ScanErrorKind::PermissionDenied, "permission denied"),
            (ScanErrorKind::BrokenSymlink, "broken symlink"),
            (ScanErrorKind::IoError, "I/O error"),
            (ScanErrorKind::DirectoryLoop, "directory loop"),
            (ScanErrorKind::Other, "other error"),
        ];
        for (kind, label) in expected.iter() {
            assert_eq!(format!("{}", kind), *label);
        }
    }

    #[test]
    fn test_scan_error_display() {
        let error_with_path = ScanError {
            path: Some(PathBuf::from("/test/file.txt")),
            message: "test error".to_string(),
            kind: ScanErrorKind::PermissionDenied,
        };
        let rendered = format!("{}", error_with_path);
        assert!(rendered.contains("/test/file.txt"));
        assert!(rendered.contains("test error"));
        assert!(rendered.contains("permission denied"));

        let error_without_path = ScanError {
            path: None,
            message: "test error".to_string(),
            kind: ScanErrorKind::IoError,
        };
        let rendered = format!("{}", error_without_path);
        assert!(rendered.contains("test error"));
        assert!(rendered.contains("I/O error"));
    }

    #[test]
    fn test_scan_result_error_counts() {
        let samples = [
            ("/a", "error 1", ScanErrorKind::PermissionDenied),
            ("/b", "error 2", ScanErrorKind::PermissionDenied),
            ("/c", "error 3", ScanErrorKind::BrokenSymlink),
        ];
        let mut result = ScanResult::new();
        for (path, message, kind) in samples.iter() {
            result.add_error(ScanError {
                path: Some(PathBuf::from(path)),
                message: message.to_string(),
                kind: *kind,
            });
        }

        let counts = result.error_counts();
        assert_eq!(counts.get(&ScanErrorKind::PermissionDenied), Some(&2));
        assert_eq!(counts.get(&ScanErrorKind::BrokenSymlink), Some(&1));
        assert!(result.has_errors());
        let summary = result.error_summary();
        assert!(summary.contains("3 total errors"));
    }

    #[test]
    fn test_scan_result_warnings() {
        let mut result = ScanResult::new();
        result.add_warning("warning 1".to_string());
        result.add_warning("warning 2".to_string());
        assert_eq!(result.warnings.len(), 2);
        assert!(result.warnings.contains(&"warning 1".to_string()));
        assert!(result.warnings.contains(&"warning 2".to_string()));
    }

    #[test]
    fn test_scan_extensions_case_insensitive() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        for name in ["lowercase.py", "uppercase.PY", "mixed.Py", "mixed2.pY", "other.rs"].iter() {
            touch(root, name);
        }
        let scanner = scanner_for(root);

        let py_files = scanner.scan_extensions(&[".py"]).unwrap();
        assert_eq!(py_files.len(), 4, "Should match all .py variants regardless of case");

        let py_files_upper = scanner.scan_extensions(&[".PY"]).unwrap();
        assert_eq!(py_files_upper.len(), 4, "Query with .PY should also match all variants");

        let py_files_no_dot = scanner.scan_extensions(&["py"]).unwrap();
        assert_eq!(py_files_no_dot.len(), 4, "Query without dot should work");
    }

    #[test]
    fn test_scan_config_extensions_case_insensitive() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        for name in ["test1.rs", "test2.RS", "test3.Rs"].iter() {
            touch(root, name);
        }
        let config = ScanConfig::for_extensions(&[".rs"]);
        let result = scanner_for(root).scan_with_config(&config).unwrap();
        assert_eq!(result.files.len(), 3, "Should match all .rs variants regardless of case");
    }

    #[test]
    fn test_estimate_file_count_accuracy() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        touch(root, "root1.py");
        touch(root, "root2.py");
        std::fs::create_dir(root.join("subdir1")).unwrap();
        touch(root, "subdir1/file1.py");
        touch(root, "subdir1/file2.py");
        std::fs::create_dir(root.join("subdir2")).unwrap();
        std::fs::create_dir(root.join("subdir2/nested")).unwrap();
        touch(root, "subdir2/nested/deep.py");
        touch(root, "readme.txt");

        let scanner = scanner_for(root);
        let estimate = scanner.estimate_file_count().unwrap();
        let actual_files = scanner.scan_files().unwrap();
        assert_eq!(
            estimate,
            actual_files.len(),
            "estimate_file_count() should match scan_files() count exactly.\n\
             Estimate: {}, Actual: {}",
            estimate,
            actual_files.len()
        );
        assert_eq!(actual_files.len(), 5);
    }
}