use anyhow::{anyhow, Result};
use std::path::{Path, PathBuf};
/// Parses a comma-separated list of file extensions into normalized form.
///
/// Every entry is trimmed and given exactly one leading `.` when it has none,
/// so both `"md, txt"` and `".md,.txt"` yield `[".md", ".txt"]`. Empty entries
/// (for example from `"md,,txt"` or a trailing comma) are ignored, and so are
/// entries that consist of a lone `.`, because they do not name an extension.
///
/// # Errors
///
/// Returns an error when `s` is empty or whitespace-only, or when none of the
/// entries names an extension (for example `","` or `"."`).
// NOTE(review): the reason for suppressing `dead_code` is not visible in this
// file; presumably some build targets do not call this function — confirm.
#[allow(dead_code)]
pub fn parse_extensions(s: &str) -> Result<Vec<String>> {
    let trimmed = s.trim();
    if trimmed.is_empty() {
        return Err(anyhow!("extensions string is empty"));
    }

    let extensions: Vec<String> = trimmed
        .split(',')
        .map(str::trim)
        // Drop one optional leading dot first, so that a bare "." collapses to
        // the empty string and is rejected by the filter below.
        .map(|ext| ext.strip_prefix('.').unwrap_or(ext))
        .filter(|ext| !ext.is_empty())
        .map(|ext| format!(".{ext}"))
        .collect();

    if extensions.is_empty() {
        return Err(anyhow!("no valid extensions found"));
    }
    Ok(extensions)
}
/// Finds files whose extension is one of a configured set.
///
/// Holds only the list of accepted extensions; all filesystem access happens
/// in the methods of its `impl`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileDiscovery {
    /// Accepted extensions, conventionally written with a leading dot (e.g. `".md"`).
    extensions: Vec<String>,
}
impl FileDiscovery {
    /// Creates a discovery instance that accepts files with any of `extensions`.
    ///
    /// No validation or normalization is performed here. Entries are normally
    /// written with a leading dot (`".md"`, the form [`parse_extensions`]
    /// produces); entries without it (`"md"`) are matched the same way.
    #[must_use]
    pub const fn new(extensions: Vec<String>) -> Self {
        Self { extensions }
    }

    /// Collects every file reachable from `paths` that has a configured extension.
    ///
    /// * A path that is a file is returned only if its extension matches;
    ///   otherwise it is silently left out.
    /// * A path that is a directory is walked recursively. While walking,
    ///   symlinks, entries whose name starts with `.`, entries named like a
    ///   well-known build/dependency/VCS directory (`target`, `node_modules`,
    ///   `vendor`, `dist`, `build`, ...) and entries whose name is not valid
    ///   UTF-8 are skipped. These rules apply to the directory's contents, not
    ///   to the paths passed in explicitly.
    ///
    /// Results follow the order of `paths`; the order inside a directory is
    /// whatever `std::fs::read_dir` yields, which is platform-dependent.
    ///
    /// # Errors
    ///
    /// Returns an error if a path is neither a file nor a directory (for
    /// example because it does not exist), or if a directory or one of its
    /// entries cannot be read.
    pub fn discover(&self, paths: &[PathBuf]) -> Result<Vec<PathBuf>> {
        let mut results = Vec::new();
        for path in paths {
            if path.is_file() {
                if self.matches_extension(path) {
                    results.push(path.clone());
                }
            } else if path.is_dir() {
                self.walk_directory(path, &mut results)?;
            } else {
                return Err(anyhow!(
                    "path does not exist or is not accessible: {}",
                    path.display()
                ));
            }
        }
        Ok(results)
    }

    /// Returns `true` if `path` has a UTF-8 extension that is in the configured set.
    ///
    /// `Path::extension` yields the part after the final `.` without the dot
    /// itself, so each configured entry is compared with its optional leading
    /// dot removed. This avoids allocating a `String` for every inspected file.
    fn matches_extension(&self, path: &Path) -> bool {
        path.extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| {
                self.extensions
                    .iter()
                    .map(String::as_str)
                    .any(|wanted| wanted.strip_prefix('.').unwrap_or(wanted) == ext)
            })
    }

    /// Walks `dir` and appends every matching file to `results`.
    ///
    /// Thin entry point for [`Self::walk_directory_recursive`].
    fn walk_directory(&self, dir: &Path, results: &mut Vec<PathBuf>) -> Result<()> {
        self.walk_directory_recursive(dir, results)
    }

    /// Recursively visits `dir`, appending matching files to `results`.
    ///
    /// Returns `Ok(())` without doing anything when `dir` is not a directory.
    ///
    /// # Errors
    ///
    /// Fails if the directory, one of its entries, or an entry's file type
    /// cannot be read; the error names the offending path and keeps the
    /// underlying I/O error as its source.
    fn walk_directory_recursive(&self, dir: &Path, results: &mut Vec<PathBuf>) -> Result<()> {
        // Names that are never descended into or reported. The dot-prefixed
        // entries are already covered by the hidden-name rule below and are
        // listed for explicitness.
        const SKIP_DIRS: &[&str] = &[
            "target",
            "node_modules",
            "vendor",
            "dist",
            "build",
            ".git",
            ".svn",
            ".hg",
        ];

        if !dir.is_dir() {
            return Ok(());
        }

        let entries = std::fs::read_dir(dir).map_err(|err| {
            anyhow!(err).context(format!("failed to read directory {}", dir.display()))
        })?;

        for entry in entries {
            let entry = entry.map_err(|err| {
                anyhow!(err).context(format!("failed to read an entry of {}", dir.display()))
            })?;

            // Query the type once and reuse it below; `file_type` does not
            // follow symlinks, so skipping symlinks here means the walk never
            // leaves the tree or loops through a link.
            let file_type = entry.file_type().map_err(|err| {
                anyhow!(err).context(format!(
                    "failed to determine file type of {}",
                    entry.path().display()
                ))
            })?;
            if file_type.is_symlink() {
                continue;
            }

            // Entries whose name is not valid UTF-8 cannot be checked against
            // the skip rules and are ignored.
            let file_name = entry.file_name();
            let Some(name) = file_name.to_str() else {
                continue;
            };
            if name.starts_with('.') || SKIP_DIRS.contains(&name) {
                continue;
            }

            let path = entry.path();
            if file_type.is_dir() {
                self.walk_directory_recursive(&path, results)?;
            } else if file_type.is_file() && self.matches_extension(&path) {
                results.push(path);
            }
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;

    /// Runs discovery over the single directory `root` with the given extensions.
    fn discover_in(root: &Path, extensions: &[&str]) -> Vec<PathBuf> {
        let extensions = extensions.iter().map(|ext| (*ext).to_string()).collect();
        FileDiscovery::new(extensions)
            .discover(&[root.to_path_buf()])
            .expect("discovery should succeed for an existing directory")
    }

    /// Creates `root/<dir_name>/<file_name>` with placeholder content.
    fn write_in_subdir(root: &Path, dir_name: &str, file_name: &str) -> PathBuf {
        let dir = root.join(dir_name);
        fs::create_dir(&dir).unwrap();
        let file = dir.join(file_name);
        fs::write(&file, "content").unwrap();
        file
    }

    #[test]
    fn test_parse_extensions_single() {
        let result = parse_extensions(".md").expect("Failed to parse");
        assert_eq!(result, vec![".md"]);
    }

    #[test]
    fn test_parse_extensions_normalizes_without_dot() {
        let result = parse_extensions("md").expect("Failed to parse");
        assert_eq!(result, vec![".md"]);
    }

    #[test]
    fn test_parse_extensions_multiple_with_whitespace() {
        let result = parse_extensions(" md , .txt ,").expect("Failed to parse");
        assert_eq!(result, vec![".md", ".txt"]);
    }

    #[test]
    fn test_parse_extensions_rejects_empty_input() {
        assert!(parse_extensions("").is_err());
        assert!(parse_extensions("   ").is_err());
    }

    #[test]
    fn test_parse_extensions_rejects_only_separators() {
        assert!(parse_extensions(" , ,").is_err());
    }

    #[test]
    fn test_empty_directory() {
        let temp_dir = TempDir::new().unwrap();
        let results = discover_in(temp_dir.path(), &[".md"]);
        assert_eq!(results.len(), 0);
    }

    #[test]
    fn test_single_markdown_file() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.md");
        fs::write(&file_path, "# Test").unwrap();
        let results = discover_in(temp_dir.path(), &[".md"]);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0], file_path);
    }

    #[test]
    fn test_skips_hidden_directories() {
        // Use a name that is NOT in the explicit skip list, so this test fails
        // if the generic dot-prefix rule is ever removed.
        let temp_dir = TempDir::new().unwrap();
        write_in_subdir(temp_dir.path(), ".hidden", "file.md");
        let results = discover_in(temp_dir.path(), &[".md"]);
        assert_eq!(results.len(), 0, "Should skip hidden directories");
    }

    #[test]
    fn test_skips_vcs_directories() {
        let temp_dir = TempDir::new().unwrap();
        write_in_subdir(temp_dir.path(), ".git", "file.md");
        let results = discover_in(temp_dir.path(), &[".md"]);
        assert_eq!(results.len(), 0, "Should skip .git directory");
    }

    #[test]
    fn test_skips_target_directory() {
        let temp_dir = TempDir::new().unwrap();
        write_in_subdir(temp_dir.path(), "target", "file.md");
        let results = discover_in(temp_dir.path(), &[".md"]);
        assert_eq!(results.len(), 0, "Should skip target directory");
    }

    #[test]
    fn test_skips_node_modules() {
        let temp_dir = TempDir::new().unwrap();
        write_in_subdir(temp_dir.path(), "node_modules", "readme.md");
        let results = discover_in(temp_dir.path(), &[".md"]);
        assert_eq!(results.len(), 0, "Should skip node_modules");
    }

    #[test]
    fn test_recursive_search() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = write_in_subdir(temp_dir.path(), "docs", "readme.md");
        let results = discover_in(temp_dir.path(), &[".md"]);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0], file_path);
    }

    #[test]
    fn test_filters_non_markdown() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(temp_dir.path().join("test.md"), "markdown").unwrap();
        fs::write(temp_dir.path().join("test.txt"), "text").unwrap();
        fs::write(temp_dir.path().join("test.rs"), "rust").unwrap();
        let results = discover_in(temp_dir.path(), &[".md"]);
        assert_eq!(results.len(), 1, "Should only find .md files");
    }

    #[test]
    fn test_multiple_extensions() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(temp_dir.path().join("test.md"), "markdown").unwrap();
        fs::write(temp_dir.path().join("test.txt"), "text").unwrap();
        fs::write(temp_dir.path().join("test.rs"), "rust").unwrap();
        let results = discover_in(temp_dir.path(), &[".md", ".txt"]);
        assert_eq!(results.len(), 2, "Should find .md and .txt files");
    }

    #[test]
    fn test_explicit_file_path() {
        let temp_dir = TempDir::new().unwrap();
        let matching = temp_dir.path().join("notes.md");
        let other = temp_dir.path().join("notes.txt");
        fs::write(&matching, "markdown").unwrap();
        fs::write(&other, "text").unwrap();
        let discovery = FileDiscovery::new(vec![".md".to_string()]);
        let results = discovery
            .discover(&[matching.clone(), other])
            .expect("explicit file paths should be accepted");
        assert_eq!(results, vec![matching]);
    }

    #[test]
    fn test_missing_path_is_an_error() {
        let temp_dir = TempDir::new().unwrap();
        let missing = temp_dir.path().join("does-not-exist");
        let discovery = FileDiscovery::new(vec![".md".to_string()]);
        assert!(discovery.discover(&[missing]).is_err());
    }
}