use std::collections::HashSet;
use std::path::{Path, PathBuf};
use crate::config::CodeGraphConfig;
use crate::language::LanguageKind;
/// File extensions (without the leading dot) that this module treats as source code.
///
/// `collect_files` keeps only files whose extension appears in this list, while
/// `walk_non_parsed_files` skips them. The comparison is an exact, case-sensitive
/// string match against the file's extension.
const SOURCE_EXTENSIONS: &[&str] = &["ts", "tsx", "js", "jsx", "rs", "py", "go"];
/// Collects the source files found under `root`.
///
/// All filtering is delegated to `collect_files`; see it for the exact rules
/// (directories, `node_modules`, exclude patterns, extension and language checks).
///
/// * `root` - directory where the walk starts.
/// * `config` - supplies the optional exclude patterns.
/// * `verbose` - when `true`, every accepted path is printed to stderr.
/// * `allowed_languages` - optional set restricting which languages are kept.
///
/// # Errors
///
/// The current implementation always returns `Ok`; problems reported by the
/// directory walker are printed as warnings instead of being propagated.
pub fn walk_project(
    root: &Path,
    config: &CodeGraphConfig,
    verbose: bool,
    allowed_languages: Option<&HashSet<LanguageKind>>,
) -> anyhow::Result<Vec<PathBuf>> {
    let mut discovered: Vec<PathBuf> = Vec::new();
    collect_files(root, config, verbose, allowed_languages, &mut discovered);
    Ok(discovered)
}
/// Lists the files under `root` that are *not* treated as source code.
///
/// An entry is returned when it is not a directory, has no `node_modules`
/// path component, is not matched by the exclude patterns in `config`, and its
/// extension is absent from `SOURCE_EXTENSIONS` (files without an extension
/// therefore qualify).
///
/// Entries the walker fails to read are skipped without any diagnostic.
///
/// # Errors
///
/// The current implementation always returns `Ok`.
pub fn walk_non_parsed_files(
    root: &Path,
    config: &CodeGraphConfig,
) -> anyhow::Result<Vec<PathBuf>> {
    let exclude_globs = compile_exclude_patterns(config);

    let non_source: Vec<PathBuf> = ignore::WalkBuilder::new(root)
        .standard_filters(true)
        .require_git(false)
        .build()
        // Unreadable entries are dropped silently.
        .filter_map(Result::ok)
        .filter(|entry| !entry.file_type().is_some_and(|kind| kind.is_dir()))
        .filter(|entry| !path_contains_node_modules(entry.path()))
        .filter(|entry| !is_excluded_by_patterns(entry.path(), &exclude_globs))
        .filter(|entry| {
            let ext = entry
                .path()
                .extension()
                .and_then(|raw| raw.to_str())
                .unwrap_or_default();
            !SOURCE_EXTENSIONS.contains(&ext)
        })
        .map(|entry| entry.path().to_path_buf())
        .collect();

    Ok(non_source)
}
/// Walks `root` and appends every accepted source file path to `out`.
///
/// An entry is accepted when it is not a directory, has no `node_modules`
/// path component, is not matched by the configured exclude patterns, has an
/// extension listed in `SOURCE_EXTENSIONS`, and, when `allowed_languages` is
/// given, at least one of those languages reports a match for the extension.
///
/// Walker errors are printed to stderr as warnings and the entry is skipped.
/// With `verbose` set, each accepted path is also printed to stderr.
fn collect_files(
    root: &Path,
    config: &CodeGraphConfig,
    verbose: bool,
    allowed_languages: Option<&HashSet<LanguageKind>>,
    out: &mut Vec<PathBuf>,
) {
    let exclude_globs = compile_exclude_patterns(config);

    let mut builder = ignore::WalkBuilder::new(root);
    builder.standard_filters(true).require_git(false);

    for item in builder.build() {
        let entry = match item {
            Err(err) => {
                eprintln!("warning: {err}");
                continue;
            }
            Ok(entry) => entry,
        };
        let candidate = entry.path();

        // Cheap structural rejections first: directories, vendored packages,
        // and anything matched by the user's exclude patterns.
        let is_dir = entry.file_type().is_some_and(|kind| kind.is_dir());
        if is_dir
            || path_contains_node_modules(candidate)
            || is_excluded_by_patterns(candidate, &exclude_globs)
        {
            continue;
        }

        // A missing or non-UTF-8 extension becomes "", which is never a source extension.
        let ext = candidate
            .extension()
            .and_then(|raw| raw.to_str())
            .unwrap_or_default();
        if !SOURCE_EXTENSIONS.contains(&ext) {
            continue;
        }

        // With a language filter present, drop the file when none of the
        // allowed languages claims this extension.
        let language_rejected = allowed_languages
            .is_some_and(|langs| langs.iter().all(|lk| !lk.matches_extension(ext)));
        if language_rejected {
            continue;
        }

        if verbose {
            eprintln!("{}", candidate.display());
        }
        out.push(candidate.to_path_buf());
    }
}
/// Reports whether any component of `path` is exactly `node_modules`.
///
/// Only whole components are compared, so names that merely contain the text
/// (for example `node_modules_backup`) do not count. Components that are not
/// valid UTF-8 never match.
fn path_contains_node_modules(path: &Path) -> bool {
    for component in path.components() {
        if component.as_os_str().to_str() == Some("node_modules") {
            return true;
        }
    }
    false
}
/// Compiles the glob strings in `config.exclude` into [`glob::Pattern`]s.
///
/// Returns an empty list when no exclude list is configured. A pattern that
/// fails to compile is skipped, and a warning naming it is printed to stderr
/// so that a typo in the configuration does not silently disable an exclusion.
fn compile_exclude_patterns(config: &CodeGraphConfig) -> Vec<glob::Pattern> {
    let Some(patterns) = &config.exclude else {
        return Vec::new();
    };

    patterns
        .iter()
        .filter_map(|raw| match glob::Pattern::new(raw) {
            Ok(pattern) => Some(pattern),
            Err(err) => {
                // Keep going with the remaining patterns: one bad entry should
                // not abort the walk, but the user needs to hear about it.
                eprintln!("warning: ignoring invalid exclude pattern `{raw}`: {err}");
                None
            }
        })
        .collect()
}
fn is_excluded_by_patterns(path: &Path, compiled: &[glob::Pattern]) -> bool {
if compiled.is_empty() {
return false;
}
let path_str = path.to_string_lossy();
for pattern in compiled {
if pattern.matches(&path_str) {
return true;
}
for component in path.components() {
if let Some(s) = component.as_os_str().to_str()
&& pattern.matches(s)
{
return true;
}
}
}
false
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temporary directory for a test, panicking if that fails.
    fn tmp() -> TempDir {
        tempfile::tempdir().expect("tempdir")
    }

    /// Writes each `(file name, contents)` pair directly under `dir`, in order.
    fn populate(dir: &TempDir, entries: &[(&str, &str)]) {
        for (name, body) in entries {
            fs::write(dir.path().join(name), body).unwrap();
        }
    }

    /// Returns the final component of every path as an owned string.
    fn file_names(paths: &[PathBuf]) -> Vec<String> {
        let mut names = Vec::with_capacity(paths.len());
        for p in paths {
            names.push(p.file_name().unwrap().to_str().unwrap().to_string());
        }
        names
    }

    /// Reports whether `names` contains exactly `wanted`.
    fn has(names: &[String], wanted: &str) -> bool {
        names.iter().any(|n| n == wanted)
    }

    /// Non-source files are returned and source files are left out.
    #[test]
    fn test_walk_non_parsed_finds_non_source_files() {
        let dir = tmp();
        populate(
            &dir,
            &[
                ("main.rs", "fn main() {}"),
                ("app.ts", "export {}"),
                ("README.md", "# Hello"),
                ("config.toml", "[settings]"),
                ("Makefile", "all:"),
            ],
        );

        let config = CodeGraphConfig::default();
        let found = walk_non_parsed_files(dir.path(), &config).unwrap();
        let names = file_names(&found);

        assert!(has(&names, "README.md"), "should find README.md");
        assert!(has(&names, "config.toml"), "should find config.toml");
        assert!(has(&names, "Makefile"), "should find Makefile");
        assert!(!has(&names, "main.rs"), "should NOT find source files");
        assert!(!has(&names, "app.ts"), "should NOT find source files");
    }

    /// Files matched by a configured exclude glob are not returned.
    #[test]
    fn test_walk_non_parsed_respects_exclude_patterns() {
        let dir = tmp();
        populate(
            &dir,
            &[("README.md", "# Hello"), ("config.toml", "[settings]")],
        );

        let config = CodeGraphConfig {
            exclude: Some(vec![String::from("*.toml")]),
            impact: Default::default(),
        };
        let found = walk_non_parsed_files(dir.path(), &config).unwrap();
        let names = file_names(&found);

        assert!(has(&names, "README.md"), "should find non-excluded files");
        assert!(!has(&names, "config.toml"), "should exclude *.toml files");
    }

    /// Nothing located under a `node_modules` directory is returned.
    #[test]
    fn test_walk_non_parsed_excludes_node_modules() {
        let dir = tmp();
        let package_dir = dir.path().join("node_modules").join("pkg");
        fs::create_dir_all(&package_dir).unwrap();
        fs::write(package_dir.join("package.json"), "{}").unwrap();
        populate(&dir, &[("README.md", "# Hello")]);

        let config = CodeGraphConfig::default();
        let found = walk_non_parsed_files(dir.path(), &config).unwrap();
        // Full paths are inspected here, not just file names.
        let full_paths: Vec<String> = found
            .iter()
            .map(|p| p.to_str().unwrap().to_string())
            .collect();

        assert!(
            full_paths.iter().all(|p| !p.contains("node_modules")),
            "should not include node_modules files"
        );
    }

    /// `walk_project` returns source files and leaves out everything else.
    #[test]
    fn test_walk_project_returns_only_source_files() {
        let dir = tmp();
        populate(
            &dir,
            &[("main.rs", "fn main() {}"), ("README.md", "# Hello")],
        );

        let config = CodeGraphConfig::default();
        let found = walk_project(dir.path(), &config, false, None).unwrap();
        let names = file_names(&found);

        assert!(has(&names, "main.rs"), "should find source files");
        assert!(!has(&names, "README.md"), "should NOT find non-source files");
    }
}