use std::path::{Path, PathBuf};
/// File extensions (lowercase, without the leading dot) that the walker accepts.
///
/// `walk_source_files_with_options` compares each file's extension against this
/// allowlist case-insensitively; a file with any other extension, or with no
/// extension at all, is never returned.
// `rustfmt::skip` keeps the hand-grouped rows below intact.
#[rustfmt::skip]
pub const SOURCE_EXTS: &[&str] = &[
"rs", "py", "ts", "tsx", "js", "jsx", "mjs", "cjs", "go", "java",
"c", "cpp", "h", "hpp", "cs", "rb", "php", "swift", "kt", "kts",
"scala", "groovy", "gradle", "sh",
"md", "mdx",
"yaml", "yml", "toml", "json", "xml",
"txt", "log",
];
/// Directory names that are always excluded from the walk.
///
/// Matching is exact and case-sensitive against individual path components
/// (see `path_in_skipped_dir` and the walker loop), so `vendor` matches
/// `a/vendor/b.rs` but not `a/vendored/b.rs`.
pub const SKIP_DIRS: &[&str] = &[
".git",
"target",
"node_modules",
".venv",
"venv",
"__pycache__",
"dist",
"build",
".build",
".next",
".nuxt",
".svelte-kit",
"vendor",
".cargo",
".npm",
".cache",
".pnpm-store",
".yarn",
".rustup",
".tox",
".bundle",
"coverage",
".nyc_output",
".pytest_cache",
".mypy_cache",
".ruff_cache",
".gradle",
".mvn",
".m2",
"out",
"bin",
"classes",
"generated",
"generated-sources",
"generated-test-sources",
"cdk.out",
"cdk.out2",
".aws-sam",
".turbo",
".idea",
".vscode",
".claude",
".claude-mpm",
".open-mpm",
".cursor",
".aider",
".continue",
".obsidian",
// Fixture / test-data directories. A plain `resources` directory is
// deliberately NOT listed, so `src/test/resources` stays indexed.
"fixtures",
"__fixtures__",
"testdata",
"test-data",
"test_data",
"testresources",
"test_resources",
];
/// Exact file names (case-sensitive) that `should_skip_path` always rejects.
///
/// Every entry is a dependency lock / checksum file.
pub const SKIP_FILES: &[&str] = &[
"Cargo.lock",
"package-lock.json",
"yarn.lock",
"pnpm-lock.yaml",
"poetry.lock",
"Pipfile.lock",
"Gemfile.lock",
"composer.lock",
"go.sum",
];
/// Extensions (lowercase, no dot) that `is_default_doc_excluded` treats as
/// documentation. Compared against the lowercased file extension.
pub const DOC_EXCLUDE_EXTS: &[&str] = &["md", "mdx", "rst", "adoc", "txt"];
/// Lowercase substrings that mark a file as documentation when they occur
/// anywhere in its lowercased file name (extension included), e.g. `CHANGELOG`
/// or `License.txt`.
pub const DOC_EXCLUDE_BASENAME_SUBSTRINGS: &[&str] = &["changelog", "license", "notice"];
/// Knobs that control which files the walker returns.
#[derive(Debug, Clone, Copy)]
pub struct WalkOptions {
    /// When `false`, files for which `is_default_doc_excluded` returns `true`
    /// are dropped from the result.
    pub include_docs: bool,
    /// Passed to the `ignore` walker's gitignore-related filter switches
    /// (`.gitignore`, `.ignore`, git excludes, parent ignore files).
    pub respect_gitignore: bool,
}

impl Default for WalkOptions {
    /// Documentation files are included and ignore files are honoured.
    fn default() -> Self {
        let (include_docs, respect_gitignore) = (true, true);
        WalkOptions {
            include_docs,
            respect_gitignore,
        }
    }
}
/// Reports whether `path` looks like a documentation file.
///
/// A path qualifies when its extension (ASCII case-insensitive) is listed in
/// `DOC_EXCLUDE_EXTS`, or when its lowercased file name contains any entry of
/// `DOC_EXCLUDE_BASENAME_SUBSTRINGS`. Paths without a UTF-8 file name are never
/// classified as documentation.
pub fn is_default_doc_excluded(path: &Path) -> bool {
    let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
        return false;
    };

    let has_doc_extension = path
        .extension()
        .and_then(|e| e.to_str())
        .is_some_and(|ext| {
            DOC_EXCLUDE_EXTS
                .iter()
                .any(|doc_ext| doc_ext.eq_ignore_ascii_case(ext))
        });
    if has_doc_extension {
        return true;
    }

    // Substring match covers extension-less files such as `LICENSE`.
    let lowered = file_name.to_ascii_lowercase();
    DOC_EXCLUDE_BASENAME_SUBSTRINGS
        .iter()
        .any(|needle| lowered.contains(*needle))
}
// Lowercase extensions that `should_skip_path` rejects outright. Mostly binary
// or archive formats, plus `lock`, which catches lock files not named in
// `SKIP_FILES`.
const BINARY_EXTS: &[&str] = &[
"wasm", "so", "dylib", "dll", "exe", "pdf", "png", "jpg", "jpeg", "gif", "ico", "webp", "zip",
"tar", "gz", "bz2", "xz", "7z", "rar", "ttf", "otf", "woff", "woff2", "mp3", "mp4", "mov",
"avi", "mkv", "db", "sqlite", "lock", "pyc", "class", "o", "a",
];
/// Size ceiling in bytes (1 MiB). `should_skip_path` rejects files strictly
/// larger than this.
pub const MAX_FILE_BYTES: u64 = 1_048_576;
// A line longer than this many bytes marks short JS content as minified
// (see `should_skip_content`).
const MAX_LINE_LEN_FOR_MINIFIED: usize = 500;
// JS content with at least this many lines is never treated as minified.
const MIN_LINES_FOR_READABLE_JS: usize = 5;
pub fn should_skip_path(path: &Path) -> bool {
let file_name = match path.file_name().and_then(|n| n.to_str()) {
Some(n) => n,
None => return true, };
if SKIP_FILES.contains(&file_name) {
return true;
}
let ext = path
.extension()
.and_then(|e| e.to_str())
.unwrap_or("")
.to_ascii_lowercase();
if BINARY_EXTS.iter().any(|b| *b == ext) {
return true;
}
if file_name.ends_with(".min.js")
|| file_name.ends_with(".min.css")
|| file_name.ends_with(".bundle.js")
|| file_name.ends_with(".bundle.css")
|| file_name.ends_with(".chunk.js")
{
return true;
}
if ext == "js" || ext == "css" {
if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
if is_hashed_bundle_stem(stem) {
return true;
}
}
}
if let Ok(meta) = std::fs::metadata(path) {
if meta.len() > MAX_FILE_BYTES {
return true;
}
}
false
}
/// Returns `true` if any component of `path` is exactly one of `SKIP_DIRS`.
///
/// Every component is inspected, including the final one; components that are
/// not valid UTF-8 never match.
pub fn path_in_skipped_dir(path: &Path) -> bool {
    for component in path.components() {
        let Some(segment) = component.as_os_str().to_str() else {
            continue;
        };
        if SKIP_DIRS.contains(&segment) {
            return true;
        }
    }
    false
}
/// Content-based check for minified JavaScript.
///
/// Only `.js`, `.mjs` and `.cjs` files (extension compared ASCII
/// case-insensitively) are examined. Such a file is skipped when it has fewer
/// than `MIN_LINES_FOR_READABLE_JS` lines and at least one line longer than
/// `MAX_LINE_LEN_FOR_MINIFIED` bytes. Everything else returns `false`.
pub fn should_skip_content(path: &Path, content: &str) -> bool {
    let is_javascript = path
        .extension()
        .and_then(|e| e.to_str())
        .is_some_and(|ext| {
            ["js", "mjs", "cjs"]
                .iter()
                .any(|known| known.eq_ignore_ascii_case(ext))
        });
    if !is_javascript {
        return false;
    }

    // Single pass: bail out as soon as the file proves to have enough lines
    // to count as readable, otherwise remember whether a long line was seen.
    let mut lines_seen = 0usize;
    let mut saw_long_line = false;
    for line in content.lines() {
        lines_seen += 1;
        if lines_seen >= MIN_LINES_FOR_READABLE_JS {
            return false;
        }
        if line.len() > MAX_LINE_LEN_FOR_MINIFIED {
            saw_long_line = true;
        }
    }
    saw_long_line
}
/// Heuristic: does `stem` (a file name without its extension) look like a
/// bundler output name of the form `<name>-<hash>`?
///
/// The text after the last `-` must be at least 8 characters, consist solely
/// of ASCII letters and digits, and must not be made up of lowercase letters
/// only. The last condition keeps ordinary kebab-case source names such as
/// `user-controller` or `date-formatter` from being mistaken for hashes;
/// suffixes with a digit or an uppercase letter (`ahKOasfG`,
/// `1a2b3c4d5e6f7a8b`) still qualify.
fn is_hashed_bundle_stem(stem: &str) -> bool {
    let Some((_, suffix)) = stem.rsplit_once('-') else {
        return false;
    };
    suffix.len() >= 8
        && suffix.bytes().all(|b| b.is_ascii_alphanumeric())
        // A run of plain lowercase letters is far more likely a word.
        && !suffix.bytes().all(|b| b.is_ascii_lowercase())
}
/// Outcome of a directory walk.
#[derive(Debug, Clone, Default)]
pub struct WalkResult {
    /// Paths of every file that passed all filters, in the order the walker
    /// yielded them.
    pub files: Vec<PathBuf>,
    /// Number of walker entries that could not be read. Despite the name, the
    /// walker increments this once per error it encounters, not once per
    /// directory it skips.
    pub skipped_dirs: usize,
}
/// Walks `root` with the default [`WalkOptions`].
///
/// Convenience wrapper around `walk_source_files_with_options`.
pub fn walk_source_files(root: &Path) -> WalkResult {
    let default_opts = WalkOptions::default();
    walk_source_files_with_options(root, default_opts)
}
pub fn walk_source_files_with_options(root: &Path, opts: WalkOptions) -> WalkResult {
let mut files = Vec::new();
let mut skipped_dirs = 0usize;
let canonical_root = std::fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
let mut builder = ignore::WalkBuilder::new(&canonical_root);
builder
.follow_links(true)
.hidden(false)
.standard_filters(opts.respect_gitignore)
.git_ignore(opts.respect_gitignore)
.git_exclude(opts.respect_gitignore)
.git_global(opts.respect_gitignore)
.ignore(opts.respect_gitignore)
.parents(opts.respect_gitignore)
.require_git(false);
for entry in builder.build() {
let entry = match entry {
Ok(e) => e,
Err(_) => {
skipped_dirs += 1;
continue;
}
};
let is_file = entry.file_type().is_some_and(|ft| ft.is_file());
if !is_file {
continue;
}
let path = entry.path();
if path
.components()
.filter_map(|c| c.as_os_str().to_str())
.any(|seg| SKIP_DIRS.contains(&seg))
{
continue;
}
let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
continue;
};
if !SOURCE_EXTS.iter().any(|e| e.eq_ignore_ascii_case(ext)) {
continue;
}
if should_skip_path(path) {
continue;
}
if !opts.include_docs && is_default_doc_excluded(path) {
continue;
}
files.push(path.to_path_buf());
}
WalkResult {
files,
skipped_dirs,
}
}
// Unit tests for the walker and its path/content filters. Tests that touch the
// filesystem build a throwaway tree with `tempfile::tempdir()`.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
// Smoke test: source and Markdown files are returned; files under `target/`
// and `node_modules/`, and files with non-allowlisted extensions, are not.
#[test]
fn finds_source_files_and_skips_dirs() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::create_dir_all(root.join("src")).unwrap();
fs::create_dir_all(root.join("target/debug")).unwrap();
fs::create_dir_all(root.join("node_modules/foo")).unwrap();
fs::write(root.join("src/main.rs"), "fn main() {}").unwrap();
fs::write(root.join("src/lib.py"), "x = 1").unwrap();
fs::write(root.join("README.md"), "# hi").unwrap();
fs::write(root.join("target/debug/build.o"), b"\0\0").unwrap();
fs::write(root.join("node_modules/foo/index.js"), "// no").unwrap();
fs::write(root.join("binary.bin"), b"\0\0").unwrap();
let result = walk_source_files(root);
let names: Vec<String> = result
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(names.contains(&"main.rs".to_string()));
assert!(names.contains(&"lib.py".to_string()));
assert!(
names.contains(&"README.md".to_string()),
"Markdown included by default (issue #118)"
);
assert!(!names.contains(&"build.o".to_string()));
assert!(!names.contains(&"index.js".to_string()));
assert!(!names.contains(&"binary.bin".to_string()));
}
// Pure predicate test: doc extensions and changelog/license/notice names are
// classified as docs (case-insensitively); source and config files are not.
#[test]
fn test_is_default_doc_excluded() {
assert!(is_default_doc_excluded(Path::new("README.md")));
assert!(is_default_doc_excluded(Path::new("docs/guide.mdx")));
assert!(is_default_doc_excluded(Path::new("docs/intro.rst")));
assert!(is_default_doc_excluded(Path::new("CONTRIBUTING.adoc")));
assert!(is_default_doc_excluded(Path::new("notes.txt")));
assert!(is_default_doc_excluded(Path::new("CHANGELOG")));
assert!(is_default_doc_excluded(Path::new("CHANGELOG.md")));
assert!(is_default_doc_excluded(Path::new("changelog.txt")));
assert!(is_default_doc_excluded(Path::new("LICENSE")));
assert!(is_default_doc_excluded(Path::new("License.txt")));
assert!(is_default_doc_excluded(Path::new("NOTICE")));
assert!(!is_default_doc_excluded(Path::new("src/main.rs")));
assert!(!is_default_doc_excluded(Path::new("src/lib.py")));
assert!(!is_default_doc_excluded(Path::new("Cargo.toml")));
}
// With default options, documentation files are part of the walk result.
#[test]
fn test_default_includes_markdown_and_changelog() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::write(root.join("main.rs"), "fn main() {}").unwrap();
fs::write(root.join("README.md"), "# project").unwrap();
fs::write(root.join("CHANGELOG.md"), "# 1.0.0").unwrap();
fs::write(root.join("LICENSE.md"), "MIT").unwrap();
fs::write(root.join("NOTICE.txt"), "(c)").unwrap();
fs::create_dir_all(root.join("docs")).unwrap();
fs::write(root.join("docs/intro.mdx"), "# intro").unwrap();
let names: Vec<String> = walk_source_files(root)
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(names.contains(&"main.rs".to_string()));
assert!(
names.contains(&"README.md".to_string()),
"README.md included by default (issue #118): {names:?}"
);
assert!(names.contains(&"CHANGELOG.md".to_string()));
assert!(names.contains(&"LICENSE.md".to_string()));
assert!(names.contains(&"NOTICE.txt".to_string()));
assert!(names.contains(&"intro.mdx".to_string()));
}
// Acceptance test for issue #118: a default walk returns both source and docs.
#[test]
fn test_issue_118_acceptance_walks_both_source_and_docs() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::write(root.join("lib.rs"), "pub fn entrypoint() {}").unwrap();
fs::write(
root.join("README.md"),
"# Project\nInstall via cargo install trusty-search.",
)
.unwrap();
fs::write(root.join("CHANGELOG.md"), "## 0.8.3\n- fix #118").unwrap();
let names: Vec<String> = walk_source_files(root)
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(
names.contains(&"lib.rs".to_string()),
"source must be walked: {names:?}"
);
assert!(
names.contains(&"README.md".to_string()),
"README.md must be walked under v0.8.3 default (issue #118): {names:?}"
);
assert!(
names.contains(&"CHANGELOG.md".to_string()),
"CHANGELOG.md must be walked under v0.8.3 default (issue #118): {names:?}"
);
}
// `include_docs: false` drops Markdown / changelog files but keeps source.
#[test]
fn test_include_docs_false_excludes_markdown() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::write(root.join("main.rs"), "fn main() {}").unwrap();
fs::write(root.join("README.md"), "# project").unwrap();
fs::write(root.join("CHANGELOG.md"), "# 1.0.0").unwrap();
let names: Vec<String> = walk_source_files_with_options(
root,
WalkOptions {
include_docs: false,
..WalkOptions::default()
},
)
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(names.contains(&"main.rs".to_string()));
assert!(!names.contains(&"README.md".to_string()));
assert!(!names.contains(&"CHANGELOG.md".to_string()));
}
// The suffix tests below use paths that do not exist on disk, so only the
// name-based rules of `should_skip_path` are exercised.
#[test]
fn test_skips_min_js() {
assert!(should_skip_path(Path::new("foo.min.js")));
assert!(should_skip_path(Path::new("path/to/app.min.js")));
}
// `.min.css` suffix is rejected.
#[test]
fn test_skips_min_css() {
assert!(should_skip_path(Path::new("styles.min.css")));
}
// `.bundle.js` / `.bundle.css` suffixes are rejected.
#[test]
fn test_skips_bundle_js() {
assert!(should_skip_path(Path::new("app.bundle.js")));
assert!(should_skip_path(Path::new("vendor.bundle.css")));
}
// `.chunk.js` suffix is rejected.
#[test]
fn test_skips_chunk_js() {
assert!(should_skip_path(Path::new("runtime.chunk.js")));
}
// `<name>-<hash>.js` names with an 8+ character alphanumeric hash are rejected.
#[test]
fn test_skips_hashed_bundle() {
assert!(should_skip_path(Path::new("index-ahKOasfG.js")));
assert!(should_skip_path(Path::new("vendor-1a2b3c4d5e6f7a8b.js")));
assert!(should_skip_path(Path::new("src/assets/main-AbCdEfGh.js")));
}
// A 7-character suffix is below the 8-character minimum for a hash.
#[test]
fn test_hashed_bundle_too_short_not_skipped() {
assert!(!should_skip_path(Path::new("foo-abcdefg.js")));
}
// Ordinary JS file names without a dash or generated suffix are kept.
#[test]
fn test_keeps_normal_js() {
assert!(!should_skip_path(Path::new("utils.js")));
assert!(!should_skip_path(Path::new("main.js")));
assert!(!should_skip_path(Path::new("src/components/button.js")));
}
// Walker-level check that `node_modules/` contents never appear.
#[test]
fn test_skips_node_modules_dir() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::create_dir_all(root.join("node_modules/lodash")).unwrap();
fs::write(
root.join("node_modules/lodash/index.js"),
"module.exports={}",
)
.unwrap();
fs::write(root.join("real.js"), "export const x = 1;").unwrap();
let result = walk_source_files(root);
let names: Vec<String> = result
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(
!names.contains(&"index.js".to_string()),
"node_modules must be excluded"
);
assert!(names.contains(&"real.js".to_string()));
}
// A real file one byte over `MAX_FILE_BYTES` is rejected by the size check.
#[test]
fn test_skips_large_file() {
let tmp = tempfile::tempdir().expect("tempdir");
let big_path = tmp.path().join("huge.js");
let big_content = "x".repeat((MAX_FILE_BYTES + 1) as usize);
fs::write(&big_path, big_content.as_bytes()).unwrap();
assert!(should_skip_path(&big_path));
}
// A small real file passes the size check.
#[test]
fn test_keeps_small_file() {
let tmp = tempfile::tempdir().expect("tempdir");
let small_path = tmp.path().join("small.js");
fs::write(&small_path, b"const x = 1;").unwrap();
assert!(!should_skip_path(&small_path));
}
// One line of 501 bytes in a `.js` file is classified as minified.
#[test]
fn test_skip_content_detects_minified_js() {
let minified = "a".repeat(501);
assert!(should_skip_content(Path::new("bundle.js"), &minified));
}
// Five short lines: readable JS, not skipped.
#[test]
fn test_skip_content_allows_normal_js() {
let normal = "const x = 1;\nconst y = 2;\nconst z = 3;\nconst w = 4;\nconst v = 5;\n";
assert!(!should_skip_content(Path::new("app.js"), normal));
}
// The minified heuristic only applies to JS extensions.
#[test]
fn test_skip_content_ignores_non_js() {
let long_line = "x".repeat(1000);
assert!(!should_skip_content(Path::new("data.rs"), &long_line));
assert!(!should_skip_content(Path::new("query.py"), &long_line));
}
// `.mjs` and `.cjs` are treated like `.js`.
#[test]
fn test_skip_content_mjs_cjs() {
let minified = "a".repeat(501);
assert!(should_skip_content(Path::new("mod.mjs"), &minified));
assert!(should_skip_content(Path::new("mod.cjs"), &minified));
}
// Every lock / checksum file name is rejected, with or without a parent dir.
#[test]
fn test_skips_lock_files() {
assert!(should_skip_path(Path::new("Cargo.lock")));
assert!(should_skip_path(Path::new("project/Cargo.lock")));
assert!(should_skip_path(Path::new("package-lock.json")));
assert!(should_skip_path(Path::new("yarn.lock")));
assert!(should_skip_path(Path::new("pnpm-lock.yaml")));
assert!(should_skip_path(Path::new("poetry.lock")));
assert!(should_skip_path(Path::new("Pipfile.lock")));
assert!(should_skip_path(Path::new("Gemfile.lock")));
assert!(should_skip_path(Path::new("composer.lock")));
assert!(should_skip_path(Path::new("go.sum")));
}
// Names that merely contain "lock" are not mistaken for lock files.
#[test]
fn test_does_not_skip_non_lock_named_files() {
assert!(!should_skip_path(Path::new("main.rs")));
assert!(!should_skip_path(Path::new("locked_file.rs")));
assert!(!should_skip_path(Path::new("my-cargo-locker.py")));
}
// Walker-level check that lock files in the tree are not returned.
#[test]
fn test_walker_skips_lock_files_in_tree() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::write(root.join("Cargo.lock"), "# lock").unwrap();
fs::write(root.join("package-lock.json"), "{}").unwrap();
fs::write(root.join("yarn.lock"), "# lock").unwrap();
fs::write(root.join("real.rs"), "fn main() {}").unwrap();
let result = walk_source_files(root);
let names: Vec<String> = result
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(names.contains(&"real.rs".to_string()));
assert!(!names.contains(&"Cargo.lock".to_string()));
assert!(!names.contains(&"package-lock.json".to_string()));
assert!(!names.contains(&"yarn.lock".to_string()));
}
// Source files placed inside cache / package-manager dirs are not returned.
#[test]
fn test_walker_skips_new_skip_dirs() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
for dir in [".cache", ".npm", ".build", ".pnpm-store", ".yarn", ".tox"] {
let d = root.join(dir);
fs::create_dir_all(&d).unwrap();
fs::write(d.join("trapped.rs"), "fn x() {}").unwrap();
}
fs::write(root.join("kept.rs"), "fn k() {}").unwrap();
let result = walk_source_files(root);
let names: Vec<String> = result
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(names.contains(&"kept.rs".to_string()));
assert!(
!names.contains(&"trapped.rs".to_string()),
"files inside new SKIP_DIRS must be excluded"
);
}
// Deeply nested files under `cdk.out`, `.aws-sam`, `.turbo`, `.mvn` are excluded.
#[test]
fn test_walker_skips_cdk_and_sam_dirs() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
for dir in ["cdk.out", "cdk.out2", ".aws-sam", ".turbo", ".mvn"] {
let d = root.join(dir).join("asset.abc123/python/lib");
fs::create_dir_all(&d).unwrap();
fs::write(d.join("vendored.py"), "import boto3").unwrap();
}
fs::write(root.join("handler.py"), "def handler(): pass").unwrap();
let result = walk_source_files(root);
let names: Vec<String> = result
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(
names.contains(&"handler.py".to_string()),
"real source must be kept"
);
assert!(
!names.contains(&"vendored.py".to_string()),
"files inside cdk.out / .aws-sam / .turbo / .mvn must be excluded"
);
}
// Fixture / test-data directories are excluded, while `src/test/resources`
// (whose components are not in `SKIP_DIRS`) stays indexed.
#[test]
fn test_walker_skips_fixture_and_test_data_dirs() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
for dir in [
"fixtures",
"__fixtures__",
"testdata",
"test-data",
"test_data",
"testresources",
"test_resources",
] {
let d = root.join(dir);
fs::create_dir_all(&d).unwrap();
fs::write(d.join("sample.py"), "x = 1").unwrap();
}
let kept_resources = root.join("src/test/resources");
fs::create_dir_all(&kept_resources).unwrap();
fs::write(kept_resources.join("config.py"), "y = 2").unwrap();
fs::write(root.join("handler.py"), "def handler(): pass").unwrap();
let result = walk_source_files(root);
let names: Vec<String> = result
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(
names.contains(&"handler.py".to_string()),
"real source must be kept"
);
assert!(
names.contains(&"config.py".to_string()),
"src/test/resources must stay indexed (basename `resources` not skipped)"
);
assert!(
!names.contains(&"sample.py".to_string()),
"files inside fixture / test-data dirs must be excluded"
);
}
// Guards against accidental removal of fixture entries from `SKIP_DIRS`.
#[test]
fn test_skip_dirs_contains_fixture_entries() {
for required in [
"fixtures",
"__fixtures__",
"testdata",
"test-data",
"test_data",
"testresources",
"test_resources",
] {
assert!(
SKIP_DIRS.contains(&required),
"SKIP_DIRS missing required fixture entry: {required}"
);
}
}
// `sql` is absent from the allowlist, so `.sql` files are never returned.
#[test]
fn test_sql_extension_excluded_by_allowlist() {
assert!(
!SOURCE_EXTS.iter().any(|e| e.eq_ignore_ascii_case("sql")),
"`sql` must not be in SOURCE_EXTS — SQL is excluded by the allowlist"
);
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::write(root.join("schema.sql"), "CREATE TABLE t (id INT);").unwrap();
fs::write(root.join("real.rs"), "fn main() {}").unwrap();
let result = walk_source_files(root);
let names: Vec<String> = result
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(names.contains(&"real.rs".to_string()));
assert!(
!names.contains(&"schema.sql".to_string()),
".sql files must be excluded by the SOURCE_EXTS allowlist"
);
}
// Guards against accidental removal of build-artifact entries from `SKIP_DIRS`.
#[test]
fn test_skip_dirs_contains_build_artifact_entries() {
for required in [
"cdk.out",
"cdk.out2",
".aws-sam",
".turbo",
".mvn",
".gradle",
"node_modules",
".venv",
"venv",
"__pycache__",
".next",
"dist",
"build",
"target",
"vendor",
] {
assert!(
SKIP_DIRS.contains(&required),
"SKIP_DIRS missing required build-artifact entry: {required}"
);
}
}
// Guards against accidental removal of the core entries from `SKIP_DIRS`.
#[test]
fn test_skip_dirs_contains_required_entries() {
for required in [
"node_modules",
"target",
"vendor",
".git",
".cargo",
".npm",
"dist",
"build",
".build",
"__pycache__",
".venv",
"venv",
".next",
".nuxt",
"coverage",
".nyc_output",
] {
assert!(
SKIP_DIRS.contains(&required),
"SKIP_DIRS missing required entry: {required}"
);
}
}
// `path_in_skipped_dir` matches a skipped name at any depth of a relative path.
#[test]
fn test_path_in_skipped_dir() {
assert!(path_in_skipped_dir(Path::new(
"project/cdk.out/asset.abc/python/handler.py"
)));
assert!(path_in_skipped_dir(Path::new(".aws-sam/build/app.py")));
assert!(path_in_skipped_dir(Path::new(
"repo/node_modules/lodash/x.js"
)));
assert!(path_in_skipped_dir(Path::new("repo/.turbo/cache/x.js")));
assert!(!path_in_skipped_dir(Path::new("src/handler.py")));
assert!(!path_in_skipped_dir(Path::new("project/src/main.rs")));
}
// Six lines (one of them 600 bytes long): the line-count guard wins, not skipped.
#[test]
fn test_skip_content_multiline_js_not_skipped() {
let content = format!("line1\nline2\nline3\nline4\nline5\n{}\n", "x".repeat(600));
assert!(!should_skip_content(Path::new("ok.js"), &content));
}
// Walking through a symlink to the root yields paths under the canonical
// root, never under the symlink alias. The link is created next to the
// tempdir and removed again before the assertions run.
#[cfg(unix)]
#[test]
fn test_canonicalizes_symlinked_root() {
use std::os::unix::fs::symlink;
let tmp = tempfile::tempdir().expect("tempdir");
let real_root = std::fs::canonicalize(tmp.path()).expect("canonicalize real root");
fs::write(real_root.join("main.rs"), "fn main() {}").unwrap();
let parent = real_root.parent().expect("tempdir has parent");
let link_path = parent.join(format!(
"trusty-search-symlink-{}",
std::process::id() ));
let _ = std::fs::remove_file(&link_path);
symlink(&real_root, &link_path).expect("create symlink");
let result = walk_source_files(&link_path);
let _ = std::fs::remove_file(&link_path);
assert!(!result.files.is_empty(), "walker emitted no files");
for f in &result.files {
assert!(
f.starts_with(&real_root),
"emitted file {f:?} does not start with canonical root {real_root:?}",
);
assert!(
!f.starts_with(&link_path),
"emitted file {f:?} carries the symlink alias instead of canonical path",
);
}
}
// A symlinked subdirectory pointing outside the root is traversed.
#[cfg(unix)]
#[test]
fn test_follows_symlinked_subdirectory() {
use std::os::unix::fs::symlink;
let tmp = tempfile::tempdir().expect("tempdir");
let root = std::fs::canonicalize(tmp.path()).expect("canonicalize root");
let extern_dir = tempfile::tempdir().expect("extern tempdir");
let extern_root = std::fs::canonicalize(extern_dir.path()).expect("canonicalize extern");
fs::write(extern_root.join("linked.rs"), "fn linked() {}").unwrap();
fs::write(root.join("local.rs"), "fn local() {}").unwrap();
symlink(&extern_root, root.join("vendored")).expect("symlink subdir");
let result = walk_source_files(&root);
let names: Vec<String> = result
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(
names.contains(&"local.rs".to_string()),
"baseline local file missing: {names:?}",
);
assert!(
names.contains(&"linked.rs".to_string()),
"file inside symlinked subdir was not indexed: {names:?}",
);
}
// A `.gitignore` in the root prunes the listed directory, even though the
// tempdir is not a git repository.
#[test]
fn test_walker_honors_gitignore() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::write(root.join(".gitignore"), "excluded/\n").unwrap();
fs::create_dir_all(root.join("excluded")).unwrap();
fs::create_dir_all(root.join("included")).unwrap();
fs::write(root.join("excluded/foo.rs"), "fn foo() {}").unwrap();
fs::write(root.join("included/bar.rs"), "fn bar() {}").unwrap();
let names: Vec<String> = walk_source_files(root)
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(
names.contains(&"bar.rs".to_string()),
"included file dropped: {names:?}"
);
assert!(
!names.contains(&"foo.rs".to_string()),
"gitignored file not pruned: {names:?}"
);
}
// With `respect_gitignore: false` the same `.gitignore` has no effect.
#[test]
fn test_walker_respects_disable_flag() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::write(root.join(".gitignore"), "excluded/\n").unwrap();
fs::create_dir_all(root.join("excluded")).unwrap();
fs::create_dir_all(root.join("included")).unwrap();
fs::write(root.join("excluded/foo.rs"), "fn foo() {}").unwrap();
fs::write(root.join("included/bar.rs"), "fn bar() {}").unwrap();
let opts = WalkOptions {
include_docs: false,
respect_gitignore: false,
};
let names: Vec<String> = walk_source_files_with_options(root, opts)
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(
names.contains(&"bar.rs".to_string()),
"included file dropped: {names:?}"
);
assert!(
names.contains(&"foo.rs".to_string()),
"respect_gitignore=false still pruned gitignored file: {names:?}"
);
}
// A `.ignore` file is honoured the same way as `.gitignore`.
#[test]
fn test_walker_honors_dot_ignore() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::write(root.join(".ignore"), "excluded/\n").unwrap();
fs::create_dir_all(root.join("excluded")).unwrap();
fs::create_dir_all(root.join("included")).unwrap();
fs::write(root.join("excluded/foo.rs"), "fn foo() {}").unwrap();
fs::write(root.join("included/bar.rs"), "fn bar() {}").unwrap();
let names: Vec<String> = walk_source_files(root)
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(
names.contains(&"bar.rs".to_string()),
"included file dropped: {names:?}"
);
assert!(
!names.contains(&"foo.rs".to_string()),
".ignore file not honoured: {names:?}"
);
}
// Without any ignore file, the hard-coded `SKIP_DIRS` still exclude
// `target/`, `node_modules/` and `.git/`.
#[test]
fn test_walker_still_skips_hardcoded_dirs() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path();
fs::create_dir_all(root.join("target/debug")).unwrap();
fs::create_dir_all(root.join("node_modules/foo")).unwrap();
fs::create_dir_all(root.join(".git/objects")).unwrap();
fs::write(root.join("target/debug/build.rs"), "fn b() {}").unwrap();
fs::write(root.join("node_modules/foo/index.js"), "// pkg").unwrap();
fs::write(root.join(".git/objects/blob.rs"), "fn g() {}").unwrap();
fs::write(root.join("real.rs"), "fn real() {}").unwrap();
let names: Vec<String> = walk_source_files(root)
.files
.iter()
.filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
.collect();
assert!(
names.contains(&"real.rs".to_string()),
"real source dropped: {names:?}"
);
assert!(
!names.iter().any(|n| n == "build.rs"),
"target/ leaked into walk: {names:?}"
);
assert!(
!names.iter().any(|n| n == "index.js"),
"node_modules/ leaked into walk: {names:?}"
);
assert!(
!names.iter().any(|n| n == "blob.rs"),
".git/ leaked into walk: {names:?}"
);
}
}