use ignore::WalkBuilder;
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::{Mutex, OnceLock};
/// Directory names that are always excluded, wherever they appear below the
/// repository root.
///
/// [`should_skip_path`] compares every component of a root-relative path
/// against this list using exact, case-sensitive equality.
pub const HARDCODED_IGNORE_DIRS: &[&str] = &[
    // Version-control metadata.
    ".git",
    ".hg",
    ".svn",
    ".jj",
    // Python environments and tool caches.
    "__pycache__",
    ".venv",
    "venv",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    // JavaScript dependencies and framework caches.
    "node_modules",
    ".next",
    ".nuxt",
    ".turbo",
    ".parcel-cache",
    // Build output.
    "dist",
    "build",
    "out",
    ".eggs",
    "target",
    // Generic caches and editor/IDE state.
    ".cache",
    ".gradle",
    ".idea",
    ".vscode",
    // This tool's own working directories (current and legacy name).
    ".ast-bro",
    ".ast-outline",
];
/// Registers this tool's custom ignore file name(s) on `builder`.
///
/// The legacy `.ast-outline-ignore` file in `repo_root` is first migrated to
/// `.ast-bro-ignore` (see `migrate_legacy_ignore_file`). Only when that
/// migration reports that a fallback is needed is the legacy name registered
/// as well; the current name is always registered, and always last.
///
/// NOTE(review): registration order is kept exactly as before; how the
/// `ignore` crate ranks multiple custom ignore names is not visible here —
/// confirm against its documentation before reordering.
pub fn add_filters(builder: &mut WalkBuilder, repo_root: &Path) {
    const CURRENT_IGNORE_FILE: &str = ".ast-bro-ignore";
    const LEGACY_IGNORE_FILE: &str = ".ast-outline-ignore";

    if migrate_legacy_ignore_file(repo_root, CURRENT_IGNORE_FILE, LEGACY_IGNORE_FILE) {
        builder.add_custom_ignore_filename(LEGACY_IGNORE_FILE);
    }
    builder.add_custom_ignore_filename(CURRENT_IGNORE_FILE);
}
/// Renames `repo_root/old_name` to `repo_root/new_name` when only the old file
/// exists, and reports whether callers must still honour the old name.
///
/// Returns `true` only when a rename was attempted and failed (the legacy file
/// is then still in place). Returns `false` when the rename succeeded, when no
/// rename was needed, or when `repo_root` is not a directory.
///
/// The outcome is remembered per `repo_root` for the lifetime of the process,
/// so the filesystem is touched (and a message printed to stderr) at most once
/// per root. Non-directory roots are not remembered.
///
/// # Panics
///
/// Panics if the internal cache mutex has been poisoned.
fn migrate_legacy_ignore_file(repo_root: &Path, new_name: &str, old_name: &str) -> bool {
    // Process-wide memo: repo root -> "legacy name still needed".
    static STATE: OnceLock<Mutex<HashMap<PathBuf, bool>>> = OnceLock::new();

    if !repo_root.is_dir() {
        return false;
    }

    // The lock is held across the filesystem work so concurrent callers for
    // the same root cannot both attempt the rename.
    let mut outcomes = STATE
        .get_or_init(|| Mutex::new(HashMap::new()))
        .lock()
        .unwrap();
    if let Some(cached) = outcomes.get(repo_root).copied() {
        return cached;
    }

    let legacy_path = repo_root.join(old_name);
    let current_path = repo_root.join(new_name);

    let mut needs_fallback = false;
    // Never overwrite an existing new-style file.
    if legacy_path.exists() && !current_path.exists() {
        if let Err(e) = fs::rename(&legacy_path, &current_path) {
            eprintln!("warning: could not rename {old_name} -> {new_name}: {e}");
            needs_fallback = true;
        } else {
            eprintln!("info: auto-renamed {old_name} -> {new_name}");
        }
    }

    outcomes.insert(repo_root.to_path_buf(), needs_fallback);
    needs_fallback
}
/// Returns `true` when `path` lies under `repo_root` and any component of its
/// root-relative part is listed in [`HARDCODED_IGNORE_DIRS`].
///
/// Paths that are not inside `repo_root` are never skipped. Matching is exact
/// and case-sensitive, and it applies to every component, including the final
/// file name.
pub fn should_skip_path(path: &Path, repo_root: &Path) -> bool {
    match path.strip_prefix(repo_root) {
        Ok(relative) => relative.components().any(|part| {
            // Every ignored name is plain ASCII, so a component that is not
            // valid UTF-8 can never match.
            part.as_os_str()
                .to_str()
                .is_some_and(|name| HARDCODED_IGNORE_DIRS.contains(&name))
        }),
        Err(_) => false,
    }
}
/// Path component names that mark everything beneath them as test code.
///
/// [`is_test_file`] lower-cases each path component and compares it for exact
/// equality against these entries.
const TEST_DIR_TOKENS: &[&str] = &[
    // Generic test directory names.
    "test",
    "tests",
    "__tests__",
    // End-to-end / browser test suites.
    "e2e",
    "cypress",
    "playwright",
    // Integration tests and fixtures.
    "integration-tests",
    "integration_tests",
    "test-fixtures",
    "fixtures",
    // Spec-style and runner-named directories.
    "spec",
    "specs",
    "mocha",
    "jest",
];
/// File-stem endings that mark a single file as a test.
///
/// [`is_test_file`] checks these against the lower-cased file stem (the file
/// name without its last extension), e.g. `foo_test` or `utils.test`.
const TEST_FILE_SUFFIXES: &[&str] = &[
    // Singular forms.
    "_test",
    ".test",
    "_spec",
    ".spec",
    // Plural forms.
    "_tests",
    ".tests",
    "_specs",
    ".specs",
];
/// Heuristically decides whether `path` is test code.
///
/// The path is examined relative to `repo_root`; if it is not under the root
/// it is examined as given. A file counts as a test when any of these hold
/// (all comparisons are case-insensitive):
///
/// 1. some path component equals an entry of `TEST_DIR_TOKENS`;
/// 2. the file stem ends with an entry of `TEST_FILE_SUFFIXES`;
/// 3. the file stem starts with `test_` and the extension is `py`.
pub fn is_test_file(path: &Path, repo_root: &Path) -> bool {
    let relative = path.strip_prefix(repo_root).unwrap_or(path);

    // Rule 1: a test-named component anywhere in the path.
    let under_test_dir = relative.components().any(|part| {
        let name = part.as_os_str().to_string_lossy().to_lowercase();
        TEST_DIR_TOKENS.contains(&name.as_str())
    });
    if under_test_dir {
        return true;
    }

    let stem = match relative.file_stem() {
        Some(raw) => raw.to_string_lossy().to_lowercase(),
        None => String::new(),
    };

    // Rule 2: `foo_test`, `foo.spec`, ...
    if TEST_FILE_SUFFIXES
        .iter()
        .any(|suffix| stem.ends_with(*suffix))
    {
        return true;
    }

    // Rule 3: the `test_` prefix is honoured for Python files only, so
    // e.g. `test_helpers.go` or `test_utils.rs` stay non-test.
    stem.starts_with("test_")
        && relative
            .extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| ext.eq_ignore_ascii_case("py"))
}
/// Guesses the language of an extension-less script from its `#!` line.
///
/// Only the first 256 bytes of the file are inspected. Returns `None` when:
///
/// * the file cannot be opened or read,
/// * it does not start with `#!`,
/// * the first line is not terminated within the inspected 256 bytes of a
///   file that is at least that long,
/// * the first line is not valid UTF-8, or
/// * the interpreter is not one of the recognised names.
///
/// The interpreter is the basename of the first token after `#!`. When that
/// basename is `env`, following tokens that start with `-` (flags) or contain
/// `=` (variable assignments) are skipped and the basename of the next token
/// is used instead. Trailing digits and dots are stripped, so `python3.11`
/// is treated as `python`, and the name is compared case-insensitively.
///
/// `node`, `nodejs`, `bun` and `deno` all map to `SupportLang::TypeScript`.
pub fn detect_language(path: &Path) -> Option<ast_grep_language::SupportLang> {
    use ast_grep_language::SupportLang;
    use std::io::Read;

    /// Number of leading bytes examined when looking for the shebang line.
    const PROBE_LEN: usize = 256;

    let file = fs::File::open(path).ok()?;
    let mut head = Vec::with_capacity(PROBE_LEN);
    // A single `read` call may legally return fewer bytes than are available
    // and may fail with `Interrupted`; `read_to_end` over a `take` adapter
    // retries both cases, so `head` is short only when the file itself is.
    // (`usize` -> `u64` is a lossless widening of a small constant.)
    file.take(PROBE_LEN as u64).read_to_end(&mut head).ok()?;

    if !head.starts_with(b"#!") {
        return None;
    }

    let line_end = match head.iter().position(|&byte| byte == b'\n') {
        Some(pos) => pos,
        // The probe window is full and still has no newline: the first line
        // is too long to be judged from what was read.
        None if head.len() == PROBE_LEN => return None,
        // Short file whose only line has no trailing newline.
        None => head.len(),
    };

    let first_line = std::str::from_utf8(&head[..line_end]).ok()?;
    // `trim` also removes a trailing `\r` from CRLF files.
    let shebang = first_line.strip_prefix("#!")?.trim();

    let mut tokens = shebang.split_whitespace();
    let command = tokens.next()?;
    let command_basename = command.rsplit('/').next().unwrap_or(command);

    let program = if command_basename == "env" {
        // Skip `env` flags (`-S`, `-i`, ...) and `VAR=value` assignments.
        let target = tokens.find(|tok| !tok.starts_with('-') && !tok.contains('='))?;
        target.rsplit('/').next().unwrap_or(target)
    } else {
        command_basename
    };

    // Drop version suffixes such as `3` or `3.11`.
    let program = program
        .trim_end_matches(|c: char| c.is_ascii_digit() || c == '.')
        .to_lowercase();

    match program.as_str() {
        "python" | "pypy" => Some(SupportLang::Python),
        "ruby" | "rb" => Some(SupportLang::Ruby),
        "node" | "nodejs" | "bun" | "deno" => Some(SupportLang::TypeScript),
        "php" => Some(SupportLang::Php),
        "bash" | "sh" | "zsh" | "ksh" => Some(SupportLang::Bash),
        "lua" | "luajit" => Some(SupportLang::Lua),
        _ => None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    use ast_grep_language::SupportLang;

    /// Fake repository root used by the pure path-classification tests.
    fn root() -> PathBuf {
        PathBuf::from("/r")
    }

    /// Writes `contents` to a file named `file_name` inside a fresh temporary
    /// directory and returns what `detect_language` reports for it.
    fn detect(file_name: &str, contents: impl AsRef<[u8]>) -> Option<SupportLang> {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(file_name);
        std::fs::write(&path, contents).unwrap();
        detect_language(&path)
    }

    // ---- should_skip_path -------------------------------------------------

    #[test]
    fn skip_node_modules_anywhere() {
        let root = root();
        assert!(should_skip_path(&root.join("node_modules/lodash/index.js"), &root));
        assert!(should_skip_path(
            &root.join("packages/foo/node_modules/lib.js"),
            &root,
        ));
    }

    #[test]
    fn skip_target_dir() {
        let root = root();
        assert!(should_skip_path(&root.join("target/debug/build/x.rs"), &root));
    }

    #[test]
    fn skip_self_managed_index() {
        let root = root();
        assert!(should_skip_path(&root.join(".ast-bro/index/meta.json"), &root));
    }

    #[test]
    fn allow_normal_paths() {
        let root = root();
        assert!(!should_skip_path(&root.join("src/main.rs"), &root));
        assert!(!should_skip_path(&root.join("docs/README.md"), &root));
    }

    #[test]
    fn allow_paths_outside_root() {
        let root = root();
        assert!(!should_skip_path(&PathBuf::from("/elsewhere/node_modules/x"), &root));
    }

    // ---- is_test_file -----------------------------------------------------

    #[test]
    fn test_detection_directory_components() {
        let root = root();
        assert!(is_test_file(&root.join("tests/foo.rs"), &root));
        assert!(is_test_file(&root.join("src/features/__tests__/a.ts"), &root));
        assert!(is_test_file(&root.join("e2e/signup.spec.ts"), &root));
        assert!(is_test_file(&root.join("cypress/integration/login.js"), &root));
    }

    #[test]
    fn test_detection_file_suffixes() {
        let root = root();
        assert!(is_test_file(&root.join("src/foo_test.go"), &root));
        assert!(is_test_file(&root.join("src/utils.test.ts"), &root));
        assert!(is_test_file(&root.join("src/auth.spec.js"), &root));
        assert!(is_test_file(&root.join("src/bar_tests.py"), &root));
        assert!(is_test_file(&root.join("pkg/test_foo.py"), &root));
        // The `test_` prefix only counts for Python files.
        assert!(!is_test_file(&root.join("src/test_helpers.go"), &root));
    }

    #[test]
    fn test_detection_production_paths() {
        let root = root();
        assert!(!is_test_file(&root.join("src/foo.rs"), &root));
        assert!(!is_test_file(&root.join("lib/auth.py"), &root));
        assert!(!is_test_file(&root.join("src/test_utils.rs"), &root));
        // A bare `integration` directory is not a test marker...
        assert!(!is_test_file(&root.join("src/integration/stripe.rs"), &root));
        // ...but the explicit `integration-tests` / `integration_tests` are.
        assert!(is_test_file(&root.join("integration-tests/api.rs"), &root));
        assert!(is_test_file(&root.join("integration_tests/api.rs"), &root));
    }

    // ---- detect_language --------------------------------------------------

    #[test]
    fn shebang_python() {
        assert_eq!(
            detect("myscript", "#!/usr/bin/env python3\nprint('hi')\n"),
            Some(SupportLang::Python)
        );
    }

    #[test]
    fn shebang_with_env_flags() {
        assert_eq!(
            detect("deploy", "#!/usr/bin/env -S python3\n"),
            Some(SupportLang::Python)
        );
    }

    #[test]
    fn shebang_direct_path() {
        assert_eq!(
            detect("tool", "#!/usr/bin/python3\n"),
            Some(SupportLang::Python)
        );
    }

    #[test]
    fn shebang_ruby() {
        assert_eq!(
            detect("script", "#!/usr/bin/ruby\nputs 'hi'\n"),
            Some(SupportLang::Ruby)
        );
    }

    #[test]
    fn shebang_node_resolves_to_typescript() {
        assert_eq!(
            detect("server", "#!/usr/bin/env node\nconsole.log(1);\n"),
            Some(SupportLang::TypeScript)
        );
    }

    #[test]
    fn no_shebang_returns_none() {
        assert_eq!(detect("plain", "just a plain file\n"), None);
    }

    #[test]
    fn empty_file_returns_none() {
        assert_eq!(detect("empty", ""), None);
    }

    #[test]
    fn shebang_unrecognized_interpreter() {
        assert_eq!(detect("tool", "#!/usr/bin/env foointerpreter\n"), None);
    }

    #[test]
    fn shebang_env_substring_in_path() {
        // `env` appearing inside a directory name must not trigger the
        // `env`-launcher handling.
        assert_eq!(
            detect("script", "#!/home/envuser/bin/python3\nprint('hi')\n"),
            Some(SupportLang::Python)
        );
    }

    #[test]
    fn shebang_env_with_tab_separator() {
        assert_eq!(
            detect("script", "#!/usr/bin/env\tpython3\nprint('hi')\n"),
            Some(SupportLang::Python)
        );
    }

    #[test]
    fn binary_file_returns_none() {
        // PNG signature bytes.
        assert_eq!(
            detect("bin", [0x89u8, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]),
            None
        );
    }

    #[test]
    fn long_line_without_newline_returns_none() {
        // The content must start with `#!`; otherwise the function bails out
        // at the shebang check and the long-line branch is never exercised.
        let content = format!("#!{}", "x".repeat(300));
        assert!(!content.contains('\n'));
        assert_eq!(detect("long", content), None);
    }

    #[test]
    fn shebang_at_boundary_256() {
        let padding = " ".repeat(230);
        let content = format!("#!/usr/bin/env python3{}\nprint('hi')\n", padding);
        // Precondition: the newline still falls inside the inspected window.
        assert!(content.as_bytes()[..256].contains(&b'\n'));
        assert_eq!(detect("edge", content), Some(SupportLang::Python));
    }

    #[test]
    fn shebang_newline_beyond_256_returns_none() {
        let padding = " ".repeat(300);
        let content = format!("#!/usr/bin/env python3{}\nprint('hi')\n", padding);
        // Precondition: the newline falls outside the inspected window.
        assert!(!content.as_bytes()[..256].contains(&b'\n'));
        assert_eq!(detect("toolong", content), None);
    }
}