use std::collections::HashSet;
use std::path::{Path, PathBuf};
use ignore::gitignore::GitignoreBuilder;
use walkdir::{DirEntry, WalkDir};
use crate::error::TldrError;
use crate::types::{FileTree, IgnoreSpec, NodeType};
use crate::TldrResult;
/// Size threshold of 5 MiB (5 * 1024 * 1024 bytes).
///
/// NOTE(review): nothing in this part of the file reads this constant;
/// presumably it is the per-file size cap applied by code elsewhere — confirm.
pub const MAX_FILE_SIZE: u64 = 5 * 1024 * 1024;
/// Directory names that the tree walk never descends into.
///
/// `build_tree_children` compares the name of every directory entry against
/// this list (exact, case-sensitive comparison of the final path component)
/// and skips the directory on a match. Only directories are tested, so a
/// regular file that happens to carry one of these names (for example a
/// `.env` file) is not affected by this list.
pub const DEFAULT_SKIP_DIRS: &[&str] = &[
// Names conventionally used for dependency and build-output directories.
"node_modules",
"vendor",
"target",
"dist",
"build",
"out",
"bin",
"obj",
".next",
".nuxt",
// NOTE(review): presumably a Doxygen output directory (compare the
// `doxygen.*` sentinels in GENERATED_DIR_SENTINELS) — confirm it is not a
// typo for "docs".
"dox",
// Names conventionally used for Python caches, virtual environments and
// tool caches.
"__pycache__",
"venv",
".venv",
"env",
".env",
".tox",
".pytest_cache",
".mypy_cache",
".ruff_cache",
// Names conventionally used for coverage output.
"coverage",
".coverage",
".gradle",
// Version-control metadata directories.
".git",
".svn",
".hg",
// Editor / IDE settings and generic caches.
".idea",
".vscode",
".cache",
];
/// File names whose presence directly inside a directory marks that directory
/// as generated output (both are assets emitted by Doxygen's HTML output).
const GENERATED_DIR_SENTINELS: &[&str] = &["doxygen.css", "doxygen.svg"];

/// Reports whether `dir` directly contains an entry named like one of the
/// [`GENERATED_DIR_SENTINELS`].
///
/// Only the immediate listing of `dir` is inspected (no recursion), and names
/// are compared exactly. The check is best-effort: if the directory cannot be
/// read the answer is `false`, unreadable individual entries are skipped, and
/// names that are not valid UTF-8 never match.
pub(crate) fn dir_has_generated_sentinel(dir: &Path) -> bool {
    match std::fs::read_dir(dir) {
        // An unreadable (or missing) directory is treated as "not generated".
        Err(_) => false,
        Ok(listing) => listing.flatten().any(|item| {
            item.file_name()
                .to_str()
                .map_or(false, |fname| GENERATED_DIR_SENTINELS.contains(&fname))
        }),
    }
}
/// Builds a [`FileTree`] describing everything below `root`.
///
/// * `extensions` - when `Some`, only files whose dotted extension (for
///   example `".py"`) is in the set are kept, and directories that end up
///   empty are dropped. When `None`, every file and directory is kept.
/// * `exclude_hidden` - when `true`, entries whose name starts with `.` are
///   left out.
/// * `ignore_spec` - optional gitignore-style patterns, evaluated relative to
///   the canonicalized root.
///
/// The returned root node is named after the last component of the
/// canonicalized `root`, or `"."` when that path has no final component.
///
/// # Errors
///
/// * [`TldrError::PathNotFound`] if `root` does not exist or cannot be
///   canonicalized.
/// * [`TldrError::PathTraversal`] if `root` contains `..` and the traversal
///   guard below rejects it.
/// * Any error propagated from building the child nodes.
pub fn get_file_tree(
    root: &Path,
    extensions: Option<&HashSet<String>>,
    exclude_hidden: bool,
    ignore_spec: Option<&IgnoreSpec>,
) -> TldrResult<FileTree> {
    let not_found = || TldrError::PathNotFound(root.to_path_buf());

    if !root.exists() {
        return Err(not_found());
    }
    let canonical = match dunce::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => return Err(not_found()),
    };

    // Traversal guard: only consulted when the textual path mentions "..".
    // Failing to read the working directory or to canonicalize the joined
    // path is treated as "no violation".
    //
    // NOTE(review): `cwd.join(root)` canonicalizes to the same location as
    // `root` itself, so the second `starts_with` below appears to always hold
    // and this guard seems unable to fire — confirm whether it should be
    // enforced or removed.
    if root.to_string_lossy().contains("..") {
        let escapes = std::env::current_dir()
            .ok()
            .and_then(|cwd| {
                dunce::canonicalize(cwd.join(root)).ok().map(|resolved| {
                    !(resolved.starts_with(&cwd) || resolved.starts_with(&canonical))
                })
            })
            .unwrap_or(false);
        if escapes {
            return Err(TldrError::PathTraversal(root.to_path_buf()));
        }
    }

    let matcher = build_gitignore(&canonical, ignore_spec);
    let children = build_tree_children(
        &canonical,
        &canonical,
        extensions,
        exclude_hidden,
        matcher.as_ref(),
    )?;

    let root_name = match canonical.file_name() {
        Some(last) => last.to_string_lossy().to_string(),
        None => ".".to_string(),
    };
    Ok(FileTree::dir(root_name, children))
}
/// Compiles the patterns of `ignore_spec` into a gitignore matcher rooted at
/// `root`.
///
/// Returns `None` when no spec is given, when the spec has no patterns, or
/// when the matcher cannot be built. Individual patterns that fail to parse
/// are dropped silently so that one bad pattern does not disable the rest.
fn build_gitignore(
    root: &Path,
    ignore_spec: Option<&IgnoreSpec>,
) -> Option<ignore::gitignore::Gitignore> {
    let spec = ignore_spec?;
    let lines = spec.patterns.as_slice();
    if lines.is_empty() {
        return None;
    }

    let mut matcher = GitignoreBuilder::new(root);
    lines.iter().for_each(|line| {
        // Best-effort: a pattern that is rejected is simply not added.
        let _ = matcher.add_line(None, line);
    });
    matcher.build().ok()
}
/// Builds the sorted child nodes for the direct contents of `dir`, recursing
/// into each subdirectory that is kept.
///
/// * `root` is the base that file paths stored in the tree are made relative
///   to (the path is stored unchanged if it is not below `root`).
/// * `extensions`, `exclude_hidden` and `gitignore` are the filters described
///   on `get_file_tree`.
///
/// Entries that cannot be read are skipped. Symbolic links are not followed,
/// and entries that are neither a directory nor a regular file are ignored.
/// Directories listed in `DEFAULT_SKIP_DIRS`, or containing a generated-output
/// sentinel file, are pruned. The result lists directories before files, each
/// group ordered by name.
///
/// # Errors
///
/// Propagates any error returned while building a subdirectory's children.
fn build_tree_children(
    dir: &Path,
    root: &Path,
    extensions: Option<&HashSet<String>>,
    exclude_hidden: bool,
    gitignore: Option<&ignore::gitignore::Gitignore>,
) -> TldrResult<Vec<FileTree>> {
    // One level at a time: depth 0 is `dir` itself (always admitted so the
    // walk can start), depth 1 are its direct entries.
    let listing = WalkDir::new(dir)
        .max_depth(1)
        .follow_links(false)
        .into_iter()
        .filter_entry(|candidate| {
            candidate.depth() == 0 || should_include_entry(candidate, exclude_hidden, gitignore)
        })
        .filter_map(Result::ok);

    let mut nodes: Vec<FileTree> = Vec::new();
    for item in listing {
        let item_path = item.path();
        if item_path == dir {
            continue;
        }
        let label = item.file_name().to_string_lossy().to_string();
        let kind = item.file_type();

        if kind.is_dir() {
            let pruned = DEFAULT_SKIP_DIRS.contains(&label.as_str())
                || dir_has_generated_sentinel(item_path);
            if pruned {
                continue;
            }
            let nested =
                build_tree_children(item_path, root, extensions, exclude_hidden, gitignore)?;
            // With an extension filter active, directories left empty are omitted.
            if extensions.is_none() || !nested.is_empty() {
                nodes.push(FileTree::dir(label, nested));
            }
            continue;
        }

        if !kind.is_file() {
            continue;
        }

        let wanted = extensions.map_or(true, |allowed| {
            // Extensions are compared in dotted form; a file without an
            // extension is looked up as the empty string.
            let dotted = item_path
                .extension()
                .map(|raw| format!(".{}", raw.to_string_lossy()))
                .unwrap_or_default();
            allowed.contains(&dotted)
        });
        if wanted {
            let relative = item_path
                .strip_prefix(root)
                .unwrap_or(item_path)
                .to_path_buf();
            nodes.push(FileTree::file(label, relative));
        }
    }

    // Directories first, then files; ties are broken by name.
    nodes.sort_by(|lhs, rhs| {
        let lhs_is_file = matches!(lhs.node_type, NodeType::File);
        let rhs_is_file = matches!(rhs.node_type, NodeType::File);
        lhs_is_file
            .cmp(&rhs_is_file)
            .then_with(|| lhs.name.cmp(&rhs.name))
    });
    Ok(nodes)
}
/// Decides whether a walked entry passes the hidden-file and gitignore
/// filters.
///
/// Returns `false` when `exclude_hidden` is set and the entry's name starts
/// with `.` (the special names `.` and `..` are exempt), or when `gitignore`
/// is present and reports the entry's path as ignored. Returns `true`
/// otherwise.
fn should_include_entry(
    entry: &DirEntry,
    exclude_hidden: bool,
    gitignore: Option<&ignore::gitignore::Gitignore>,
) -> bool {
    if exclude_hidden {
        let label = entry.file_name().to_string_lossy();
        let is_dotfile = label.starts_with('.') && !matches!(&*label, "." | "..");
        if is_dotfile {
            return false;
        }
    }

    match gitignore {
        Some(rules) => !rules
            .matched(entry.path(), entry.file_type().is_dir())
            .is_ignore(),
        None => true,
    }
}
/// Flattens `tree` into the list of its file paths, each joined onto `root`.
///
/// Paths appear in the order the file nodes are met when the tree is walked
/// depth-first, children in their stored order. Directory nodes contribute no
/// path of their own.
pub fn collect_files(tree: &FileTree, root: &Path) -> Vec<PathBuf> {
    let mut gathered: Vec<PathBuf> = Vec::new();
    collect_files_recursive(tree, root, &mut gathered);
    gathered
}
/// Appends to `files` the path of every file node at or below `tree`, joined
/// onto `root`.
///
/// A file node without a stored path contributes nothing; a directory node
/// only forwards to its children, in order.
fn collect_files_recursive(tree: &FileTree, root: &Path, files: &mut Vec<PathBuf>) {
    match tree.node_type {
        NodeType::Dir => tree
            .children
            .iter()
            .for_each(|child| collect_files_recursive(child, root, files)),
        // `extend` over an `Option` pushes zero or one element.
        NodeType::File => files.extend(tree.path.as_ref().map(|relative| root.join(relative))),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temporary fixture directory containing:
    ///
    /// * `main.py`, `utils.py`, `config.json` and a hidden file `.hidden` at
    ///   the top level;
    /// * a `src/` subdirectory holding `module.py`.
    fn create_test_dir() -> TempDir {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join("main.py"), "# Python file").unwrap();
        fs::write(dir.path().join("utils.py"), "# Utils").unwrap();
        fs::write(dir.path().join("config.json"), "{}").unwrap();
        fs::create_dir(dir.path().join("src")).unwrap();
        fs::write(dir.path().join("src/module.py"), "# Module").unwrap();
        fs::write(dir.path().join(".hidden"), "hidden").unwrap();
        dir
    }

    /// The root of the tree is a directory node with at least one child.
    #[test]
    fn test_get_file_tree_basic() {
        let dir = create_test_dir();
        let tree = get_file_tree(dir.path(), None, true, None).unwrap();
        assert_eq!(tree.node_type, NodeType::Dir);
        assert!(!tree.children.is_empty());
    }

    /// With an extension filter of `.py`, every file node in the tree ends
    /// in `.py`.
    #[test]
    fn test_get_file_tree_extension_filter() {
        let dir = create_test_dir();
        let extensions: HashSet<String> = [".py".to_string()].into_iter().collect();
        let tree = get_file_tree(dir.path(), Some(&extensions), true, None).unwrap();

        fn check_extensions(node: &FileTree) {
            if node.node_type == NodeType::File {
                assert!(
                    node.name.ends_with(".py"),
                    "Found non-py file: {}",
                    node.name
                );
            }
            for child in &node.children {
                check_extensions(child);
            }
        }
        check_extensions(&tree);
    }

    /// With `exclude_hidden = true`, no node below the root has a name that
    /// starts with a dot.
    #[test]
    fn test_get_file_tree_excludes_hidden() {
        let dir = create_test_dir();
        let tree = get_file_tree(dir.path(), None, true, None).unwrap();

        fn check_no_hidden(node: &FileTree) {
            assert!(
                !node.name.starts_with('.') || node.name == ".",
                "Hidden file found: {}",
                node.name
            );
            for child in &node.children {
                check_no_hidden(child);
            }
        }
        // The root node is skipped on purpose: the temporary directory's own
        // name may start with a dot.
        for child in &tree.children {
            check_no_hidden(child);
        }
    }

    /// With `exclude_hidden = false`, at least one node below the root has a
    /// name that starts with a dot (the fixture's `.hidden` file).
    #[test]
    fn test_get_file_tree_includes_hidden() {
        let dir = create_test_dir();
        let tree = get_file_tree(dir.path(), None, false, None).unwrap();

        fn has_hidden(node: &FileTree) -> bool {
            if node.name.starts_with('.') && node.name != "." {
                return true;
            }
            node.children.iter().any(has_hidden)
        }
        // Search only below the root: `TempDir::new()` names its directory
        // with a leading dot (default prefix `.tmp`), so testing the root node
        // itself would make this assertion pass even if no hidden entry had
        // been included.
        assert!(
            tree.children.iter().any(has_hidden),
            "No hidden files found"
        );
    }

    /// A path that does not exist yields `TldrError::PathNotFound`.
    #[test]
    fn test_get_file_tree_nonexistent() {
        let result = get_file_tree(Path::new("/nonexistent/path"), None, true, None);
        assert!(matches!(result, Err(TldrError::PathNotFound(_))));
    }

    /// An ignore pattern of `*.json` removes every `.json` entry from the
    /// tree.
    #[test]
    fn test_get_file_tree_ignore_patterns() {
        let dir = create_test_dir();
        let ignore = IgnoreSpec::new(vec!["*.json".to_string()]);
        let tree = get_file_tree(dir.path(), None, true, Some(&ignore)).unwrap();

        fn check_no_json(node: &FileTree) {
            assert!(
                !node.name.ends_with(".json"),
                "JSON file found: {}",
                node.name
            );
            for child in &node.children {
                check_no_json(child);
            }
        }
        check_no_json(&tree);
    }

    /// `collect_files` returns a non-empty list that includes `main.py`.
    #[test]
    fn test_collect_files() {
        let dir = create_test_dir();
        let tree = get_file_tree(dir.path(), None, true, None).unwrap();
        let files = collect_files(&tree, dir.path());
        assert!(!files.is_empty());
        assert!(files.iter().any(|f| f.ends_with("main.py")));
    }

    /// Two hard links to the same file are both listed, and building the tree
    /// does not fail on them.
    #[test]
    fn test_get_file_tree_hardlinks_no_symlink_cycle() {
        let dir = TempDir::new().unwrap();
        let original = dir.path().join("original.txt");
        let hard = dir.path().join("hardlink.txt");
        fs::write(&original, "shared content").unwrap();
        fs::hard_link(&original, &hard).expect("hardlink creation failed");
        assert!(original.exists());
        assert!(hard.exists());

        let result = get_file_tree(dir.path(), None, true, None);
        assert!(
            result.is_ok(),
            "tree builder must not report SymlinkCycle on hardlinks; got: {:?}",
            result.err()
        );

        let tree = result.unwrap();
        let names: Vec<&str> = tree.children.iter().map(|c| c.name.as_str()).collect();
        assert!(
            names.contains(&"original.txt") && names.contains(&"hardlink.txt"),
            "expected both hardlinked files in tree; got: {:?}",
            names
        );
    }
}