use std::collections::HashSet;
use std::path::{Path, PathBuf};
use ignore::gitignore::GitignoreBuilder;
use walkdir::{DirEntry, WalkDir};
use crate::error::TldrError;
use crate::types::{FileTree, IgnoreSpec, NodeType};
use crate::TldrResult;
/// Size threshold of `5 * 1024 * 1024` (5 MiB when read as a byte count).
///
/// NOTE(review): nothing in this part of the file references the constant;
/// presumably it caps the size of files that callers are willing to read —
/// confirm at the use sites.
pub const MAX_FILE_SIZE: u64 = 5 * 1024 * 1024;
/// Directory names that the tree walk never descends into.
///
/// The comparison is an exact match against the directory's own name (not its
/// path), and it applies regardless of the hidden-file setting or any ignore
/// patterns supplied by the caller.
pub const DEFAULT_SKIP_DIRS: &[&str] = &[
"node_modules",
"__pycache__",
".git",
".svn",
".hg",
"dist",
"build",
".next",
".nuxt",
"coverage",
".tox",
"venv",
".venv",
"env",
".env",
"vendor",
".cache",
"target",
".idea",
".vscode",
];
/// Builds a [`FileTree`] describing everything below `root`.
///
/// * `root` – directory to scan; it is canonicalized before walking.
/// * `extensions` – when `Some`, only files whose extension (written with a
///   leading dot, e.g. `".py"`) is in the set are kept.
/// * `exclude_hidden` – when `true`, entries whose name starts with `.` are
///   left out.
/// * `ignore_spec` – optional gitignore-style patterns, evaluated relative to
///   the canonical root.
///
/// The returned node is a directory named after the last component of the
/// canonical root (or `"."` when there is no such component).
///
/// # Errors
///
/// * [`TldrError::PathNotFound`] when `root` does not exist or cannot be
///   canonicalized.
/// * [`TldrError::PathTraversal`] when `root` contains `..` and resolves
///   outside both the current directory and the canonical root.
/// * Any error reported while walking the directory tree.
pub fn get_file_tree(
    root: &Path,
    extensions: Option<&HashSet<String>>,
    exclude_hidden: bool,
    ignore_spec: Option<&IgnoreSpec>,
) -> TldrResult<FileTree> {
    let not_found = || TldrError::PathNotFound(root.to_path_buf());

    if !root.exists() {
        return Err(not_found());
    }
    let canonical = dunce::canonicalize(root).map_err(|_| not_found())?;

    // Only paths that textually contain ".." are examined for traversal.
    // NOTE(review): `cwd.join(root)` appears to always resolve to the same
    // location as `canonical`, which would make this check unable to fail —
    // confirm the intended policy before relying on it.
    if root.to_string_lossy().contains("..") {
        let escapes = std::env::current_dir()
            .ok()
            .and_then(|cwd| {
                dunce::canonicalize(cwd.join(root))
                    .ok()
                    .map(|resolved| (cwd, resolved))
            })
            .map_or(false, |(cwd, resolved)| {
                !(resolved.starts_with(&cwd) || resolved.starts_with(&canonical))
            });
        if escapes {
            return Err(TldrError::PathTraversal(root.to_path_buf()));
        }
    }

    let gitignore = build_gitignore(&canonical, ignore_spec);
    let root_name = match canonical.file_name() {
        Some(component) => component.to_string_lossy().to_string(),
        None => ".".to_string(),
    };
    let children = build_tree_children(
        &canonical,
        &canonical,
        extensions,
        exclude_hidden,
        gitignore.as_ref(),
    )?;

    Ok(FileTree::dir(root_name, children))
}
/// Compiles the patterns of `ignore_spec` into a gitignore matcher anchored
/// at `root`.
///
/// Returns `None` when no spec is given, when the spec has no patterns, or
/// when the matcher cannot be built. Individual patterns that fail to parse
/// are skipped rather than reported.
fn build_gitignore(
    root: &Path,
    ignore_spec: Option<&IgnoreSpec>,
) -> Option<ignore::gitignore::Gitignore> {
    let patterns = ignore_spec
        .map(|spec| spec.patterns.as_slice())
        .filter(|patterns| !patterns.is_empty())?;

    let mut builder = GitignoreBuilder::new(root);
    patterns.iter().for_each(|line| {
        // Best effort: a malformed pattern must not disable the others.
        let _ = builder.add_line(None, line);
    });
    builder.build().ok()
}
/// Builds the sorted child nodes of `dir`, recursing into sub-directories.
///
/// * `dir` – directory whose direct entries are listed.
/// * `root` – base that file paths stored in the tree are made relative to
///   (the full path is kept if `root` is not a prefix).
/// * `extensions` – when `Some`, only files whose extension (with a leading
///   dot) is in the set are kept, and directories left without any children
///   are dropped.
/// * `exclude_hidden` / `gitignore` – forwarded to [`should_include_entry`].
///
/// Directories named in [`DEFAULT_SKIP_DIRS`] are never entered. Symbolic
/// links are not followed; only entries the walker reports as regular files
/// or directories are added. Entries that cannot be read are skipped.
/// Children are ordered directories first, then by name.
///
/// # Errors
///
/// Returns [`TldrError::SymlinkCycle`] when (on Unix) a directory about to be
/// entered is the same directory as one of its own ancestors in the walk.
fn build_tree_children(
    dir: &Path,
    root: &Path,
    extensions: Option<&HashSet<String>>,
    exclude_hidden: bool,
    gitignore: Option<&ignore::gitignore::Gitignore>,
) -> TldrResult<Vec<FileTree>> {
    // Seed the ancestor chain with the starting directory so that a
    // descendant resolving back to it is detected as well.
    let mut ancestors: Vec<(u64, u64)> = std::fs::metadata(dir)
        .ok()
        .and_then(|metadata| directory_identity(&metadata))
        .into_iter()
        .collect();
    build_tree_children_tracked(
        dir,
        root,
        extensions,
        exclude_hidden,
        gitignore,
        &mut ancestors,
    )
}

/// Returns the `(device, inode)` pair that identifies a filesystem object.
#[cfg(unix)]
fn directory_identity(metadata: &std::fs::Metadata) -> Option<(u64, u64)> {
    use std::os::unix::fs::MetadataExt;
    Some((metadata.dev(), metadata.ino()))
}

/// No identity is computed on non-Unix targets, so cycle detection is
/// disabled there.
#[cfg(not(unix))]
fn directory_identity(_metadata: &std::fs::Metadata) -> Option<(u64, u64)> {
    None
}

/// Worker behind [`build_tree_children`]; `ancestors` holds the identities of
/// the directories on the current descent path.
fn build_tree_children_tracked(
    dir: &Path,
    root: &Path,
    extensions: Option<&HashSet<String>>,
    exclude_hidden: bool,
    gitignore: Option<&ignore::gitignore::Gitignore>,
    ancestors: &mut Vec<(u64, u64)>,
) -> TldrResult<Vec<FileTree>> {
    let mut children = Vec::new();

    // One level at a time: recursion (not the walker) handles depth so that
    // each directory node can own its already-sorted children.
    let walker = WalkDir::new(dir)
        .max_depth(1)
        .follow_links(false)
        .into_iter()
        .filter_entry(|e| e.depth() == 0 || should_include_entry(e, exclude_hidden, gitignore));

    for entry in walker.filter_map(Result::ok) {
        // Depth 0 is `dir` itself.
        if entry.depth() == 0 {
            continue;
        }
        let path = entry.path();
        let name = entry.file_name().to_string_lossy().into_owned();
        let file_type = entry.file_type();

        if file_type.is_dir() {
            if DEFAULT_SKIP_DIRS.contains(&name.as_str()) {
                continue;
            }

            // Only directories on the current descent path can form a cycle.
            // Comparing against siblings or plain files (as a per-directory
            // inode set would) misreports hard links as cycles, and inode
            // numbers alone are not unique across devices.
            let identity = entry
                .metadata()
                .ok()
                .and_then(|metadata| directory_identity(&metadata));
            if let Some(id) = identity {
                if ancestors.contains(&id) {
                    return Err(TldrError::SymlinkCycle(path.to_path_buf()));
                }
                ancestors.push(id);
            }

            let sub_children = build_tree_children_tracked(
                path,
                root,
                extensions,
                exclude_hidden,
                gitignore,
                ancestors,
            )?;

            if identity.is_some() {
                ancestors.pop();
            }

            // With an extension filter active, directories that contain no
            // matching files are noise and are left out.
            if !sub_children.is_empty() || extensions.is_none() {
                children.push(FileTree::dir(name, sub_children));
            }
        } else if file_type.is_file() {
            if let Some(exts) = extensions {
                // The filter set stores extensions with their leading dot.
                let ext = path
                    .extension()
                    .map(|e| format!(".{}", e.to_string_lossy()))
                    .unwrap_or_default();
                if !exts.contains(&ext) {
                    continue;
                }
            }
            let relative_path = path.strip_prefix(root).unwrap_or(path).to_path_buf();
            children.push(FileTree::file(name, relative_path));
        }
    }

    // Directories first, then alphabetical within each kind.
    children.sort_by(|a, b| match (&a.node_type, &b.node_type) {
        (NodeType::Dir, NodeType::File) => std::cmp::Ordering::Less,
        (NodeType::File, NodeType::Dir) => std::cmp::Ordering::Greater,
        _ => a.name.cmp(&b.name),
    });
    Ok(children)
}
/// Decides whether a walked entry belongs in the tree.
///
/// An entry is rejected when `exclude_hidden` is set and its name starts with
/// `.` (the special names `.` and `..` are exempt), or when `gitignore` is
/// present and reports the entry's path as ignored.
fn should_include_entry(
    entry: &DirEntry,
    exclude_hidden: bool,
    gitignore: Option<&ignore::gitignore::Gitignore>,
) -> bool {
    let file_name = entry.file_name().to_string_lossy();
    let is_dotfile = || file_name.starts_with('.') && !matches!(&*file_name, "." | "..");
    if exclude_hidden && is_dotfile() {
        return false;
    }

    gitignore.map_or(true, |matcher| {
        let is_dir = entry.file_type().is_dir();
        !matcher.matched(entry.path(), is_dir).is_ignore()
    })
}
/// Flattens `tree` into the list of its file paths.
///
/// Every path stored on a file node is joined onto `root`; directory nodes
/// contribute only through their descendants. Paths appear in the tree's own
/// child order.
pub fn collect_files(tree: &FileTree, root: &Path) -> Vec<PathBuf> {
    let mut collected: Vec<PathBuf> = Vec::new();
    collect_files_recursive(tree, root, &mut collected);
    collected
}
/// Depth-first helper for [`collect_files`] that appends to `files`.
///
/// A file node adds `root` joined with its stored path (nothing when the node
/// has no path); a directory node visits each child in order.
fn collect_files_recursive(tree: &FileTree, root: &Path, files: &mut Vec<PathBuf>) {
    match tree.node_type {
        NodeType::Dir => tree
            .children
            .iter()
            .for_each(|child| collect_files_recursive(child, root, files)),
        NodeType::File => files.extend(tree.path.as_ref().map(|relative| root.join(relative))),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temporary directory laid out as:
    ///
    /// ```text
    /// main.py
    /// utils.py
    /// config.json
    /// .hidden
    /// src/module.py
    /// ```
    fn create_test_dir() -> TempDir {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join("main.py"), "# Python file").unwrap();
        fs::write(dir.path().join("utils.py"), "# Utils").unwrap();
        fs::write(dir.path().join("config.json"), "{}").unwrap();
        fs::create_dir(dir.path().join("src")).unwrap();
        fs::write(dir.path().join("src/module.py"), "# Module").unwrap();
        fs::write(dir.path().join(".hidden"), "hidden").unwrap();
        dir
    }

    /// The root of a scanned directory is a non-empty directory node.
    #[test]
    fn test_get_file_tree_basic() {
        let dir = create_test_dir();
        let tree = get_file_tree(dir.path(), None, true, None).unwrap();
        assert_eq!(tree.node_type, NodeType::Dir);
        assert!(!tree.children.is_empty());
    }

    /// With an extension filter, every file node carries that extension.
    #[test]
    fn test_get_file_tree_extension_filter() {
        let dir = create_test_dir();
        let extensions: HashSet<String> = [".py".to_string()].into_iter().collect();
        let tree = get_file_tree(dir.path(), Some(&extensions), true, None).unwrap();

        fn check_extensions(node: &FileTree) {
            if node.node_type == NodeType::File {
                assert!(
                    node.name.ends_with(".py"),
                    "Found non-py file: {}",
                    node.name
                );
            }
            for child in &node.children {
                check_extensions(child);
            }
        }
        check_extensions(&tree);
    }

    /// With `exclude_hidden = true`, no node below the root is a dotfile.
    #[test]
    fn test_get_file_tree_excludes_hidden() {
        let dir = create_test_dir();
        let tree = get_file_tree(dir.path(), None, true, None).unwrap();

        fn check_no_hidden(node: &FileTree) {
            assert!(
                !node.name.starts_with('.') || node.name == ".",
                "Hidden file found: {}",
                node.name
            );
            for child in &node.children {
                check_no_hidden(child);
            }
        }
        // The root is skipped on purpose: the temporary directory's own name
        // can start with '.'.
        for child in &tree.children {
            check_no_hidden(child);
        }
    }

    /// With `exclude_hidden = false`, the `.hidden` fixture file is listed.
    #[test]
    fn test_get_file_tree_includes_hidden() {
        let dir = create_test_dir();
        let tree = get_file_tree(dir.path(), None, false, None).unwrap();

        fn has_hidden(node: &FileTree) -> bool {
            if node.name.starts_with('.') && node.name != "." {
                return true;
            }
            node.children.iter().any(has_hidden)
        }
        // Inspect the children only. The temporary directory's own name can
        // start with '.' (tempfile's default prefix is `.tmp`), so a check
        // that includes the root would pass even if no hidden entry had been
        // listed.
        assert!(
            tree.children.iter().any(has_hidden),
            "No hidden files found"
        );
    }

    /// A missing root is reported as `PathNotFound`.
    #[test]
    fn test_get_file_tree_nonexistent() {
        let result = get_file_tree(Path::new("/nonexistent/path"), None, true, None);
        assert!(matches!(result, Err(TldrError::PathNotFound(_))));
    }

    /// Files matching an ignore pattern do not appear anywhere in the tree.
    #[test]
    fn test_get_file_tree_ignore_patterns() {
        let dir = create_test_dir();
        let ignore = IgnoreSpec::new(vec!["*.json".to_string()]);
        let tree = get_file_tree(dir.path(), None, true, Some(&ignore)).unwrap();

        fn check_no_json(node: &FileTree) {
            assert!(
                !node.name.ends_with(".json"),
                "JSON file found: {}",
                node.name
            );
            for child in &node.children {
                check_no_json(child);
            }
        }
        check_no_json(&tree);
    }

    /// Flattening the tree yields the fixture's files.
    #[test]
    fn test_collect_files() {
        let dir = create_test_dir();
        let tree = get_file_tree(dir.path(), None, true, None).unwrap();
        let files = collect_files(&tree, dir.path());
        assert!(!files.is_empty());
        assert!(files.iter().any(|f| f.ends_with("main.py")));
    }
}