use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use ignore::WalkBuilder;
use tracing::warn;
use crate::ast::types::FileTreeEntry;
use crate::error::{validate_path_containment, Result, BrrrError};
/// Depth limit used by `file_tree` when the caller passes `max_depth = None`.
const DEFAULT_MAX_DEPTH: usize = 100;

/// Names that are always excluded from the tree.
///
/// `file_tree` compares each component of an entry's root-relative path
/// against this list, so anything located underneath a matching name is
/// excluded as well.
const DEFAULT_SKIP_DIRS: &[&str] = &[
    "node_modules", "__pycache__",
    // Version-control metadata
    ".git", ".svn", ".hg",
    "dist", "build", ".next", ".nuxt", "coverage", ".tox",
    "venv", ".venv", "env", ".env",
    "vendor", ".cache", "target",
];
/// Builds a hierarchical [`FileTreeEntry`] describing the directory at `path`.
///
/// The directory is walked once with [`WalkBuilder`]; every surviving entry is
/// recorded under its parent and the nested tree is then assembled by
/// `build_tree_from_map`.
///
/// # Arguments
///
/// * `path` - Directory to scan. It is canonicalized before walking.
/// * `ext_filter` - Extensions to keep, each written with its leading dot
///   (for example `".py"`). Matching is exact and case-sensitive. An empty
///   slice keeps every file. When non-empty, directories that end up without
///   children are pruned from the result (except the root).
/// * `show_hidden` - When `false`, any entry whose root-relative path has a
///   component starting with `.` is dropped.
/// * `no_ignore` - When `true`, git ignore files, the global git ignore, git
///   exclude files and `.ignore` files are not consulted. When `false`, those
///   stay enabled and `.brrrignore` files are honoured as well.
/// * `max_depth` - Maximum depth below the root; `None` means
///   `DEFAULT_MAX_DEPTH`.
///
/// Entries whose relative path contains a name from `DEFAULT_SKIP_DIRS` are
/// always excluded, regardless of the flags above.
///
/// # Errors
///
/// Returns `BrrrError::Io` when `path` cannot be canonicalized. Problems with
/// individual entries during the walk are logged and the entry is skipped.
pub fn file_tree(
    path: &str,
    ext_filter: &[String],
    show_hidden: bool,
    no_ignore: bool,
    max_depth: Option<usize>,
) -> Result<FileTreeEntry> {
    let effective_max_depth = max_depth.unwrap_or(DEFAULT_MAX_DEPTH);
    let has_filter = !ext_filter.is_empty();
    let root_path = Path::new(path).canonicalize().map_err(BrrrError::Io)?;

    let mut walker_builder = WalkBuilder::new(&root_path);
    if no_ignore {
        walker_builder
            .git_ignore(false)
            .git_global(false)
            .git_exclude(false)
            .ignore(false);
    } else {
        walker_builder.add_custom_ignore_filename(".brrrignore");
    }
    // Hidden entries are always yielded by the walker; the `show_hidden`
    // decision is applied per entry further down.
    walker_builder.hidden(false);
    walker_builder.max_depth(Some(effective_max_depth));
    let walker = walker_builder.build();

    // parent directory -> accepted direct children
    let mut children_map: HashMap<PathBuf, Vec<PathBuf>> = HashMap::new();
    // every accepted path that should be rendered as a directory
    let mut is_dir_set: HashSet<PathBuf> = HashSet::new();
    // canonical directory targets seen so far, used to reject repeated symlink targets
    let mut visited_canonical: HashSet<PathBuf> = HashSet::new();
    visited_canonical.insert(root_path.clone());

    for result in walker {
        let entry = match result {
            Ok(entry) => entry,
            Err(err) => {
                // Previously dropped silently; surface it so partial trees are explainable.
                warn!(
                    error = %err,
                    "Skipping entry that could not be read during directory walk"
                );
                continue;
            }
        };
        let entry_path = entry.path().to_path_buf();

        // The root itself is only registered as a directory; it is never
        // anybody's child and none of the filters apply to it.
        if entry_path == root_path {
            is_dir_set.insert(entry_path);
            continue;
        }

        // Evaluated once and reused for both the containment and the cycle checks.
        let is_symlink = entry_path.is_symlink();
        if is_symlink {
            match validate_path_containment(&root_path, &entry_path) {
                Ok(_) => {}
                Err(BrrrError::PathTraversal { target, base }) => {
                    warn!(
                        symlink = %entry_path.display(),
                        target = %target,
                        base = %base,
                        "Skipping symlink that escapes project root (path traversal attempt)"
                    );
                    continue;
                }
                Err(BrrrError::Io(_)) => {
                    warn!(
                        symlink = %entry_path.display(),
                        "Skipping broken symlink"
                    );
                    continue;
                }
                Err(other) => {
                    // Still skipped as before, but no longer silently.
                    warn!(
                        symlink = %entry_path.display(),
                        error = ?other,
                        "Skipping symlink that failed containment validation"
                    );
                    continue;
                }
            }
        }

        // `is_dir` follows symlinks, so a link to a directory counts as a directory here.
        let is_directory = entry_path.is_dir();
        if is_directory {
            match entry_path.canonicalize() {
                Ok(canonical) => {
                    // The target is recorded in every case; only symlinks are
                    // rejected when their target was already recorded.
                    // NOTE(review): whether a link or its real target is seen first
                    // appears to depend on walk order, so an in-root directory link
                    // may be kept or dropped depending on that order - confirm
                    // whether this is intended.
                    if !visited_canonical.insert(canonical.clone()) && is_symlink {
                        warn!(
                            path = %entry_path.display(),
                            canonical = %canonical.display(),
                            "Skipping symlink due to cycle (target already visited)"
                        );
                        continue;
                    }
                }
                Err(e) => {
                    if is_symlink {
                        warn!(
                            path = %entry_path.display(),
                            error = %e,
                            "Skipping symlink that cannot be resolved"
                        );
                        continue;
                    }
                    // A real directory that cannot be canonicalized is still listed.
                    warn!(
                        path = %entry_path.display(),
                        error = %e,
                        "Warning: directory could not be canonicalized"
                    );
                }
            }
        }

        let rel_path = match entry_path.strip_prefix(&root_path) {
            Ok(p) => p,
            Err(_) => continue,
        };

        // NOTE(review): the skip list is matched against every component,
        // including the final one, so a plain file named e.g. `build` is
        // excluded too - confirm this is intended.
        if rel_path.components().any(|c| {
            let component_str = c.as_os_str().to_string_lossy();
            DEFAULT_SKIP_DIRS.contains(&component_str.as_ref())
        }) {
            continue;
        }

        if !show_hidden
            && rel_path.components().any(|c| {
                c.as_os_str()
                    .to_str()
                    .map(|s| s.starts_with('.'))
                    .unwrap_or(false)
            })
        {
            continue;
        }

        if !is_directory && has_filter {
            // A filter entry matches when it is exactly "." followed by the
            // file's extension; files without a UTF-8 extension never match.
            let matches = entry_path
                .extension()
                .and_then(|e| e.to_str())
                .map(|ext| ext_filter.iter().any(|ef| ef.strip_prefix('.') == Some(ext)))
                .unwrap_or(false);
            if !matches {
                continue;
            }
        }

        if is_directory {
            is_dir_set.insert(entry_path.clone());
        }
        if let Some(parent) = entry_path.parent() {
            children_map
                .entry(parent.to_path_buf())
                .or_default()
                .push(entry_path);
        }
    }

    let tree = build_tree_from_map(
        &root_path,
        &root_path,
        &mut children_map,
        &is_dir_set,
        has_filter,
        0,
        effective_max_depth,
    );

    match tree {
        Some(t) => Ok(t),
        None => {
            // Defensive fallback: if no tree was produced, report an empty root directory.
            let name = root_path
                .file_name()
                .map(|n| {
                    let lossy = n.to_string_lossy();
                    if lossy.contains('\u{FFFD}') {
                        warn!(
                            path = %root_path.display(),
                            "Root directory name contains non-UTF8 characters, using lossy conversion"
                        );
                    }
                    lossy.into_owned()
                })
                .unwrap_or_else(|| ".".to_string());
            Ok(FileTreeEntry::new_dir(name, ".".to_string(), vec![]))
        }
    }
}
/// Recursively converts the flat walk results into a nested [`FileTreeEntry`].
///
/// * `path` - Node to build.
/// * `root` - Root of the tree; used to derive relative paths.
/// * `children_map` - Parent-to-children listing. The entry for `path` is
///   removed from the map when it is consumed.
/// * `is_dir_set` - Paths to render as directories; anything else is a file.
/// * `has_filter` - Whether an extension filter is active. When it is,
///   directories other than `root` that end up with no children yield `None`.
/// * `current_depth` / `max_depth` - A directory reached at
///   `current_depth >= max_depth` is returned as a depth-limit marker and its
///   children are not built.
fn build_tree_from_map(
    path: &Path,
    root: &Path,
    children_map: &mut HashMap<PathBuf, Vec<PathBuf>>,
    is_dir_set: &HashSet<PathBuf>,
    has_filter: bool,
    current_depth: usize,
    max_depth: usize,
) -> Option<FileTreeEntry> {
    /// Final path component as a lossy string, plus whether the conversion
    /// left a U+FFFD replacement character in it.
    fn lossy_file_name(p: &Path) -> Option<(String, bool)> {
        let raw = p.file_name()?;
        let text = raw.to_string_lossy();
        let replaced = text.contains('\u{FFFD}');
        Some((text.into_owned(), replaced))
    }

    // Leaf case: anything not registered as a directory is emitted as a file.
    if !is_dir_set.contains(path) {
        let file_name = match lossy_file_name(path) {
            Some((text, replaced)) => {
                if replaced {
                    warn!(
                        path = %path.display(),
                        "File name contains non-UTF8 characters, using lossy conversion"
                    );
                }
                text
            }
            None => "<invalid>".to_string(),
        };
        let relative = match path.strip_prefix(root) {
            Ok(rel) => rel.display().to_string(),
            Err(_) => String::new(),
        };
        return Some(FileTreeEntry::new_file(file_name, relative));
    }

    // Directories at the depth limit are reported without descending further.
    if current_depth >= max_depth {
        return Some(FileTreeEntry::depth_limit_reached(path, root));
    }

    let pending = children_map.remove(path).unwrap_or_default();
    let mut children: Vec<FileTreeEntry> = pending
        .into_iter()
        .filter_map(|child| {
            build_tree_from_map(
                &child,
                root,
                children_map,
                is_dir_set,
                has_filter,
                current_depth + 1,
                max_depth,
            )
        })
        .collect();

    // Directories first, then case-insensitive by name.
    children.sort_by(|a, b| {
        b.is_dir()
            .cmp(&a.is_dir())
            .then_with(|| a.name.to_lowercase().cmp(&b.name.to_lowercase()))
    });

    let at_root = path == root;
    if has_filter && !at_root && children.is_empty() {
        return None;
    }

    let dir_name = match lossy_file_name(path) {
        Some((text, replaced)) => {
            if replaced {
                warn!(
                    path = %path.display(),
                    "Directory name contains non-UTF8 characters, using lossy conversion"
                );
            }
            text
        }
        None => ".".to_string(),
    };
    let relative = if at_root {
        ".".to_string()
    } else {
        match path.strip_prefix(root) {
            Ok(rel) => rel.display().to_string(),
            Err(_) => String::new(),
        }
    };
    Some(FileTreeEntry::new_dir(dir_name, relative, children))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::{self, File};
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates the file `rel` under `root` and writes `contents` into it.
    fn write_file(root: &Path, rel: &str, contents: &str) {
        File::create(root.join(rel))
            .unwrap()
            .write_all(contents.as_bytes())
            .unwrap();
    }

    /// Names of the direct children of `entry`, in tree order.
    fn child_names(entry: &FileTreeEntry) -> Vec<&str> {
        entry.children.iter().map(|c| c.name.as_str()).collect()
    }

    /// Fixture: three regular directories, one hidden directory and a
    /// `node_modules` directory, each containing a single-purpose file.
    fn create_test_tree() -> TempDir {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        for dir in ["src", "tests", "docs", ".hidden", "node_modules/package"].iter() {
            fs::create_dir_all(root.join(dir)).unwrap();
        }

        let files = [
            ("src/main.py", "print('hello')"),
            ("src/utils.py", "# utils"),
            ("tests/test_main.py", "# tests"),
            ("docs/README.md", "# README"),
            (".hidden/secret.py", "# secret"),
            ("node_modules/package/index.js", "// index"),
        ];
        for (rel, contents) in files.iter() {
            write_file(root, rel, contents);
        }

        temp_dir
    }

    #[test]
    fn test_file_tree_no_filter() {
        let fixture = create_test_tree();
        let tree = file_tree(fixture.path().to_str().unwrap(), &[], false, false, None).unwrap();

        assert!(tree.is_dir());
        assert!(!tree.children.is_empty());

        let names = child_names(&tree);
        assert!(names.contains(&"src"));
        assert!(names.contains(&"tests"));
        assert!(names.contains(&"docs"));
        assert!(!names.contains(&".hidden"));
        assert!(!names.contains(&"node_modules"));
    }

    #[test]
    fn test_file_tree_with_extension_filter() {
        let fixture = create_test_tree();
        let only_py = [".py".to_string()];
        let tree = file_tree(
            fixture.path().to_str().unwrap(),
            &only_py,
            false,
            false,
            None,
        )
        .unwrap();

        assert!(tree.is_dir());
        let names = child_names(&tree);
        assert!(names.contains(&"src"));
        assert!(names.contains(&"tests"));
        // docs holds only a .md file, so it is pruned under the .py filter.
        assert!(!names.contains(&"docs"));
    }

    #[test]
    fn test_tree_structure_is_hierarchical() {
        let fixture = create_test_tree();
        let tree = file_tree(fixture.path().to_str().unwrap(), &[], false, false, None).unwrap();

        let src_dir = tree.children.iter().find(|c| c.name == "src").unwrap();
        assert!(src_dir.is_dir());
        assert!(!src_dir.children.is_empty());

        let src_files = child_names(src_dir);
        assert!(src_files.contains(&"main.py"));
        assert!(src_files.contains(&"utils.py"));
    }

    #[test]
    fn test_sorting_dirs_first() {
        let fixture = create_test_tree();
        write_file(fixture.path(), "setup.py", "# setup");

        let tree = file_tree(fixture.path().to_str().unwrap(), &[], false, false, None).unwrap();

        let entries = &tree.children;
        let first_file_idx = entries
            .iter()
            .position(|c| !c.is_dir())
            .unwrap_or(entries.len());
        let last_dir_idx = entries.iter().rposition(|c| c.is_dir()).unwrap_or(0);
        assert!(
            last_dir_idx < first_file_idx,
            "Directories should come before files"
        );
    }

    #[test]
    fn test_file_tree_with_multiple_extension_filter() {
        let fixture = create_test_tree();
        let py_and_md = [".py".to_string(), ".md".to_string()];
        let tree = file_tree(
            fixture.path().to_str().unwrap(),
            &py_and_md,
            false,
            false,
            None,
        )
        .unwrap();

        assert!(tree.is_dir());
        let names = child_names(&tree);
        assert!(
            names.contains(&"src"),
            "src should be present (contains .py)"
        );
        assert!(
            names.contains(&"tests"),
            "tests should be present (contains .py)"
        );
        assert!(
            names.contains(&"docs"),
            "docs should be present (contains .md)"
        );
    }

    #[test]
    fn test_file_tree_show_hidden() {
        let fixture = create_test_tree();
        let tree = file_tree(fixture.path().to_str().unwrap(), &[], true, false, None).unwrap();

        let names = child_names(&tree);
        assert!(
            names.contains(&".hidden"),
            ".hidden should be present when show_hidden=true"
        );
        assert!(
            !names.contains(&"node_modules"),
            "node_modules should still be excluded"
        );
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_outside_root_excluded() {
        use std::os::unix::fs::symlink;

        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("src")).unwrap();
        write_file(root, "src/main.py", "# main");
        let _ = symlink("/tmp", root.join("escape_link"));

        let tree = file_tree(root.to_str().unwrap(), &[], false, false, None).unwrap();

        let names = child_names(&tree);
        assert!(
            names.contains(&"src"),
            "src should be present"
        );
        for child in tree.children.iter() {
            assert_ne!(
                child.name, "escape_link",
                "Symlink pointing outside root should be excluded"
            );
        }
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_inside_root_included() {
        use std::os::unix::fs::symlink;

        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::create_dir_all(root.join("lib")).unwrap();
        write_file(root, "lib/utils.py", "# utils");
        let _ = symlink(root.join("lib"), root.join("src/lib_link"));

        let tree = file_tree(root.to_str().unwrap(), &[], false, false, None).unwrap();

        let names = child_names(&tree);
        assert!(names.contains(&"src"), "src should be present");
        assert!(names.contains(&"lib"), "lib should be present");
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_cycle_detected_and_skipped() {
        use std::os::unix::fs::symlink;

        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("dir_a")).unwrap();
        fs::create_dir_all(root.join("dir_b")).unwrap();
        write_file(root, "dir_a/file.py", "# file in dir_a");
        // dir_a -> dir_b -> dir_a
        let _ = symlink(root.join("dir_b"), root.join("dir_a/link_to_b"));
        let _ = symlink(root.join("dir_a"), root.join("dir_b/link_to_a"));

        let result = file_tree(root.to_str().unwrap(), &[], false, false, None);
        assert!(result.is_ok(), "file_tree should handle symlink cycles gracefully");

        let tree = result.unwrap();
        let names = child_names(&tree);
        assert!(names.contains(&"dir_a"), "dir_a should be present");
        assert!(names.contains(&"dir_b"), "dir_b should be present");
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_self_cycle_detected() {
        use std::os::unix::fs::symlink;

        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("data")).unwrap();
        write_file(root, "data/file.py", "# file");
        let _ = symlink(root.join("data"), root.join("data/self_link"));

        let result = file_tree(root.to_str().unwrap(), &[], false, false, None);
        assert!(result.is_ok(), "file_tree should handle self-referential symlinks");

        let tree = result.unwrap();
        let names = child_names(&tree);
        assert!(names.contains(&"data"), "data directory should be present");
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_to_root_cycle_detected() {
        use std::os::unix::fs::symlink;

        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("src")).unwrap();
        write_file(root, "src/main.py", "# main");
        let _ = symlink(root, root.join("src/back_to_root"));

        let result = file_tree(root.to_str().unwrap(), &[], false, false, None);
        assert!(result.is_ok(), "file_tree should handle symlinks pointing to root");

        let tree = result.unwrap();
        let names = child_names(&tree);
        assert!(names.contains(&"src"), "src directory should be present");
    }

    #[cfg(unix)]
    #[test]
    fn test_deep_symlink_chain_with_cycle() {
        use std::os::unix::fs::symlink;

        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        for dir in ["dir_a", "dir_b", "dir_c"].iter() {
            fs::create_dir_all(root.join(dir)).unwrap();
        }
        write_file(root, "dir_a/a.py", "# a");
        write_file(root, "dir_b/b.py", "# b");
        write_file(root, "dir_c/c.py", "# c");
        // dir_a -> dir_b -> dir_c -> dir_a
        let _ = symlink(root.join("dir_b"), root.join("dir_a/link_to_b"));
        let _ = symlink(root.join("dir_c"), root.join("dir_b/link_to_c"));
        let _ = symlink(root.join("dir_a"), root.join("dir_c/link_to_a"));

        let result = file_tree(root.to_str().unwrap(), &[], false, false, None);
        assert!(result.is_ok(), "file_tree should handle 3-way symlink cycles");

        let tree = result.unwrap();
        let names = child_names(&tree);
        assert!(names.contains(&"dir_a"), "dir_a should be present");
        assert!(names.contains(&"dir_b"), "dir_b should be present");
        assert!(names.contains(&"dir_c"), "dir_c should be present");
    }

    #[test]
    fn test_max_depth_limits_traversal() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("a/b/c/d/e")).unwrap();
        write_file(root, "a/b/c/d/e/file.py", "# deep file");

        let tree = file_tree(root.to_str().unwrap(), &[], false, false, Some(2)).unwrap();
        assert!(tree.is_dir());

        let found_a = tree.children.iter().find(|c| c.name == "a");
        assert!(found_a.is_some(), "Directory 'a' should be present at depth 1");
        let a_dir = found_a.unwrap();

        let found_b = a_dir.children.iter().find(|c| c.name == "b");
        assert!(found_b.is_some(), "Directory 'b' should be present at depth 2");
        let b_dir = found_b.unwrap();

        assert!(
            b_dir.depth_limited,
            "Directory 'b' at depth 2 should be marked as depth_limited"
        );
        assert!(
            b_dir.children.is_empty(),
            "Directory 'b' should have no children due to depth limit"
        );
    }

    #[test]
    fn test_default_max_depth_allows_reasonable_nesting() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("a/b/c/d/e")).unwrap();
        write_file(root, "a/b/c/d/e/file.py", "# deep file");

        let tree = file_tree(root.to_str().unwrap(), &[], false, false, None).unwrap();

        // Descend a -> b -> c -> d -> e, remembering every directory on the way.
        let mut chain: Vec<&FileTreeEntry> = Vec::new();
        let mut cursor = &tree;
        for segment in ["a", "b", "c", "d", "e"].iter() {
            cursor = cursor
                .children
                .iter()
                .find(|c| c.name == *segment)
                .unwrap();
            chain.push(cursor);
        }

        let file = cursor.children.iter().find(|c| c.name == "file.py");
        assert!(file.is_some(), "file.py should be present at depth 5");
        for dir in chain.iter() {
            assert!(!dir.depth_limited);
        }
    }

    #[test]
    fn test_max_depth_zero_returns_only_root() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("src")).unwrap();
        write_file(root, "src/main.py", "# main");

        let tree = file_tree(root.to_str().unwrap(), &[], false, false, Some(0)).unwrap();

        assert!(tree.is_dir());
        assert!(tree.depth_limited, "Root should be depth_limited when max_depth=0");
        assert!(
            tree.children.is_empty(),
            "Root should have no children when max_depth=0"
        );
    }

    #[test]
    fn test_file_tree_entry_json_schema_has_type_field() {
        let child = FileTreeEntry::new_file("file.py".to_string(), "test/file.py".to_string());
        let entry = FileTreeEntry::new_dir("test".to_string(), "test".to_string(), vec![child]);

        let json = serde_json::to_string(&entry).unwrap();

        assert!(
            json.contains(r#""type":"dir""#),
            "JSON should contain type:dir, got: {}",
            json
        );
        assert!(
            !json.contains(r#""is_dir""#),
            "JSON should NOT contain is_dir field, got: {}",
            json
        );
        assert!(
            json.contains(r#""type":"file""#),
            "Child JSON should contain type:file, got: {}",
            json
        );
    }

    #[test]
    fn test_file_tree_entry_json_roundtrip() {
        let original = FileTreeEntry::new_dir(
            "project".to_string(),
            ".".to_string(),
            vec![
                FileTreeEntry::new_dir("src".to_string(), "src".to_string(), vec![]),
                FileTreeEntry::new_file("main.py".to_string(), "main.py".to_string()),
            ],
        );

        let json = serde_json::to_string(&original).unwrap();
        let restored: FileTreeEntry = serde_json::from_str(&json).unwrap();

        assert_eq!(restored.name, "project");
        assert_eq!(restored.entry_type, "dir");
        assert!(restored.is_dir());
        assert_eq!(restored.children.len(), 2);

        let src = restored.children.iter().find(|c| c.name == "src").unwrap();
        assert!(src.is_dir());
        assert_eq!(src.entry_type, "dir");

        let main = restored.children.iter().find(|c| c.name == "main.py").unwrap();
        assert!(!main.is_dir());
        assert_eq!(main.entry_type, "file");
    }

    #[test]
    fn test_file_tree_entry_python_compatible_json() {
        let entry = FileTreeEntry::new_file("test.py".to_string(), "src/test.py".to_string());

        let json = serde_json::to_string(&entry).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();

        assert_eq!(parsed["name"], "test.py");
        assert_eq!(parsed["type"], "file");
        assert_eq!(parsed["path"], "src/test.py");
        // `children` may be omitted entirely, or present as an empty array.
        assert!(parsed
            .get("children")
            .map_or(true, |v| v.as_array().unwrap().is_empty()));
    }
}