use crate::error::StorageError;
use crate::tree::hasher;
use crate::tree::node::{DirectoryNode, FileNode, MerkleNode};
use crate::tree::path;
use crate::tree::walker::{Entry, Walker, WalkerConfig};
use crate::types::NodeID;
use hex;
use std::collections::{BTreeMap, HashMap};
use std::path::{Path, PathBuf};
use std::time::Instant;
use tracing::{debug, error, info, instrument, trace, warn};
#[derive(Debug, Clone)]
pub struct Tree {
pub root_id: NodeID,
pub nodes: HashMap<NodeID, MerkleNode>,
parent_map: HashMap<NodeID, NodeID>,
}
impl Tree {
pub fn find_parent(&self, node_id: &NodeID) -> Option<NodeID> {
self.parent_map.get(node_id).copied()
}
pub fn get_children(&self, node_id: &NodeID) -> Vec<NodeID> {
match self.nodes.get(node_id) {
Some(MerkleNode::Directory(dir)) => {
dir.children.iter().map(|(_, child_id)| *child_id).collect()
}
_ => vec![],
}
}
pub fn find_gitignore_node_id(&self) -> Option<NodeID> {
use std::ffi::OsStr;
for (node_id, node) in &self.nodes {
let path = match node {
MerkleNode::File(f) => &f.path,
MerkleNode::Directory(d) => &d.path,
};
if path.file_name() == Some(OsStr::new(".gitignore")) {
return Some(*node_id);
}
}
None
}
}
pub struct TreeBuilder {
root: PathBuf,
walker_config: Option<WalkerConfig>,
}
impl TreeBuilder {
pub fn new(root: PathBuf) -> Self {
Self {
root,
walker_config: None,
}
}
pub fn with_walker_config(mut self, config: WalkerConfig) -> Self {
self.walker_config = Some(config);
self
}
#[instrument(skip(self), fields(workspace = %self.root.display()))]
pub fn build(&self) -> Result<Tree, StorageError> {
let start = Instant::now();
info!("Starting tree build");
let walker = match &self.walker_config {
Some(config) => Walker::with_config(self.root.clone(), config.clone()),
None => Walker::new(self.root.clone()),
};
let entries = match walker.walk() {
Ok(e) => {
debug!(entry_count = e.len(), "Walked filesystem");
e
}
Err(e) => {
error!("Filesystem walk failed: {}", e);
return Err(e);
}
};
let mut files = Vec::new();
let mut directories = Vec::new();
for entry in entries {
match entry {
Entry::File { path, size } => files.push((path, size)),
Entry::Directory { path } => directories.push(path),
}
}
let mut node_map: HashMap<PathBuf, NodeID> = HashMap::new();
let mut nodes: HashMap<NodeID, MerkleNode> = HashMap::new();
for (file_path, size) in files {
let (node_id, file_node) = self.hash_file(&file_path, size)?;
let canonical_path = path::canonicalize_path(&file_path)?;
node_map.insert(canonical_path, node_id);
nodes.insert(node_id, MerkleNode::File(file_node));
}
if !directories.contains(&self.root) {
directories.push(self.root.clone());
}
directories.sort_by(|a, b| {
let depth_a = a.components().count();
let depth_b = b.components().count();
depth_b.cmp(&depth_a) });
for dir_path in directories {
let (node_id, dir_node) = self.hash_directory(&dir_path, &node_map)?;
let canonical_path = path::canonicalize_path(&dir_path)?;
node_map.insert(canonical_path, node_id);
nodes.insert(node_id, MerkleNode::Directory(dir_node));
}
let mut parent_map: HashMap<NodeID, NodeID> = HashMap::new();
for (node_id, node) in &nodes {
if let MerkleNode::Directory(dir) = node {
for (_, child_node_id) in &dir.children {
parent_map.insert(*child_node_id, *node_id);
}
}
}
let canonical_root = path::canonicalize_path(&self.root)?;
let root_id = node_map.get(&canonical_root).copied().ok_or_else(|| {
error!("Root directory not found in node map: {:?}", canonical_root);
StorageError::InvalidPath(format!(
"Root directory not found in node map: {:?}",
canonical_root
))
})?;
let duration = start.elapsed();
info!(
node_count = nodes.len(),
root_id = %hex::encode(root_id),
duration_ms = duration.as_millis(),
"Tree build completed"
);
Ok(Tree {
root_id,
nodes,
parent_map,
})
}
pub fn compute_root(&self) -> Result<NodeID, StorageError> {
let tree = self.build()?;
Ok(tree.root_id)
}
#[instrument(skip(self), fields(path = %file_path.display()))]
fn hash_file(&self, file_path: &Path, size: u64) -> Result<(NodeID, FileNode), StorageError> {
trace!("Hashing file");
let content = std::fs::read(file_path).map_err(|e| {
error!("Failed to read file: {}", e);
StorageError::IoError(std::io::Error::new(
std::io::ErrorKind::Other,
format!("Failed to read file {:?}: {}", file_path, e),
))
})?;
let content_hash = hasher::compute_content_hash(&content);
trace!(content_hash = %hex::encode(content_hash), "Computed content hash");
let metadata = BTreeMap::new();
let node_id = hasher::compute_file_node_id(file_path, &content_hash, &metadata)?;
let file_node = FileNode {
path: file_path.to_path_buf(),
content_hash,
size,
metadata,
};
Ok((node_id, file_node))
}
fn hash_directory(
&self,
dir_path: &Path,
node_map: &HashMap<PathBuf, NodeID>,
) -> Result<(NodeID, DirectoryNode), StorageError> {
let dir_entries = std::fs::read_dir(dir_path).map_err(|e| {
StorageError::IoError(std::io::Error::new(
std::io::ErrorKind::Other,
format!("Failed to read directory {:?}: {}", dir_path, e),
))
})?;
let mut children = Vec::new();
for entry in dir_entries {
let entry = entry.map_err(|e| {
StorageError::IoError(std::io::Error::new(
std::io::ErrorKind::Other,
format!("Failed to read directory entry in {:?}: {}", dir_path, e),
))
})?;
let child_path = entry.path();
let child_name = entry.file_name().to_string_lossy().to_string();
let canonical_child_path = match path::canonicalize_path(&child_path) {
Ok(p) => p,
Err(_) => {
continue;
}
};
if let Some(&child_node_id) = node_map.get(&canonical_child_path) {
children.push((child_name, child_node_id));
} else {
continue;
}
}
children.sort_by(|a, b| a.0.cmp(&b.0));
let metadata = BTreeMap::new();
let node_id = hasher::compute_directory_node_id(dir_path, &children, &metadata)?;
let dir_node = DirectoryNode {
path: dir_path.to_path_buf(),
children,
metadata,
};
Ok((node_id, dir_node))
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
#[test]
fn test_build_tree_single_file() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path().to_path_buf();
fs::write(root.join("test.txt"), "test content").unwrap();
let builder = TreeBuilder::new(root.clone());
let tree = builder.build().unwrap();
assert_eq!(tree.nodes.len(), 2);
assert!(tree.nodes.contains_key(&tree.root_id));
}
#[test]
fn test_build_tree_multiple_files() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path().to_path_buf();
fs::write(root.join("file1.txt"), "content1").unwrap();
fs::write(root.join("file2.txt"), "content2").unwrap();
let builder = TreeBuilder::new(root.clone());
let tree = builder.build().unwrap();
assert_eq!(tree.nodes.len(), 3);
}
#[test]
fn test_build_tree_nested_directories() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path().to_path_buf();
fs::create_dir(root.join("dir1")).unwrap();
fs::write(root.join("dir1").join("file.txt"), "content").unwrap();
fs::write(root.join("file.txt"), "root content").unwrap();
let builder = TreeBuilder::new(root.clone());
let tree = builder.build().unwrap();
assert!(tree.nodes.len() >= 4);
}
#[test]
fn test_compute_root_deterministic() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path().to_path_buf();
fs::write(root.join("test.txt"), "test content").unwrap();
let builder = TreeBuilder::new(root.clone());
let root1 = builder.compute_root().unwrap();
let root2 = builder.compute_root().unwrap();
assert_eq!(root1, root2);
}
#[test]
fn test_compute_root_changes_with_content() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path().to_path_buf();
fs::write(root.join("test.txt"), "content1").unwrap();
let builder = TreeBuilder::new(root.clone());
let root1 = builder.compute_root().unwrap();
fs::write(root.join("test.txt"), "content2").unwrap();
let root2 = builder.compute_root().unwrap();
assert_ne!(root1, root2);
}
#[test]
fn test_compute_root_changes_with_structure() {
let temp_dir = TempDir::new().unwrap();
let root = temp_dir.path().to_path_buf();
fs::write(root.join("file1.txt"), "content").unwrap();
let builder = TreeBuilder::new(root.clone());
let root1 = builder.compute_root().unwrap();
fs::write(root.join("file2.txt"), "content").unwrap();
let root2 = builder.compute_root().unwrap();
assert_ne!(root1, root2);
}
}