use std::collections::HashMap;
use std::collections::HashSet;
use std::path::Path;
use indicatif::{ProgressBar, ProgressStyle};
use rocksdb::{DBWithThreadMode, SingleThreaded};
use std::path::PathBuf;
use std::str;
use std::time::Duration;
use std::time::Instant;
use time::OffsetDateTime;
use crate::config::UserConfig;
use crate::constants::DEFAULT_BRANCH_NAME;
use crate::constants::MERGE_HEAD_FILE;
use crate::constants::ORIG_HEAD_FILE;
use crate::constants::{HEAD_FILE, STAGED_DIR};
use crate::core::db;
use crate::core::db::key_val::str_val_db;
use crate::core::db::merkle_node::MerkleNodeDB;
use crate::core::refs::with_ref_manager;
use crate::core::v_latest::index::CommitMerkleTree;
use crate::core::v_latest::status;
use crate::error::OxenError;
use crate::model::MerkleHash;
use crate::model::MerkleTreeNodeType;
use crate::model::NewCommit;
use crate::model::NewCommitBody;
use crate::model::User;
use crate::model::merkle_tree::node::EMerkleTreeNode;
use crate::model::merkle_tree::node::StagedMerkleTreeNode;
use crate::model::merkle_tree::node::VNode;
use crate::model::merkle_tree::node::commit_node::CommitNodeOpts;
use crate::model::merkle_tree::node::dir_node::DirNodeOpts;
use crate::model::merkle_tree::node::vnode::VNodeOpts;
use crate::model::{Commit, LocalRepository, StagedEntryStatus};
use crate::util::hasher;
use crate::{repositories, util};
use crate::model::merkle_tree::node::MerkleTreeNode;
use crate::model::merkle_tree::node::{CommitNode, DirNode};
#[derive(Clone)]
pub struct EntryVNode {
pub id: MerkleHash,
pub entries: Vec<StagedMerkleTreeNode>,
pub removed_entries: Vec<StagedMerkleTreeNode>,
}
impl std::fmt::Debug for EntryVNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"EntryVNode {{ id: {:?}, entries: {} }}",
self.id,
self.entries
.iter()
.map(|e| e.node.to_string())
.collect::<Vec<String>>()
.join(", ")
)
}
}
impl EntryVNode {
pub fn new(id: MerkleHash) -> Self {
EntryVNode {
id,
entries: vec![],
removed_entries: vec![],
}
}
}
pub fn commit(repo: &LocalRepository, message: impl AsRef<str>) -> Result<Commit, OxenError> {
let cfg = UserConfig::get()?;
commit_with_cfg(repo, message, &cfg, None)
}
pub fn commit_with_parent_ids(
repo: &LocalRepository,
message: impl AsRef<str>,
parent_ids: Vec<String>,
) -> Result<Commit, OxenError> {
let cfg = UserConfig::get()?;
commit_with_cfg(repo, message, &cfg, Some(parent_ids))
}
pub fn commit_with_user(
repo: &LocalRepository,
message: impl AsRef<str>,
user: &User,
) -> Result<Commit, OxenError> {
let cfg = UserConfig {
name: user.name.clone(),
email: user.email.clone(),
editor: None,
};
commit_with_cfg(repo, message, &cfg, None)
}
pub fn commit_with_cfg(
repo: &LocalRepository,
message: impl AsRef<str>,
cfg: &UserConfig,
parent_ids: Option<Vec<String>>,
) -> Result<Commit, OxenError> {
let start_time = Instant::now();
let message = message.as_ref();
let opts = db::key_val::opts::default();
let staged_db_path = util::fs::oxen_hidden_dir(&repo.path).join(STAGED_DIR);
log::debug!("commit_with_cfg staged db path: {staged_db_path:?}");
let staged_db: DBWithThreadMode<SingleThreaded> =
DBWithThreadMode::open(&opts, dunce::simplified(&staged_db_path))?;
let commit_progress_bar = ProgressBar::new_spinner();
commit_progress_bar.set_style(ProgressStyle::default_spinner());
commit_progress_bar.enable_steady_tick(Duration::from_millis(100));
let (dir_entries, total_changes) =
status::read_staged_entries(repo, &staged_db, &commit_progress_bar)?;
commit_progress_bar.set_message(format!("Committing {total_changes} changes"));
log::debug!("got dir entries: {:?}", dir_entries.len());
if dir_entries.is_empty() {
return Err(OxenError::basic_str("No changes to commit"));
}
let new_commit = NewCommitBody {
message: message.to_string(),
author: cfg.name.clone(),
email: cfg.email.clone(),
};
let branch = repositories::branches::current_branch(repo)?;
let maybe_branch_name = branch.map(|b| b.name);
let commit = if let Some(parent_ids) = parent_ids {
log::debug!("parent ids: {parent_ids:?}");
commit_dir_entries_with_parents(
repo,
parent_ids,
dir_entries,
&new_commit,
&commit_progress_bar,
maybe_branch_name
.clone()
.unwrap_or(DEFAULT_BRANCH_NAME.to_string()),
)?
} else {
log::debug!("no parent ids, committing new");
commit_dir_entries_new(repo, dir_entries, &new_commit, &commit_progress_bar)?
};
let staged_db_path = staged_db.path().to_owned();
drop(staged_db);
util::fs::remove_dir_all(staged_db_path)?;
let head_path = util::fs::oxen_hidden_dir(&repo.path).join(HEAD_FILE);
log::debug!("Looking for HEAD file at {head_path:?}");
let commit_id = commit.id.to_owned();
let branch_name = maybe_branch_name.unwrap_or(DEFAULT_BRANCH_NAME.to_string());
let head_path_exists = head_path.exists();
with_ref_manager(repo, |manager| {
if !head_path_exists {
log::debug!("HEAD file does not exist, creating new branch");
manager.set_head(&branch_name);
manager.set_branch_commit_id(&branch_name, &commit_id)?;
}
manager.set_head_commit_id(&commit_id)
})?;
println!(
"🐂 commit {} in {}",
commit,
humantime::format_duration(Duration::from_millis(
start_time.elapsed().as_millis() as u64
))
);
Ok(commit)
}
pub fn commit_dir_entries_with_parents(
repo: &LocalRepository,
parent_commits: Vec<String>,
dir_entries: HashMap<PathBuf, Vec<StagedMerkleTreeNode>>,
new_commit: &NewCommitBody,
commit_progress_bar: &ProgressBar,
target_branch: impl AsRef<str>,
) -> Result<Commit, OxenError> {
let message = &new_commit.message;
let target_branch = target_branch.as_ref();
let head_path = util::fs::oxen_hidden_dir(&repo.path).join(HEAD_FILE);
let maybe_head_commit = if head_path.exists() {
repositories::revisions::get(repo, target_branch)?
} else {
None
};
let directories = dir_entries
.keys()
.map(|path| path.to_path_buf())
.collect::<Vec<_>>();
log::debug!("collecting existing nodes for directories: {directories:?}");
let mut existing_nodes: HashMap<PathBuf, MerkleTreeNode> = HashMap::new();
if let Some(commit) = &maybe_head_commit {
existing_nodes = repositories::tree::list_nodes_from_paths(repo, commit, &directories)?;
}
log::debug!(
"existing nodes (count: {}) {:?}",
existing_nodes.len(),
existing_nodes.keys()
);
let vnode_entries = split_into_vnodes(repo, &dir_entries, &existing_nodes, new_commit)?;
let timestamp = OffsetDateTime::now_utc();
let new_commit = create_commit_data(repo, message, timestamp, parent_commits, new_commit)?;
let commit_id = compute_commit_id(&new_commit)?;
let mut parent_hashes = Vec::new();
for parent_id in &new_commit.parent_ids {
if let Some(parent_commit) = repositories::commits::get_by_id(repo, parent_id)? {
let node = CommitMerkleTree::from_commit(repo, &parent_commit)?;
parent_hashes.push(node.root.hash);
}
}
let node = CommitNode::new(
repo,
CommitNodeOpts {
hash: commit_id,
parent_ids: parent_hashes,
email: new_commit.email.clone(),
author: new_commit.author.clone(),
message: message.to_string(),
timestamp,
},
)?;
let opts = db::key_val::opts::default();
let commit_id_string = format!("{commit_id}").to_string();
let dir_hash_db_path =
CommitMerkleTree::dir_hash_db_path_from_commit_id(repo, &commit_id_string);
let dir_hash_db: DBWithThreadMode<SingleThreaded> =
DBWithThreadMode::open(&opts, dunce::simplified(&dir_hash_db_path))?;
let (dir_hashes, parent_id) = match &maybe_head_commit {
Some(commit) => (
CommitMerkleTree::dir_hashes(repo, commit)?,
Some(commit.hash()?),
),
None => (HashMap::new(), None),
};
for (path, hash) in &dir_hashes {
if let Some(path_str) = path.to_str() {
str_val_db::put(&dir_hash_db, path_str, &hash.to_string())?;
} else {
log::error!("Failed to convert path to string: {path:?}");
}
}
let mut commit_db = MerkleNodeDB::open_read_write(repo, &node, parent_id)?;
write_commit_entries(
repo,
commit_id,
&mut commit_db,
&dir_hash_db,
&dir_hashes,
&vnode_entries,
)?;
commit_progress_bar.finish_and_clear();
Ok(node.to_commit())
}
pub fn commit_dir_entries_new(
repo: &LocalRepository,
dir_entries: HashMap<PathBuf, Vec<StagedMerkleTreeNode>>,
new_commit: &NewCommitBody,
commit_progress_bar: &ProgressBar,
) -> Result<Commit, OxenError> {
let message = &new_commit.message;
let maybe_head_commit = repositories::commits::head_commit_maybe(repo)?;
let mut parent_ids = vec![];
if let Some(parent) = &maybe_head_commit {
parent_ids.push(parent.hash()?);
}
let directories = dir_entries
.keys()
.map(|path| path.to_path_buf())
.collect::<Vec<_>>();
log::debug!("new commit directories: {directories:?}");
let mut existing_nodes: HashMap<PathBuf, MerkleTreeNode> = HashMap::new();
if let Some(commit) = &maybe_head_commit {
existing_nodes = repositories::tree::list_nodes_from_paths(repo, commit, &directories)?;
}
let vnode_entries = split_into_vnodes(repo, &dir_entries, &existing_nodes, new_commit)?;
let timestamp = OffsetDateTime::now_utc();
let new_commit = create_commit_data(
repo,
message,
timestamp,
parent_ids.iter().map(|id| id.to_string()).collect(),
new_commit,
)?;
let commit_id = compute_commit_id(&new_commit)?;
let node = CommitNode::new(
repo,
CommitNodeOpts {
hash: commit_id,
parent_ids: new_commit
.parent_ids
.iter()
.map(|id: &String| id.parse().unwrap())
.collect(),
email: new_commit.email.clone(),
author: new_commit.author.clone(),
message: message.to_string(),
timestamp,
},
)?;
let opts = db::key_val::opts::default();
let commit_id_string = format!("{commit_id}").to_string();
let dir_hash_db_path =
CommitMerkleTree::dir_hash_db_path_from_commit_id(repo, &commit_id_string);
let dir_hash_db: DBWithThreadMode<SingleThreaded> =
DBWithThreadMode::open(&opts, dunce::simplified(&dir_hash_db_path))?;
let (dir_hashes, parent_id) = match &maybe_head_commit {
Some(commit) => (
CommitMerkleTree::dir_hashes(repo, commit)?,
Some(commit.hash()?),
),
None => (HashMap::new(), None),
};
for (path, hash) in &dir_hashes {
if let Some(path_str) = path.to_str() {
str_val_db::put(&dir_hash_db, path_str, &hash.to_string())?;
} else {
log::error!("Failed to convert path to string: {path:?}");
}
}
let mut commit_db = MerkleNodeDB::open_read_write(repo, &node, parent_id)?;
write_commit_entries(
repo,
commit_id,
&mut commit_db,
&dir_hash_db,
&dir_hashes,
&vnode_entries,
)?;
commit_progress_bar.finish_and_clear();
cleanup_rm_dirs(&dir_hash_db, &dir_entries)?;
Ok(node.to_commit())
}
pub fn commit_dir_entries(
repo: &LocalRepository,
dir_entries: HashMap<PathBuf, Vec<StagedMerkleTreeNode>>,
new_commit: &NewCommitBody,
target_branch: impl AsRef<str>,
commit_progress_bar: &ProgressBar,
) -> Result<Commit, OxenError> {
log::debug!("commit_dir_entries got {} entries", dir_entries.len());
if log::max_level() == log::Level::Debug {
for (path, entries) in &dir_entries {
log::debug!(
"commit_dir_entries entry {:?} with {} nodes",
path,
entries.len()
);
}
}
if dir_entries.is_empty() {
return Err(OxenError::basic_str("No changes to commit"));
}
let message = &new_commit.message;
let head_path = util::fs::oxen_hidden_dir(&repo.path).join(HEAD_FILE);
let maybe_head_commit = if head_path.exists() {
repositories::revisions::get(repo, target_branch)?
} else {
None
};
let mut parent_ids = vec![];
if let Some(parent) = &maybe_head_commit {
parent_ids.push(parent.hash()?);
}
let directories = dir_entries
.keys()
.map(|path| path.to_path_buf())
.collect::<Vec<_>>();
log::debug!("commit_dir_entries directories: {directories:?}");
let mut existing_nodes: HashMap<PathBuf, MerkleTreeNode> = HashMap::new();
if let Some(commit) = &maybe_head_commit {
existing_nodes = repositories::tree::list_nodes_from_paths(repo, commit, &directories)?;
}
let vnode_entries = split_into_vnodes(repo, &dir_entries, &existing_nodes, new_commit)?;
let timestamp = OffsetDateTime::now_utc();
let new_commit = NewCommit {
parent_ids: parent_ids.iter().map(|id| id.to_string()).collect(),
message: message.to_string(),
author: new_commit.author.clone(),
email: new_commit.email.clone(),
timestamp,
};
let commit_id = compute_commit_id(&new_commit)?;
let node = CommitNode::new(
repo,
CommitNodeOpts {
hash: commit_id,
parent_ids,
email: new_commit.email.clone(),
author: new_commit.author.clone(),
message: message.to_string(),
timestamp,
},
)?;
let opts = db::key_val::opts::default();
let commit_id_string = format!("{commit_id}").to_string();
let dir_hash_db_path =
CommitMerkleTree::dir_hash_db_path_from_commit_id(repo, &commit_id_string);
let dir_hash_db: DBWithThreadMode<SingleThreaded> =
DBWithThreadMode::open(&opts, dunce::simplified(&dir_hash_db_path))?;
let dir_hashes = match &maybe_head_commit {
Some(commit) => CommitMerkleTree::dir_hashes(repo, commit)?,
None => HashMap::new(),
};
for (path, hash) in &dir_hashes {
if let Some(path_str) = path.to_str() {
str_val_db::put(&dir_hash_db, path_str, &hash.to_owned().to_string())?;
} else {
log::error!("Failed to convert path to string: {path:?}");
}
}
let mut commit_db = MerkleNodeDB::open_read_write(repo, &node, None)?;
write_commit_entries(
repo,
commit_id,
&mut commit_db,
&dir_hash_db,
&dir_hashes,
&vnode_entries,
)?;
commit_progress_bar.finish_and_clear();
cleanup_rm_dirs(&dir_hash_db, &dir_entries)?;
Ok(node.to_commit())
}
fn cleanup_rm_dirs(
dir_hash_db: &DBWithThreadMode<SingleThreaded>,
dir_entries: &HashMap<PathBuf, Vec<StagedMerkleTreeNode>>,
) -> Result<(), OxenError> {
for (_path, entries) in dir_entries.iter() {
for entry in entries.iter() {
if let EMerkleTreeNode::Directory(dir_node) = &entry.node.node
&& entry.status == StagedEntryStatus::Removed
{
let dir_path = PathBuf::from(dir_node.name());
log::debug!("dir path for cleanup: {dir_path:?}");
let key = dir_path.to_str().unwrap();
dir_hash_db.delete(key)?;
}
}
}
Ok(())
}
fn node_data_to_staged_node(
base_dir: impl AsRef<Path>,
node: &MerkleTreeNode,
) -> Result<Option<StagedMerkleTreeNode>, OxenError> {
let base_dir = base_dir.as_ref();
match node.node.node_type() {
MerkleTreeNodeType::Dir => {
let mut dir_node = node.dir()?;
let path = base_dir.join(dir_node.name());
dir_node.set_name(path.to_str().unwrap());
Ok(Some(StagedMerkleTreeNode {
status: StagedEntryStatus::Unmodified,
node: MerkleTreeNode::from_dir(dir_node),
}))
}
MerkleTreeNodeType::File => {
let mut file_node = node.file()?;
let path = base_dir.join(file_node.name());
file_node.set_name(path.to_str().unwrap());
Ok(Some(StagedMerkleTreeNode {
status: StagedEntryStatus::Unmodified,
node: MerkleTreeNode::from_file(file_node),
}))
}
_ => Ok(None),
}
}
fn get_node_dir_children(
base_dir: impl AsRef<Path>,
node: &MerkleTreeNode,
) -> Result<HashSet<StagedMerkleTreeNode>, OxenError> {
let dir_children = repositories::tree::list_files_and_folders(node)?;
let children = dir_children
.into_iter()
.flat_map(|child| node_data_to_staged_node(&base_dir, &child))
.flatten()
.collect();
Ok(children)
}
#[allow(clippy::type_complexity)]
fn split_into_vnodes(
repo: &LocalRepository,
entries: &HashMap<PathBuf, Vec<StagedMerkleTreeNode>>,
existing_nodes: &HashMap<PathBuf, MerkleTreeNode>,
new_commit: &NewCommitBody,
) -> Result<HashMap<PathBuf, (Vec<EntryVNode>, Vec<StagedMerkleTreeNode>)>, OxenError> {
let mut results: HashMap<PathBuf, (Vec<EntryVNode>, Vec<StagedMerkleTreeNode>)> =
HashMap::new();
if log::max_level() == log::Level::Debug {
log::debug!("split_into_vnodes new_commit: {:?}", new_commit.message);
log::debug!("split_into_vnodes entries keys: {:?}", entries.keys());
log::debug!(
"split_into_vnodes existing_nodes keys: {:?}",
existing_nodes.keys()
);
}
for (directory, new_children) in entries {
let mut children = HashSet::new();
let mut removed_children = HashSet::new();
if let Some(existing_node) = existing_nodes.get(directory) {
log::debug!("got existing node for {directory:?}");
children = get_node_dir_children(directory, existing_node)?;
log::debug!(
"got {} existing children for dir {:?}",
children.len(),
directory
);
} else {
log::debug!("no existing node for {directory:?}");
};
log::debug!("new_children {}", new_children.len());
for child in new_children.iter() {
log::debug!(
"new_child {:?} {:?}",
child.node.node.node_type(),
child.node.maybe_path()
);
if let Ok(path) = child.node.maybe_path()
&& path != Path::new("")
{
match child.status {
StagedEntryStatus::Removed => {
log::debug!(
"removing child {:?} {:?} with {:?}",
child.node.node.node_type(),
path,
child.node.maybe_path().unwrap()
);
children.remove(child);
removed_children.insert(child.to_owned());
}
_ => {
log::debug!(
"replacing child {:?} {:?} with {:?}",
child.node.node.node_type(),
path,
child.node.maybe_path().unwrap()
);
log::debug!("replaced child {}", child.node);
children.replace(child.clone());
}
}
}
}
if log::max_level() == log::Level::Debug {
for child in children.iter() {
log::debug!(
"child populated {:?} {:?} status {:?}",
child.node.node.node_type(),
child.node.maybe_path().unwrap(),
child.status
);
}
}
let total_children = children.len();
let vnode_size = repo.vnode_size();
let num_vnodes = (total_children as f32 / vnode_size as f32).ceil() as u128;
log::debug!(
"{num_vnodes} VNodes for {total_children} children in {directory:?} with vnode size {vnode_size}"
);
let mut vnode_children: Vec<EntryVNode> =
vec![EntryVNode::new(MerkleHash::new(0)); num_vnodes as usize];
for child in children.into_iter() {
let bucket = hasher::hash_buffer_128bit(
child
.node
.maybe_path()
.unwrap()
.to_str()
.unwrap()
.as_bytes(),
) % num_vnodes;
vnode_children[bucket as usize].entries.push(child.clone());
}
for vnode in vnode_children.iter_mut() {
vnode.entries.sort_by(|a, b| {
a.node
.maybe_path()
.unwrap()
.cmp(&b.node.maybe_path().unwrap())
});
let mut vnode_hasher = xxhash_rust::xxh3::Xxh3::new();
vnode_hasher.update(b"vnode");
vnode_hasher.update(directory.to_str().unwrap().as_bytes());
let mut has_new_entries = false;
for entry in vnode.entries.iter() {
if let EMerkleTreeNode::File(file_node) = &entry.node.node {
vnode_hasher.update(&file_node.combined_hash().to_le_bytes());
} else {
vnode_hasher.update(&entry.node.hash.to_le_bytes());
}
if entry.status != StagedEntryStatus::Unmodified {
has_new_entries = true;
}
}
if existing_nodes.contains_key(directory) && has_new_entries {
let uuid = uuid::Uuid::new_v4();
vnode_hasher.update(uuid.as_bytes());
}
vnode.id = MerkleHash::new(vnode_hasher.digest128());
}
let removed_children = removed_children.iter().cloned().collect();
results.insert(directory.to_owned(), (vnode_children, removed_children));
}
log::debug!(
"split_into_vnodes results: {:?} for commit {}",
results.len(),
new_commit.message
);
if log::max_level() == log::Level::Debug {
for (dir, (vnodes, _)) in results.iter_mut() {
log::debug!("dir {:?} has {} vnodes", dir, vnodes.len());
for vnode in vnodes.iter_mut() {
log::debug!(" vnode {} has {} entries", vnode.id, vnode.entries.len());
for entry in vnode.entries.iter() {
log::debug!(
" entry {:?} [{}] `{:?}` with status {:?}",
entry.node.node.node_type(),
entry.node.node.hash(),
entry.node.maybe_path(),
entry.status
);
}
}
}
}
Ok(results)
}
pub fn compute_commit_id(new_commit: &NewCommit) -> Result<MerkleHash, OxenError> {
let mut hasher = xxhash_rust::xxh3::Xxh3::new();
hasher.update(b"commit");
hasher.update(format!("{:?}", new_commit.parent_ids).as_bytes());
hasher.update(new_commit.message.as_bytes());
hasher.update(new_commit.author.as_bytes());
hasher.update(new_commit.email.as_bytes());
hasher.update(&new_commit.timestamp.unix_timestamp().to_le_bytes());
Ok(MerkleHash::new(hasher.digest128()))
}
fn write_commit_entries(
repo: &LocalRepository,
commit_id: MerkleHash,
commit_db: &mut MerkleNodeDB,
dir_hash_db: &DBWithThreadMode<SingleThreaded>,
dir_hashes: &HashMap<PathBuf, MerkleHash>,
entries: &HashMap<PathBuf, (Vec<EntryVNode>, Vec<StagedMerkleTreeNode>)>,
) -> Result<(), OxenError> {
let mut total_written = 0;
let root_path = PathBuf::from("");
let dir_node = compute_dir_node(repo, commit_id, entries, dir_hashes, &root_path)?;
commit_db.add_child(&dir_node)?;
total_written += 1;
str_val_db::put(
dir_hash_db,
root_path.to_str().unwrap(),
&dir_node.hash().to_string(),
)?;
let dir_db = MerkleNodeDB::open_read_write(repo, &dir_node, Some(commit_id))?;
r_create_dir_node(
repo,
commit_id,
&mut Some(dir_db),
dir_hash_db,
dir_hashes,
entries,
root_path,
&mut total_written,
)?;
Ok(())
}
#[allow(clippy::too_many_arguments)]
fn r_create_dir_node(
repo: &LocalRepository,
commit_id: MerkleHash,
maybe_dir_db: &mut Option<MerkleNodeDB>,
dir_hash_db: &DBWithThreadMode<SingleThreaded>,
dir_hashes: &HashMap<PathBuf, MerkleHash>,
entries: &HashMap<PathBuf, (Vec<EntryVNode>, Vec<StagedMerkleTreeNode>)>,
path: impl AsRef<Path>,
total_written: &mut u64,
) -> Result<(), OxenError> {
let path = path.as_ref().to_path_buf();
let keys = entries.keys();
log::debug!("r_create_dir_node path {path:?} keys: {keys:?}");
let Some((vnodes, _)) = entries.get(&path) else {
log::debug!("r_create_dir_node No entries found for directory {path:?}");
return Ok(());
};
log::debug!("Processing dir {:?} with {} vnodes", path, vnodes.len());
for vnode in vnodes.iter() {
let opts = VNodeOpts {
hash: vnode.id,
num_entries: vnode.entries.len() as u64,
};
let vnode_obj = VNode::new(repo, opts)?;
if let Some(dir_db) = maybe_dir_db {
dir_db.add_child(&vnode_obj)?;
*total_written += 1;
}
let mut vnode_db = MerkleNodeDB::open_read_write(
repo,
&vnode_obj,
maybe_dir_db.as_ref().map(|db| db.node_id),
)?;
for entry in vnode.entries.iter() {
match &entry.node.node {
EMerkleTreeNode::Directory(node) => {
let dir_path = entry.node.maybe_path().unwrap();
let dir_node = if entries.contains_key(&dir_path) {
let dir_node =
compute_dir_node(repo, commit_id, entries, dir_hashes, &dir_path)?;
vnode_db.add_child(&dir_node)?;
*total_written += 1;
let mut child_db = Some(MerkleNodeDB::open_read_write(
repo,
&dir_node,
Some(vnode.id),
)?);
r_create_dir_node(
repo,
commit_id,
&mut child_db,
dir_hash_db,
dir_hashes,
entries,
&dir_path,
total_written,
)?;
dir_node
} else {
let Some(old_dir_node) =
CommitMerkleTree::read_node(repo, node.hash(), false)?
else {
continue;
};
let dir_node = old_dir_node.dir()?;
vnode_db.add_child(&dir_node)?;
*total_written += 1;
dir_node
};
str_val_db::put(
dir_hash_db,
dir_path.to_str().unwrap(),
&dir_node.hash().to_string(),
)?;
}
EMerkleTreeNode::File(file_node) => {
let mut file_node = file_node.clone();
let file_path = PathBuf::from(&file_node.name());
let file_name = file_path.file_name().unwrap().to_str().unwrap();
let chunks = vec![file_node.hash().to_u128()];
file_node.set_chunk_hashes(chunks);
let last_commit_id = if entry.status == StagedEntryStatus::Unmodified {
*file_node.last_commit_id()
} else {
commit_id
};
file_node.set_last_commit_id(&last_commit_id);
file_node.set_name(file_name);
vnode_db.add_child(&file_node)?;
*total_written += 1;
}
_ => {
return Err(OxenError::basic_str(format!(
"r_create_dir_node found unexpected node type: {:?}",
entry.node
)));
}
}
}
}
log::debug!("Finished processing dir {path:?} total written {total_written} entries");
Ok(())
}
fn get_children(
entries: &HashMap<PathBuf, (Vec<EntryVNode>, Vec<StagedMerkleTreeNode>)>,
dir_path: impl AsRef<Path>,
) -> Result<Vec<PathBuf>, OxenError> {
let dir_path = dir_path.as_ref().to_path_buf();
let mut children = vec![];
for (path, _) in entries.iter() {
if path.starts_with(&dir_path) {
children.push(path.clone());
}
}
Ok(children)
}
fn compute_dir_node(
repo: &LocalRepository,
commit_id: MerkleHash,
entries: &HashMap<PathBuf, (Vec<EntryVNode>, Vec<StagedMerkleTreeNode>)>,
dir_hashes: &HashMap<PathBuf, MerkleHash>,
path: impl AsRef<Path>,
) -> Result<DirNode, OxenError> {
let path = path.as_ref().to_path_buf();
let mut hasher = xxhash_rust::xxh3::Xxh3::new();
hasher.update(b"dir");
hasher.update(path.to_str().unwrap().as_bytes());
let mut num_bytes = 0;
let mut num_entries = 0;
let mut data_type_counts: HashMap<String, u64> = HashMap::new();
let mut data_type_sizes: HashMap<String, u64> = HashMap::new();
let children = get_children(entries, &path)?;
log::debug!(
"Aggregating dir {path:?} for [{commit_id}] with {children:?} children num_bytes {num_bytes:?} data_type_counts {data_type_counts:?}"
);
let head_commit_maybe = repositories::commits::head_commit_maybe(repo)?;
if let Some(ref head_commit) = head_commit_maybe
&& let Ok(Some(old_dir_node)) =
repositories::tree::get_dir_without_children(repo, head_commit, &path, Some(dir_hashes))
{
let old_dir_node = old_dir_node.dir().unwrap();
num_entries = old_dir_node.num_entries();
num_bytes = old_dir_node.num_bytes();
data_type_counts = old_dir_node.data_type_counts().clone();
data_type_sizes = old_dir_node.data_type_sizes().clone();
}
for child in children.iter() {
let Some((vnodes, removed_entries)) = entries.get(child) else {
let err_msg = format!("compute_dir_node No entries found for directory {path:?}");
return Err(OxenError::basic_str(err_msg));
};
for vnode in vnodes.iter() {
hasher.update(&vnode.id.to_le_bytes());
for entry in vnode.entries.iter() {
match &entry.node.node {
EMerkleTreeNode::Directory(dir_node) => {
if path == *child {
num_entries += 1;
}
hasher.update(dir_node.name().as_bytes());
hasher.update(&dir_node.hash().to_le_bytes());
}
EMerkleTreeNode::File(file_node) => {
hasher.update(file_node.name().as_bytes());
hasher.update(&file_node.combined_hash().to_le_bytes());
match entry.status {
StagedEntryStatus::Added => {
num_bytes += file_node.num_bytes();
if path == *child {
num_entries += 1;
}
*data_type_counts
.entry(file_node.data_type().to_string())
.or_insert(0) += 1;
*data_type_sizes
.entry(file_node.data_type().to_string())
.or_insert(0) += file_node.num_bytes();
}
StagedEntryStatus::Modified => {
if let Some(head) = &head_commit_maybe
&& let Some(old_file) = repositories::tree::get_file_by_path(
repo,
head,
path.join(file_node.name()),
)?
{
let delta =
file_node.num_bytes() as i64 - old_file.num_bytes() as i64;
num_bytes = (num_bytes as i64 + delta) as u64;
let size_entry = data_type_sizes
.entry(file_node.data_type().to_string())
.or_insert(0);
*size_entry = (*size_entry as i64 + delta) as u64;
}
}
_ => {
}
}
}
_ => {
return Err(OxenError::basic_str(format!(
"compute_dir_node found unexpected node type: {:?}",
entry.node
)));
}
}
}
}
for entry in removed_entries.iter() {
match &entry.node.node {
EMerkleTreeNode::Directory(_) => {
}
EMerkleTreeNode::File(file_node) => {
if entry.status == StagedEntryStatus::Removed {
if path == *child {
num_entries -= 1;
}
num_bytes = num_bytes.saturating_sub(file_node.num_bytes());
if let Some(count) =
data_type_counts.get_mut(&file_node.data_type().to_string())
{
*count = count.saturating_sub(1);
}
if let Some(size) =
data_type_sizes.get_mut(&file_node.data_type().to_string())
{
*size = size.saturating_sub(file_node.num_bytes());
}
}
}
_ => {
return Err(OxenError::basic_str(format!(
"compute_dir_node found unexpected node type: {:?}",
entry.node
)));
}
}
}
}
let hash = MerkleHash::new(hasher.digest128());
let file_name = path.file_name().unwrap_or_default().to_str().unwrap();
log::debug!(
"Aggregated dir {path:?} [{hash}] num_bytes {num_bytes:?} num_entries {num_entries:?} data_type_counts {data_type_counts:?}"
);
let node = DirNode::new(
repo,
DirNodeOpts {
name: file_name.to_owned(),
hash,
num_bytes,
num_entries,
last_commit_id: commit_id,
last_modified_seconds: 0,
last_modified_nanoseconds: 0,
data_type_counts,
data_type_sizes,
},
)?;
Ok(node)
}
fn create_merge_commit(
repo: &LocalRepository,
message: &str,
timestamp: OffsetDateTime,
new_commit: &NewCommitBody,
) -> Result<NewCommit, OxenError> {
let hidden_dir = util::fs::oxen_hidden_dir(&repo.path);
let merge_head_path = hidden_dir.join(MERGE_HEAD_FILE);
let orig_head_path = hidden_dir.join(ORIG_HEAD_FILE);
let merge_commit_id = util::fs::read_from_path(&merge_head_path)?;
let head_commit_id = util::fs::read_from_path(&orig_head_path)?;
util::fs::remove_file(merge_head_path)?;
util::fs::remove_file(orig_head_path)?;
Ok(NewCommit {
parent_ids: vec![merge_commit_id, head_commit_id],
message: String::from(message),
author: new_commit.author.clone(),
email: new_commit.email.clone(),
timestamp,
})
}
fn is_merge_commit(repo: &LocalRepository) -> bool {
let hidden_dir = util::fs::oxen_hidden_dir(&repo.path);
let merge_head_path = hidden_dir.join(MERGE_HEAD_FILE);
merge_head_path.exists()
}
fn create_commit_data(
repo: &LocalRepository,
message: &str,
timestamp: OffsetDateTime,
parent_commits: Vec<String>,
new_commit: &NewCommitBody,
) -> Result<NewCommit, OxenError> {
if is_merge_commit(repo) {
create_merge_commit(repo, message, timestamp, new_commit)
} else {
Ok(NewCommit {
parent_ids: parent_commits,
message: message.to_string(),
author: new_commit.author.clone(),
email: new_commit.email.clone(),
timestamp,
})
}
}
#[cfg(test)]
mod tests {
use crate::test;
use std::collections::HashSet;
use std::path::Path;
use crate::core::v_latest::index::CommitMerkleTree;
use crate::error::OxenError;
use crate::model::MerkleHash;
use crate::opts::RmOpts;
use crate::repositories;
use crate::util;
use test::add_n_files_m_dirs;
#[tokio::test]
async fn test_first_commit() -> Result<(), OxenError> {
test::run_empty_dir_test_async(|dir| async move {
let repo = repositories::init::init(dir)?;
add_n_files_m_dirs(&repo, 10, 2).await?;
let status = repositories::status(&repo)?;
status.print();
let commit = super::commit(&repo, "First commit")?;
let tree = CommitMerkleTree::from_commit(&repo, &commit)?;
tree.print();
let vnodes = tree.total_vnodes();
assert_eq!(vnodes, 4);
let root = &tree.root;
let commit = root.commit();
assert!(commit.is_ok());
let root_commit_children = &root.children;
assert_eq!(root_commit_children.len(), 1);
let dir_node_data = root_commit_children.iter().next().unwrap();
let dir_node = dir_node_data.dir();
assert!(dir_node.is_ok());
assert_eq!(dir_node.unwrap().name(), "");
let vnode_data = dir_node_data.children.first().unwrap();
let vnode = vnode_data.vnode();
assert!(vnode.is_ok());
let vnode_children = &vnode_data.children;
assert_eq!(vnode_children.len(), 3);
let has_paths_csv = tree.has_path(Path::new("files.csv"))?;
assert!(has_paths_csv);
let has_readme = tree.has_path(Path::new("README.md"))?;
assert!(has_readme);
let has_path0 = tree.has_path(Path::new("files/dir_0/file0.txt"))?;
assert!(has_path0);
Ok(())
})
.await
}
#[tokio::test]
async fn test_commit_only_dirs_at_top_level() -> Result<(), OxenError> {
test::run_empty_dir_test_async(async |dir| {
let repo = repositories::init::init(dir)?;
let new_file = repo.path.join("all_files/dir_0/new_file.txt");
util::fs::create_dir_all(new_file.parent().unwrap())?;
util::fs::write_to_path(&new_file, "New file")?;
repositories::add(&repo, &repo.path).await?;
let status = repositories::status(&repo)?;
status.print();
let commit = super::commit(&repo, "First commit")?;
let tree = CommitMerkleTree::from_commit(&repo, &commit)?;
tree.print();
let has_path0 = tree.has_path(Path::new("all_files/dir_0/new_file.txt"))?;
assert!(has_path0);
Ok(())
})
.await
}
#[tokio::test]
async fn test_commit_single_file_deep_in_dir() -> Result<(), OxenError> {
test::run_empty_dir_test_async(|dir| async move {
let repo = repositories::init::init(dir)?;
let new_file = repo.path.join("files/dir_0/new_file.txt");
util::fs::create_dir_all(new_file.parent().unwrap())?;
util::fs::write_to_path(&new_file, "New file")?;
repositories::add(&repo, &new_file).await?;
let status = repositories::status(&repo)?;
status.print();
let commit = super::commit(&repo, "First commit")?;
let tree = CommitMerkleTree::from_commit(&repo, &commit)?;
tree.print();
let has_path0 = tree.has_path(Path::new("files/dir_0/new_file.txt"))?;
assert!(has_path0);
Ok(())
})
.await
}
#[tokio::test]
async fn test_2nd_commit_keeps_num_bytes_and_data_type_counts() -> Result<(), OxenError> {
test::run_empty_dir_test_async(|dir| async move {
let repo = repositories::init::init(dir)?;
add_n_files_m_dirs(&repo, 10, 3).await?;
let status = repositories::status(&repo)?;
status.print();
let first_commit = super::commit(&repo, "First commit")?;
let first_tree = CommitMerkleTree::from_commit(&repo, &first_commit)?;
first_tree.print();
let original_root_node = first_tree.get_by_path(Path::new(""))?.unwrap();
let original_root_dir = original_root_node.dir()?;
let original_root_dir_file_count = original_root_dir.num_files();
assert_eq!(original_root_dir_file_count, 12);
let new_file = repo.path.join("README.md");
util::fs::write_to_path(&new_file, "Update that README.md")?;
repositories::add(&repo, &new_file).await?;
let second_commit = super::commit(&repo, "Second commit")?;
assert!(first_commit.id != second_commit.id);
let head_commit = repositories::commits::head_commit(&repo)?;
assert_eq!(head_commit.id, second_commit.id);
let second_tree = CommitMerkleTree::from_commit(&repo, &second_commit)?;
second_tree.print();
let updated_root_dir = second_tree.get_by_path(Path::new(""))?;
let updated_root_dir = updated_root_dir.unwrap().dir()?;
let updated_root_dir_file_count = updated_root_dir.num_files();
assert_eq!(updated_root_dir_file_count, original_root_dir_file_count);
Ok(())
})
.await
}
#[tokio::test]
async fn test_second_commit() -> Result<(), OxenError> {
test::run_empty_dir_test_async(|dir| async move {
let repo = repositories::init::init(dir)?;
add_n_files_m_dirs(&repo, 10, 3).await?;
let status = repositories::status(&repo)?;
status.print();
let first_commit = super::commit(&repo, "First commit")?;
let first_tree = CommitMerkleTree::from_commit(&repo, &first_commit)?;
first_tree.print();
let original_dir_1_node = first_tree.get_by_path(Path::new("files/dir_1"))?;
let original_dir_1_node = original_dir_1_node.unwrap().dir()?;
let original_dir_1_file_count = original_dir_1_node.num_files();
let new_file = repo.path.join("files/dir_1/new_file.txt");
util::fs::write_to_path(&new_file, "New file")?;
repositories::add(&repo, &new_file).await?;
let second_commit = super::commit(&repo, "Second commit")?;
assert!(first_commit.id != second_commit.id);
let head_commit = repositories::commits::head_commit(&repo)?;
assert_eq!(head_commit.id, second_commit.id);
let second_tree = CommitMerkleTree::from_commit(&repo, &second_commit)?;
second_tree.print();
assert_eq!(second_tree.total_vnodes(), 5);
assert!(!first_tree.has_path(Path::new("files/dir_1/new_file.txt"))?);
assert!(second_tree.has_path(Path::new("files/dir_1/new_file.txt"))?);
let updated_node = second_tree.get_by_path(Path::new("files/dir_1/new_file.txt"))?;
assert!(updated_node.is_some());
let updated_file_node = updated_node.unwrap().file()?;
let updated_commit_id = updated_file_node.last_commit_id().to_string();
assert_eq!(updated_commit_id, second_commit.id);
let other_file_node = second_tree.get_by_path(Path::new("files/dir_1/file7.txt"))?;
assert!(other_file_node.is_some());
let other_file_node = other_file_node.unwrap().file()?;
let other_commit_id = other_file_node.last_commit_id().to_string();
assert_eq!(other_commit_id, first_commit.id);
let dir_node = second_tree.get_by_path(Path::new("files/dir_1"))?;
assert!(dir_node.is_some());
let dir_node = dir_node.unwrap().dir()?;
let dir_commit_id = dir_node.last_commit_id().to_string();
assert_eq!(dir_commit_id, second_commit.id);
let first_tree_dir_1 = first_tree.get_by_path(Path::new("files/dir_1"))?;
let second_tree_dir_1 = second_tree.get_by_path(Path::new("files/dir_1"))?;
assert!(first_tree_dir_1.is_some());
assert!(second_tree_dir_1.is_some());
assert!(first_tree_dir_1.unwrap().hash != second_tree_dir_1.unwrap().hash);
let first_tree_vnodes = first_tree.get_vnodes_for_dir(Path::new("files/dir_1"))?;
let second_tree_vnodes = second_tree.get_vnodes_for_dir(Path::new("files/dir_1"))?;
assert_eq!(first_tree_vnodes.len(), 1);
assert_eq!(second_tree_vnodes.len(), 1);
assert!(first_tree_vnodes[0].hash != second_tree_vnodes[0].hash);
let first_tree_dir_0 = first_tree.get_by_path(Path::new("files/dir_0"))?;
let second_tree_dir_0 = second_tree.get_by_path(Path::new("files/dir_0"))?;
assert!(first_tree_dir_0.is_some());
assert!(second_tree_dir_0.is_some());
assert_eq!(
first_tree_dir_0.unwrap().hash,
second_tree_dir_0.unwrap().hash
);
let first_tree_files = first_tree.get_by_path(Path::new("files"))?;
let second_tree_files = second_tree.get_by_path(Path::new("files"))?;
assert!(first_tree_files.is_some());
assert!(second_tree_files.is_some());
assert!(first_tree_files.unwrap().hash != second_tree_files.unwrap().hash);
let first_tree_root = first_tree.get_by_path(Path::new(""))?;
let second_tree_root = second_tree.get_by_path(Path::new(""))?;
assert!(first_tree_root.is_some());
assert!(second_tree_root.is_some());
assert!(first_tree_root.unwrap().hash != second_tree_root.unwrap().hash);
let first_tree_again = CommitMerkleTree::from_commit(&repo, &first_commit)?;
first_tree_again.print();
let dir_1_node_again = first_tree_again.get_by_path(Path::new("files/dir_1"))?;
let dir_1_node_again = dir_1_node_again.unwrap().dir()?;
let dir_1_file_count_again = dir_1_node_again.num_files();
assert_eq!(original_dir_1_file_count, dir_1_file_count_again);
Ok(())
})
.await
}
#[tokio::test]
async fn test_commit_configurable_vnode_size() -> Result<(), OxenError> {
test::run_empty_dir_test_async(|dir| async move {
let mut repo = repositories::init::init(dir)?;
repo.set_vnode_size(5);
add_n_files_m_dirs(&repo, 23, 2).await?;
let status = repositories::status(&repo)?;
status.print();
let first_commit = super::commit(&repo, "First commit")?;
let first_tree = CommitMerkleTree::from_commit(&repo, &first_commit)?;
first_tree.print();
let root_node = first_tree.get_by_path(Path::new(""))?.unwrap();
assert_eq!(root_node.num_vnodes(), 1);
let dir_0_node = first_tree.get_by_path(Path::new("files/dir_0"))?.unwrap();
assert_eq!(dir_0_node.num_vnodes(), 3);
let dir_1_node = first_tree.get_by_path(Path::new("files/dir_1"))?.unwrap();
assert_eq!(dir_1_node.num_vnodes(), 3);
for i in 0..10 {
let dir_num = i % 2;
let new_file = repo
.path
.join("files")
.join(format!("dir_{dir_num}"))
.join(format!("new_file_{i}.txt"));
util::fs::write_to_path(&new_file, format!("New fileeeee {i}"))?;
repositories::add(&repo, &new_file).await?;
}
let second_commit = super::commit(&repo, "Second commit")?;
assert!(first_commit.id != second_commit.id);
let head_commit = repositories::commits::head_commit(&repo)?;
assert_eq!(head_commit.id, second_commit.id);
let second_tree = CommitMerkleTree::from_commit(&repo, &second_commit)?;
second_tree.print();
for i in 0..10 {
let dir_num = i % 2;
let file_path = Path::new("files")
.join(format!("dir_{dir_num}"))
.join(format!("new_file_{i}.txt"));
let file_node = second_tree.get_by_path(&file_path)?;
assert!(file_node.is_some());
let file_node =
repositories::tree::get_node_by_path(&repo, &second_commit, &file_path)?;
assert!(file_node.is_some());
}
Ok(())
})
.await
}
#[tokio::test]
async fn test_commit_20_files_6_vnode_size() -> Result<(), OxenError> {
test::run_empty_dir_test_async(|dir| async move {
let mut repo = repositories::init::init(dir)?;
repo.set_vnode_size(6);
add_n_files_m_dirs(&repo, 20, 1).await?;
let status = repositories::status(&repo)?;
status.print();
let first_commit = super::commit(&repo, "First commit")?;
let first_tree = CommitMerkleTree::from_commit(&repo, &first_commit)?;
first_tree.print();
let root_node = first_tree.get_by_path(Path::new(""))?.unwrap();
assert_eq!(root_node.num_vnodes(), 1);
let dir_0_node = first_tree.get_by_path(Path::new("files/dir_0"))?.unwrap();
assert_eq!(dir_0_node.num_vnodes(), 4);
let new_file = repo.path.join("files/dir_0/new_file.txt");
util::fs::write_to_path(&new_file, "New file")?;
repositories::add(&repo, &new_file).await?;
let second_commit = super::commit(&repo, "Second commit")?;
assert!(first_commit.id != second_commit.id);
let head_commit = repositories::commits::head_commit(&repo)?;
assert_eq!(head_commit.id, second_commit.id);
let second_tree = CommitMerkleTree::from_commit(&repo, &second_commit)?;
second_tree.print();
let second_dir_0_node = second_tree.get_by_path(Path::new("files/dir_0"))?.unwrap();
assert_eq!(second_dir_0_node.num_vnodes(), 4);
let first_children_hashes: HashSet<MerkleHash> =
dir_0_node.children.iter().map(|vnode| vnode.hash).collect();
let second_children_hashes: HashSet<MerkleHash> = second_dir_0_node
.children
.iter()
.map(|vnode| vnode.hash)
.collect();
let intersection: HashSet<&MerkleHash> = second_children_hashes
.intersection(&first_children_hashes)
.collect();
assert_eq!(intersection.len(), 3);
Ok(())
})
.await
}
#[tokio::test]
async fn test_third_commit() -> Result<(), OxenError> {
test::run_empty_dir_test_async(|dir| async move {
let repo = repositories::init::init(dir)?;
add_n_files_m_dirs(&repo, 10, 3).await?;
let status = repositories::status(&repo)?;
status.print();
let first_commit = super::commit(&repo, "First commit")?;
let first_tree = CommitMerkleTree::from_commit(&repo, &first_commit)?;
first_tree.print();
let original_readme_node = first_tree.get_by_path(Path::new("README.md"))?;
assert!(original_readme_node.is_some());
let original_readme_node = original_readme_node.unwrap();
let original_readme_hash = original_readme_node.hash;
let new_file = repo.path.join("README.md");
util::fs::write_to_path(&new_file, "Update README.md in second commit")?;
repositories::add(&repo, &new_file).await?;
let second_commit = super::commit(&repo, "Second commit")?;
assert!(first_commit.id != second_commit.id);
let head_commit = repositories::commits::head_commit(&repo)?;
assert_eq!(head_commit.id, second_commit.id);
let second_tree = CommitMerkleTree::from_commit(&repo, &second_commit)?;
second_tree.print();
let updated_readme_node = second_tree.get_by_path(Path::new("README.md"))?;
assert!(updated_readme_node.is_some());
let updated_readme_node = updated_readme_node.unwrap();
let updated_readme_hash = updated_readme_node.hash;
assert!(original_readme_hash != updated_readme_hash);
let new_file = repo.path.join("files/dir_1/new_file.txt");
util::fs::write_to_path(&new_file, "New file")?;
repositories::add(&repo, &new_file).await?;
let third_commit = super::commit(&repo, "Third commit")?;
let third_tree = CommitMerkleTree::from_commit(&repo, &third_commit)?;
third_tree.print();
let head_commit = repositories::commits::head_commit(&repo)?;
assert_eq!(head_commit.id, third_commit.id);
assert!(third_commit.id != second_commit.id);
assert!(third_commit.id != first_commit.id);
let dir_hashes = CommitMerkleTree::dir_hashes(&repo, &third_commit)?;
for (path, hash) in dir_hashes {
println!("dir_hash: {path:?} {hash}");
let node = third_tree.get_by_path(&path)?.unwrap();
assert_eq!(node.hash, hash);
}
Ok(())
})
.await
}
#[tokio::test]
async fn test_rm_dir_doesnt_break_tree() -> Result<(), OxenError> {
test::run_training_data_repo_test_no_commits_async(async |repo| {
let readme = repo.path.join("README.md");
repositories::add(&repo, &readme).await?;
let first_commit = super::commit(&repo, "Initial commit")?;
let first_tree = CommitMerkleTree::from_commit(&repo, &first_commit)?;
let first_root_dir_node = first_tree.get_by_path(Path::new(""))?.unwrap();
repositories::add(&repo, repo.path.join("train")).await?;
repositories::add(&repo, repo.path.join("test")).await?;
let second_commit = super::commit(&repo, "Adding the data")?;
let second_tree = CommitMerkleTree::from_commit(&repo, &second_commit)?;
let second_root_dir_node = second_tree.get_by_path(Path::new(""))?.unwrap();
assert_ne!(first_root_dir_node.hash, second_root_dir_node.hash);
let rm_opts = RmOpts::from_path_recursive(Path::new("train"));
repositories::rm(&repo, &rm_opts)?;
let third_commit = super::commit(&repo, "Removing train dir")?;
let third_tree = CommitMerkleTree::from_commit(&repo, &third_commit)?;
let third_root_dir_node = third_tree.get_by_path(Path::new(""))?.unwrap();
assert_ne!(first_root_dir_node.hash, third_root_dir_node.hash);
let first_tree_2 = CommitMerkleTree::from_commit(&repo, &first_commit)?;
let first_root_dir_node_2 = first_tree_2.get_by_path(Path::new(""))?.unwrap();
assert_eq!(first_root_dir_node.hash, first_root_dir_node_2.hash);
assert_ne!(first_root_dir_node_2.hash, third_root_dir_node.hash);
Ok(())
})
.await
}
}