use std::collections::BTreeMap;
use sha1::{Digest, Sha1};
use crate::error::{Error, Result};
use super::model::{Key, ParsedTree, Tombstone, TreeValue};
use crate::git_utils;
use crate::tree_paths;
use crate::types::{
decode_key_path_segments, decode_path_target_segments, TargetType, LIST_VALUE_DIR,
PATH_TARGET_SEPARATOR, SET_VALUE_DIR, STRING_VALUE_BLOB, TOMBSTONE_BLOB, TOMBSTONE_ROOT,
};
/// In-memory staging structure for assembling a git tree before writing it:
/// blob contents plus nested subdirectories, both ordered by name so the
/// resulting tree is deterministic.
#[derive(Default)]
pub struct TreeDir {
    // Blob contents keyed by file name within this directory.
    pub files: BTreeMap<String, Vec<u8>>,
    // Child directories keyed by directory name.
    pub dirs: BTreeMap<String, TreeDir>,
}
/// Insert `content` at the slash-split path `parts` inside `dir`, creating
/// intermediate directories as needed. The last element of `parts` is the
/// file name; everything before it is a directory chain.
///
/// An empty `parts` slice is a no-op (the previous implementation panicked
/// on it with an out-of-bounds index).
pub fn insert_path(dir: &mut TreeDir, parts: &[&str], content: Vec<u8>) {
    match parts {
        [] => {}
        [file_name] => {
            dir.files.insert((*file_name).to_string(), content);
        }
        [dir_name, rest @ ..] => {
            let child = dir.dirs.entry((*dir_name).to_string()).or_default();
            insert_path(child, rest, content);
        }
    }
}
pub fn build_dir(repo: &gix::Repository, dir: &TreeDir) -> Result<gix::ObjectId> {
let mut editor = repo
.empty_tree()
.edit()
.map_err(|e| Error::Other(format!("{e}")))?;
for (name, content) in &dir.files {
let blob_id = repo
.write_blob(content)
.map_err(|e| Error::Other(format!("{e}")))?
.detach();
editor
.upsert(name, gix::objs::tree::EntryKind::Blob, blob_id)
.map_err(|e| Error::Other(format!("{e}")))?;
}
for (name, child_dir) in &dir.dirs {
let child_oid = build_dir(repo, child_dir)?;
editor
.upsert(name, gix::objs::tree::EntryKind::Tree, child_oid)
.map_err(|e| Error::Other(format!("{e}")))?;
}
Ok(editor
.write()
.map_err(|e| Error::Other(format!("{e}")))?
.detach())
}
/// Build a git tree from a flat map of slash-separated paths to blob
/// contents, by folding the paths into a nested [`TreeDir`] and writing it.
pub fn build_tree_from_paths(
    repo: &gix::Repository,
    files: &BTreeMap<String, Vec<u8>>,
) -> Result<gix::ObjectId> {
    let mut root = TreeDir::default();
    for (path, blob) in files {
        let segments = path.split('/').collect::<Vec<_>>();
        insert_path(&mut root, &segments, blob.clone());
    }
    build_dir(repo, &root)
}
/// Parse a stored metadata tree into live values and tombstones.
///
/// Every blob reachable from `tree_id` (paths prefixed with `prefix`) is
/// collected and classified by the marker segments in its path: key
/// tombstones, list-entry tombstones, set-member tombstones, set members,
/// list entries, and plain string values. Unrecognized or undecodable
/// paths/blobs are skipped rather than failing the whole parse. After
/// classification, list entries are sorted by name and any tombstone that
/// is shadowed by a live value is discarded.
///
/// # Errors
///
/// Fails only when the underlying git objects cannot be read.
pub fn parse_tree(
    repo: &gix::Repository,
    tree_id: gix::ObjectId,
    prefix: &str,
) -> Result<ParsedTree> {
    let mut parsed = ParsedTree::default();
    let mut paths: BTreeMap<String, Vec<u8>> = BTreeMap::new();
    collect_blobs(repo, tree_id, prefix, &mut paths)?;
    for (path, content) in &paths {
        let parts: Vec<&str> = path.split('/').collect();
        if parts.is_empty() {
            continue;
        }
        let Ok((target_type, target_value, key_parts)) = parse_path_parts(&parts) else {
            continue;
        };
        if key_parts.is_empty() {
            continue;
        }
        // Key tombstone: TOMBSTONE_ROOT / <key…> / TOMBSTONE_BLOB.
        if key_parts[0] == TOMBSTONE_ROOT
            && key_parts.len() >= 3
            && key_parts[key_parts.len() - 1] == TOMBSTONE_BLOB
        {
            let key_segments = &key_parts[1..key_parts.len() - 1];
            let Ok(key) = decode_key_path_segments(key_segments) else {
                continue;
            };
            let Some(tombstone) = parse_tombstone_blob(content) else {
                continue;
            };
            let entry_key = Key {
                target_type,
                target_value,
                key,
            };
            // When duplicates exist, keep the newest tombstone.
            match parsed.tombstones.get(&entry_key) {
                Some(existing) if existing.timestamp >= tombstone.timestamp => {}
                _ => {
                    parsed.tombstones.insert(entry_key, tombstone);
                }
            }
            continue;
        }
        // List-entry tombstone: <key…> / LIST_VALUE_DIR / TOMBSTONE_ROOT / <entry>.
        //
        // BUGFIX: this branch must be tested BEFORE the set-member tombstone
        // branch below. A list-entry tombstone path also has TOMBSTONE_ROOT at
        // position len-2, so the more general set branch used to intercept it,
        // leaving this branch unreachable and `parsed.list_tombstones` empty.
        if key_parts.len() >= 4
            && key_parts[key_parts.len() - 3] == LIST_VALUE_DIR
            && key_parts[key_parts.len() - 2] == TOMBSTONE_ROOT
        {
            let key_segments = &key_parts[..key_parts.len() - 3];
            let Ok(key) = decode_key_path_segments(key_segments) else {
                continue;
            };
            let entry_name = key_parts[key_parts.len() - 1].to_string();
            let Some(tombstone) = parse_tombstone_blob(content) else {
                continue;
            };
            let entry_key = (
                Key {
                    target_type,
                    target_value,
                    key,
                },
                entry_name,
            );
            // When duplicates exist, keep the newest tombstone.
            match parsed.list_tombstones.get(&entry_key) {
                Some(existing) if existing.timestamp >= tombstone.timestamp => {}
                _ => {
                    parsed.list_tombstones.insert(entry_key, tombstone);
                }
            }
            continue;
        }
        // Set-member tombstone: <key…> / TOMBSTONE_ROOT / <member>.
        if key_parts.len() >= 3 && key_parts[key_parts.len() - 2] == TOMBSTONE_ROOT {
            let key_segments = &key_parts[..key_parts.len() - 2];
            let Ok(key) = decode_key_path_segments(key_segments) else {
                continue;
            };
            let member_id = key_parts[key_parts.len() - 1].to_string();
            let content_str = String::from_utf8_lossy(content).to_string();
            let entry_key = (
                Key {
                    target_type,
                    target_value,
                    key,
                },
                member_id,
            );
            parsed.set_tombstones.insert(entry_key, content_str);
            continue;
        }
        // Set member: <key…> / SET_VALUE_DIR / <member>.
        if key_parts.len() >= 2 && key_parts[key_parts.len() - 2] == SET_VALUE_DIR {
            let key_segments = &key_parts[..key_parts.len() - 2];
            let Ok(key) = decode_key_path_segments(key_segments) else {
                continue;
            };
            let member_id = key_parts[key_parts.len() - 1].to_string();
            let content_str = String::from_utf8_lossy(content).to_string();
            let entry = parsed
                .values
                .entry(Key {
                    target_type,
                    target_value,
                    key,
                })
                .or_insert_with(|| TreeValue::Set(BTreeMap::new()));
            if let TreeValue::Set(ref mut set) = entry {
                set.insert(member_id, content_str);
            }
            continue;
        }
        // List entry: <key…> / LIST_VALUE_DIR / <entry>.
        if key_parts.len() >= 3
            && key_parts[key_parts.len() - 2] == LIST_VALUE_DIR
            && git_utils::is_list_entry_name(key_parts[key_parts.len() - 1])
        {
            let key_segments = &key_parts[..key_parts.len() - 2];
            let Ok(key) = decode_key_path_segments(key_segments) else {
                continue;
            };
            let entry_name = key_parts[key_parts.len() - 1].to_string();
            let content_str = String::from_utf8_lossy(content).to_string();
            let entry = parsed
                .values
                .entry(Key {
                    target_type,
                    target_value,
                    key,
                })
                .or_insert_with(|| TreeValue::List(Vec::new()));
            if let TreeValue::List(ref mut list) = entry {
                list.push((entry_name, content_str));
            }
            continue;
        }
        // Plain string value: <key…> / STRING_VALUE_BLOB.
        if key_parts.len() >= 2 && key_parts[key_parts.len() - 1] == STRING_VALUE_BLOB {
            let key_segments = &key_parts[..key_parts.len() - 1];
            let Ok(key) = decode_key_path_segments(key_segments) else {
                continue;
            };
            let content_str = String::from_utf8_lossy(content).to_string();
            parsed.values.insert(
                Key {
                    target_type,
                    target_value,
                    key,
                },
                TreeValue::String(content_str),
            );
            continue;
        }
    }
    // Present list entries in deterministic, name-sorted order.
    for value in parsed.values.values_mut() {
        if let TreeValue::List(ref mut list) = value {
            list.sort_by(|a, b| a.0.cmp(&b.0));
        }
    }
    // Drop tombstones shadowed by a live value of the same key/member/entry.
    parsed
        .tombstones
        .retain(|key, _| !parsed.values.contains_key(key));
    parsed
        .set_tombstones
        .retain(|(key, member_id), _| match parsed.values.get(key) {
            Some(TreeValue::Set(set)) => !set.contains_key(member_id),
            _ => true,
        });
    parsed
        .list_tombstones
        .retain(|(key, entry_name), _| match parsed.values.get(key) {
            Some(TreeValue::List(list)) => !list.iter().any(|(name, _)| name == entry_name),
            _ => true,
        });
    Ok(parsed)
}
/// Deserialize a tombstone JSON blob, yielding `None` on malformed data.
fn parse_tombstone_blob(content: &[u8]) -> Option<Tombstone> {
    let decoded: serde_json::Result<Tombstone> = serde_json::from_slice(content);
    decoded.ok()
}
fn collect_blobs(
repo: &gix::Repository,
tree_id: gix::ObjectId,
prefix: &str,
paths: &mut BTreeMap<String, Vec<u8>>,
) -> Result<()> {
let tree = repo
.find_tree(tree_id)
.map_err(|e| Error::Other(format!("{e}")))?;
for entry in tree.iter() {
let entry = entry.map_err(|e| Error::Other(format!("{e}")))?;
let name = std::str::from_utf8(entry.filename())
.map_err(|_| Error::Other("non-UTF8 tree entry".into()))?;
let full_path = if prefix.is_empty() {
name.to_string()
} else {
format!("{prefix}/{name}")
};
if entry.mode().is_blob() {
let blob = repo
.find_blob(entry.object_id())
.map_err(|e| Error::Other(format!("{e}")))?;
paths.insert(full_path, blob.data.clone());
} else if entry.mode().is_tree() {
collect_blobs(repo, entry.object_id(), &full_path, paths)?;
}
}
Ok(())
}
/// Split a tree path into `(target type, target value, remaining key parts)`.
///
/// Layouts handled:
/// * `project/<key…>` — the target value is empty.
/// * `path/<segments…>/PATH_TARGET_SEPARATOR/<key…>` — the target value is
///   decoded from the segments before the separator.
/// * `commit/<fanout>/<hash>/<key…>` — commit ids contain no `/`, so the
///   value is always exactly one segment.
/// * other sharded targets: `type/<fanout>/<value segments…>/<key…>` where
///   the value itself may contain `/`. The value/key boundary is recovered
///   by finding the shortest segment run whose SHA-1 shard prefix matches
///   `<fanout>` (see `value_shard_prefix`).
///
/// # Errors
///
/// Returns `Error::InvalidTreePath` for empty paths, unknown target types,
/// a `path` target missing its separator or with an invalid layout, or a
/// sharded path with fewer than 4 segments.
pub fn parse_path_parts<'a>(parts: &'a [&'a str]) -> Result<(TargetType, String, &'a [&'a str])> {
    if parts.is_empty() {
        return Err(Error::InvalidTreePath("empty path".into()));
    }
    let target_type_str = parts[0];
    let target_type: TargetType = target_type_str
        .parse()
        .map_err(|_| Error::InvalidTreePath(format!("unknown target type: {target_type_str:?}")))?;
    if target_type == TargetType::Project {
        // Project-scoped entries carry no target value at all.
        return Ok((TargetType::Project, String::new(), &parts[1..]));
    }
    if target_type == TargetType::Path {
        // Path targets delimit their value with an explicit separator segment
        // instead of a fanout shard.
        let separator_index = parts
            .iter()
            .position(|part| *part == PATH_TARGET_SEPARATOR)
            .ok_or_else(|| {
                Error::InvalidTreePath(format!("path target missing separator: {parts:?}"))
            })?;
        // Require at least one value segment before the separator and at
        // least one key segment after it.
        if separator_index < 2 || separator_index + 1 >= parts.len() {
            return Err(Error::InvalidTreePath(format!(
                "invalid path target layout: {parts:?}"
            )));
        }
        let target_value = decode_path_target_segments(&parts[1..separator_index])?;
        return Ok((target_type, target_value, &parts[separator_index + 1..]));
    }
    // Sharded layout: type / fanout / value… / key… — needs at least one
    // value segment and one key segment.
    if parts.len() < 4 {
        return Err(Error::InvalidTreePath(format!(
            "path too short for sharded target: {parts:?}"
        )));
    }
    if target_type != TargetType::Commit {
        let fanout = parts[1];
        // The value may contain '/', so its end is ambiguous. Never extend
        // the candidate value past the first reserved marker segment.
        let target_end_limit = parts
            .iter()
            .enumerate()
            .skip(3)
            .find_map(|(idx, part)| {
                matches!(
                    *part,
                    STRING_VALUE_BLOB | LIST_VALUE_DIR | SET_VALUE_DIR | TOMBSTONE_ROOT
                )
                .then_some(idx)
            })
            .unwrap_or(parts.len());
        // Greedy shortest match: take the first segment run whose shard
        // prefix reproduces the stored fanout.
        for target_end in 3..=target_end_limit {
            let target_value = parts[2..target_end].join("/");
            if value_shard_prefix(&target_value) == fanout {
                return Ok((target_type, target_value, &parts[target_end..]));
            }
        }
    }
    // Commits (and the fallback when no shard match is found): the value is
    // exactly one segment.
    let target_value = parts[2].to_string();
    Ok((target_type, target_value, &parts[3..]))
}
/// First two lowercase-hex characters of the SHA-1 of `value`; used as a
/// fanout directory name so target values are spread across subtrees.
fn value_shard_prefix(value: &str) -> String {
    let digest = Sha1::digest(value.as_bytes());
    let hex = format!("{digest:x}");
    hex[..2].to_string()
}
/// Serialize merged state back into a git tree.
///
/// Each live value (string / list / set) and each tombstone is rendered at
/// its canonical tree path, then the resulting flat path map is written as
/// one tree via [`build_tree_from_paths`].
///
/// # Errors
///
/// Fails if a tree path cannot be computed, a tombstone cannot be
/// serialized, or the git objects cannot be written.
pub fn build_merged_tree(
    repo: &gix::Repository,
    values: &BTreeMap<Key, TreeValue>,
    tombstones: &BTreeMap<Key, Tombstone>,
    set_tombstones: &BTreeMap<(Key, String), String>,
    list_tombstones: &BTreeMap<(Key, String), Tombstone>,
) -> Result<gix::ObjectId> {
    let mut files: BTreeMap<String, Vec<u8>> = BTreeMap::new();
    for (k, tree_val) in values {
        let target = k.to_target();
        match tree_val {
            TreeValue::String(s) => {
                let full_path = tree_paths::tree_path(&target, &k.key)?;
                files.insert(full_path, s.as_bytes().to_vec());
            }
            TreeValue::List(list_entries) => {
                // Each list entry becomes its own blob under the list dir.
                let list_dir_path = tree_paths::list_dir_path(&target, &k.key)?;
                for (entry_name, content) in list_entries {
                    let full_path = format!("{list_dir_path}/{entry_name}");
                    files.insert(full_path, content.as_bytes().to_vec());
                }
            }
            TreeValue::Set(set_members) => {
                // Each set member becomes its own blob under the set dir.
                let set_dir_path = tree_paths::set_dir_path(&target, &k.key)?;
                for (member_id, content) in set_members {
                    let full_path = format!("{set_dir_path}/{member_id}");
                    files.insert(full_path, content.as_bytes().to_vec());
                }
            }
        }
    }
    for (k, tombstone) in tombstones {
        let target = k.to_target();
        let full_path = tree_paths::tombstone_path(&target, &k.key)?;
        // Serialize the tombstone directly; the previous code rebuilt an
        // identical struct field-by-field and cloned `email` needlessly.
        files.insert(full_path, serde_json::to_vec(tombstone)?);
    }
    for ((k, member_id), tombstone_value) in set_tombstones {
        let target = k.to_target();
        let full_path = tree_paths::set_member_tombstone_path(&target, &k.key, member_id)?;
        files.insert(full_path, tombstone_value.as_bytes().to_vec());
    }
    for ((k, entry_name), tombstone) in list_tombstones {
        let target = k.to_target();
        let full_path = tree_paths::list_entry_tombstone_path(&target, &k.key, entry_name)?;
        files.insert(full_path, serde_json::to_vec(tombstone)?);
    }
    build_tree_from_paths(repo, &files)
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    // Path targets use an explicit separator segment, so no fanout shard is
    // involved and the value segments are decoded verbatim.
    #[test]
    fn test_parse_path_parts_for_path_target() {
        let segments = [
            "path",
            "src",
            "~__generated",
            "file.rs",
            "__target__",
            "owner",
            "__value",
        ];
        let (kind, value, remainder) = parse_path_parts(&segments).unwrap();
        assert_eq!(kind, TargetType::Path);
        assert_eq!(value, "src/__generated/file.rs");
        assert_eq!(remainder, &["owner", "__value"]);
    }

    // Branch names may contain '/', which the sharded layout splits across
    // segments; parsing must reassemble them by matching the fanout hash.
    #[test]
    fn test_parse_path_parts_for_branch_with_slash() {
        let branch = "alex/trails-multi-pr-a57e52c3";
        let shard = value_shard_prefix(branch);
        let segments = [
            "branch",
            shard.as_str(),
            "alex",
            "trails-multi-pr-a57e52c3",
            "review",
            "status",
            "__value",
        ];
        let (kind, value, remainder) = parse_path_parts(&segments).unwrap();
        assert_eq!(kind, TargetType::Branch);
        assert_eq!(value, branch);
        assert_eq!(remainder, &["review", "status", "__value"]);
    }
}