use crate::config::TreeConfig;
use crate::digest::ValueDigest;
use crate::git::types::*;
use crate::git::versioned_store::GitVersionedKvStore;
use crate::node::ProllyNode;
use gix::prelude::*;
use std::collections::HashMap;
/// Git-style operations (merge, diff, show, revert) layered on top of a
/// [`GitVersionedKvStore`].
///
/// `N` is the const parameter forwarded to the store, and is also used for the
/// `ValueDigest<N>` / `ProllyNode<N>` types this module reads back from Git objects.
pub struct GitOperations<const N: usize> {
/// The versioned key-value store every operation reads from and writes to.
store: GitVersionedKvStore<N>,
}
impl<const N: usize> GitOperations<N> {
pub fn new(store: GitVersionedKvStore<N>) -> Self {
GitOperations { store }
}
/// Attempts to merge `other_branch` into the store's current branch.
///
/// Outcomes:
/// * Both branches resolve to the same commit: returns
///   `MergeResult::FastForward` with that commit and changes nothing.
/// * The current commit is an ancestor of the other commit: checks out
///   `other_branch` on the store and returns `MergeResult::FastForward`
///   with the other commit.
/// * Otherwise: no merge is performed. A single placeholder `KvConflict`
///   (key `<merge>`) is returned inside `MergeResult::Conflict` telling the
///   caller that a manual merge is required.
///
/// # Errors
///
/// Returns a `GitKvError` if either branch cannot be resolved to a commit or
/// if the checkout performed for a fast-forward fails.
pub fn merge(&mut self, other_branch: &str) -> Result<MergeResult, GitKvError> {
    let current_branch = self.store.current_branch();
    let current_commit = self.get_branch_commit(current_branch)?;
    let other_commit = self.get_branch_commit(other_branch)?;

    // Identical tips: nothing to do.
    if current_commit == other_commit {
        return Ok(MergeResult::FastForward(current_commit));
    }

    // The other branch strictly extends ours, so a fast-forward is enough.
    if self.is_fast_forward_possible(&current_commit, &other_commit)? {
        self.store.checkout(other_branch)?;
        return Ok(MergeResult::FastForward(other_commit));
    }

    // Diverged histories are not merged automatically; report a synthetic
    // conflict entry that carries the instructions for the user.
    let conflicts = vec![crate::git::types::KvConflict {
        key: b"<merge>".to_vec(),
        base_value: None,
        our_value: Some(b"Cannot automatically merge - manual merge required".to_vec()),
        their_value: Some(b"Use 'git merge' or resolve conflicts manually".to_vec()),
    }];
    Ok(MergeResult::Conflict(conflicts))
}
/// Reports whether moving from `current_commit` to `other_commit` is a
/// fast-forward, i.e. whether `current_commit` is an ancestor of
/// `other_commit` as determined by [`Self::is_ancestor`].
fn is_fast_forward_possible(
&self,
current_commit: &gix::ObjectId,
other_commit: &gix::ObjectId,
) -> Result<bool, GitKvError> {
self.is_ancestor(current_commit, other_commit)
}
/// Returns `Ok(true)` when `ancestor` is reachable from `descendant` by
/// following parent links (a commit counts as its own ancestor).
///
/// The history is walked breadth-first starting at `descendant`. Objects
/// that cannot be found or that do not decode as commits are skipped rather
/// than reported, so with the current implementation this function never
/// returns `Err`.
fn is_ancestor(
    &self,
    ancestor: &gix::ObjectId,
    descendant: &gix::ObjectId,
) -> Result<bool, GitKvError> {
    if ancestor == descendant {
        return Ok(true);
    }

    let mut visited = std::collections::HashSet::new();
    let mut queue = std::collections::VecDeque::new();
    queue.push_back(*descendant);

    while let Some(current_commit) = queue.pop_front() {
        // `insert` returns false when the id was already present, which
        // covers commits reachable through more than one path.
        if !visited.insert(current_commit) {
            continue;
        }
        if current_commit == *ancestor {
            return Ok(true);
        }

        let mut buffer = Vec::new();
        if let Ok(commit_obj) = self
            .store
            .git_repo()
            .objects
            .find(&current_commit, &mut buffer)
        {
            if let Ok(gix::objs::ObjectRef::Commit(commit)) = commit_obj.decode() {
                for parent_id in commit.parents() {
                    if !visited.contains(&parent_id) {
                        queue.push_back(parent_id);
                    }
                }
            }
        }
    }

    Ok(false)
}
/// Computes the key-value differences between two revisions.
///
/// `from` and `to` are revision strings resolved via
/// [`Self::parse_commit_id`]. The key-value state of each revision is
/// loaded and compared key by key:
///
/// * key only in `to`: `DiffOperation::Added`
/// * key only in `from`: `DiffOperation::Removed`
/// * key in both with different values: `DiffOperation::Modified`
/// * key in both with equal values: omitted
///
/// The returned entries are sorted by key (byte-wise), so the same pair of
/// revisions always yields the same sequence.
///
/// # Errors
///
/// Returns a `GitKvError` if either revision cannot be resolved or its
/// key-value state cannot be loaded.
pub fn diff(&self, from: &str, to: &str) -> Result<Vec<KvDiff>, GitKvError> {
    let from_commit_id = self.parse_commit_id(from)?;
    let to_commit_id = self.parse_commit_id(to)?;
    let from_state = self.get_kv_state_at_commit(&from_commit_id)?;
    let to_state = self.get_kv_state_at_commit(&to_commit_id)?;

    // Union of both key sets, borrowed (no per-key clone) and ordered so the
    // output does not depend on hash-map iteration order.
    let all_keys: std::collections::BTreeSet<&Vec<u8>> =
        from_state.keys().chain(to_state.keys()).collect();

    let mut diffs = Vec::new();
    for key in all_keys {
        let operation = match (from_state.get(key), to_state.get(key)) {
            (None, Some(value)) => DiffOperation::Added(value.clone()),
            (Some(value), None) => DiffOperation::Removed(value.clone()),
            (Some(old), Some(new)) if old != new => DiffOperation::Modified {
                old: old.clone(),
                new: new.clone(),
            },
            // Unchanged value; (None, None) cannot occur because every key
            // comes from at least one of the two maps.
            _ => continue,
        };
        diffs.push(KvDiff {
            key: key.clone(),
            operation,
        });
    }
    Ok(diffs)
}
/// Loads a commit and describes it together with the key-value changes it
/// introduced.
///
/// The changes are the diff against the commit's first parent. A commit
/// without parents reports every key in its state as
/// `DiffOperation::Added`.
///
/// # Errors
///
/// Returns a `GitKvError` if `commit` cannot be resolved, the object cannot
/// be found or is not a commit, or the key-value state needed for the diff
/// cannot be loaded.
pub fn show(&self, commit: &str) -> Result<CommitDetails, GitKvError> {
    let commit_id = self.parse_commit_id(commit)?;

    let mut raw = Vec::new();
    let object = self
        .store
        .git_repo()
        .objects
        .find(&commit_id, &mut raw)
        .map_err(|e| GitKvError::GitObjectError(format!("Commit not found: {e}")))?;

    // Decode failures and non-commit objects are reported the same way.
    let Ok(gix::objs::ObjectRef::Commit(commit_ref)) = object.decode() else {
        return Err(GitKvError::GitObjectError(
            "Object is not a commit".to_string(),
        ));
    };

    let info = CommitInfo {
        id: commit_id,
        author: commit_ref.author().name.to_string(),
        committer: commit_ref.committer().name.to_string(),
        message: commit_ref.message().title.to_string(),
        timestamp: commit_ref.time().seconds,
    };

    let parent_ids: Vec<gix::ObjectId> = commit_ref.parents().collect();

    let changes = match parent_ids.first() {
        // Regular commit: compare with the first parent.
        Some(first_parent) => self.diff(&first_parent.to_string(), &commit_id.to_string())?,
        // Root commit: everything it contains counts as added.
        None => {
            let state = self.get_kv_state_at_commit(&commit_id)?;
            state
                .iter()
                .map(|(key, value)| KvDiff {
                    key: key.clone(),
                    operation: DiffOperation::Added(value.clone()),
                })
                .collect()
        }
    };

    Ok(CommitDetails {
        info,
        changes,
        parent_ids,
    })
}
pub fn revert(&mut self, commit: &str) -> Result<(), GitKvError> {
let _commit_id = self.parse_commit_id(commit)?;
let details = self.show(commit)?;
for diff in details.changes {
match diff.operation {
DiffOperation::Added(_) => {
self.store.delete(&diff.key)?;
}
DiffOperation::Removed(value) => {
self.store.insert(diff.key, value)?;
}
DiffOperation::Modified { old, new: _ } => {
self.store.insert(diff.key, old)?;
}
}
}
let message = format!("Revert \"{}\"", details.info.message);
self.store.commit(&message)?;
Ok(())
}
/// Resolves a branch name (or, failing that, any revision string) to an
/// object id.
///
/// Names that do not already start with `refs/` are looked up under
/// `refs/heads/`. If the reference lookup fails, `branch` is handed to
/// `rev_parse_single` as a fallback.
///
/// # Errors
///
/// Returns `GitKvError::GitObjectError` if the reference exists but does not
/// point directly at an object id, or if the fallback resolution fails.
fn get_branch_commit(&self, branch: &str) -> Result<gix::ObjectId, GitKvError> {
    let branch_ref = if branch.starts_with("refs/") {
        branch.to_string()
    } else {
        format!("refs/heads/{branch}")
    };

    if let Ok(reference) = self.store.git_repo().refs.find(&branch_ref) {
        return reference
            .target
            .try_id()
            .map(|commit_id| commit_id.to_owned())
            .ok_or_else(|| {
                GitKvError::GitObjectError(format!(
                    "Branch {branch} does not point to a commit"
                ))
            });
    }

    // Not a known reference: treat the input as a generic revision spec.
    self.store
        .git_repo()
        .rev_parse_single(branch)
        .map(|object| object.into())
        .map_err(|e| {
            GitKvError::GitObjectError(format!(
                "Cannot resolve branch/commit {branch}: {e}"
            ))
        })
}
/// Returns the full key-value state as of `commit_id`.
///
/// When `commit_id` is the repository's current HEAD the state is read
/// straight from the live store; any other commit is reconstructed from the
/// Git objects it references.
///
/// # Errors
///
/// Returns a `GitKvError` if HEAD cannot be determined or the state cannot
/// be loaded.
fn get_kv_state_at_commit(
    &self,
    commit_id: &gix::ObjectId,
) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
    let head = self
        .store
        .git_repo()
        .head_id()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to get HEAD: {e}")))?;

    if *commit_id != head {
        return self.reconstruct_kv_state_from_commit(commit_id);
    }
    self.get_current_kv_state()
}
/// Rebuilds the key-value state stored in `commit_id`.
///
/// Thin wrapper that forwards to
/// [`Self::reconstruct_state_from_git_objects`].
fn reconstruct_kv_state_from_commit(
&self,
commit_id: &gix::ObjectId,
) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
self.reconstruct_state_from_git_objects(commit_id)
}
/// Rebuilds the key-value state of `commit_id` from the files committed in
/// its tree.
///
/// Two files are read from the commit: `prolly_config_tree_config` (the
/// tree configuration, which carries the root hash) and
/// `prolly_hash_mappings` (prolly hash to Git object id). Each file is looked
/// for first under the process's current directory relative to the
/// repository work dir, then at the repository root. The tree is then walked
/// from the root hash to collect every key-value pair.
///
/// # Errors
///
/// Returns `GitKvError::GitObjectError` if the current directory cannot be
/// determined or lies outside the work dir, if either file is missing or
/// unreadable in the commit, if the config has no root hash, or if walking
/// the tree fails.
fn reconstruct_state_from_git_objects(
    &self,
    commit_id: &gix::ObjectId,
) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
    let cwd = std::env::current_dir()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to get current dir: {e}")))?;
    let repo_root = self
        .store
        .git_repo()
        .work_dir()
        .ok_or_else(|| GitKvError::GitObjectError("Not in a working directory".to_string()))?;
    let relative = cwd.strip_prefix(repo_root).map_err(|_| {
        GitKvError::GitObjectError("Current directory not within git repository".to_string())
    })?;
    let dataset_name = relative.to_string_lossy();

    // Candidate locations, most specific first.
    let config_candidates = [
        format!("{dataset_name}/prolly_config_tree_config"),
        "prolly_config_tree_config".to_string(),
    ];
    let mapping_candidates = [
        format!("{dataset_name}/prolly_hash_mappings"),
        "prolly_hash_mappings".to_string(),
    ];

    // First candidate that reads and parses successfully wins.
    let tree_config = config_candidates
        .iter()
        .find_map(|path| self.read_prolly_config_from_commit(commit_id, path).ok())
        .ok_or_else(|| {
            GitKvError::GitObjectError(
                "Could not find prolly_config_tree_config in commit".to_string(),
            )
        })?;

    let hash_mappings = mapping_candidates
        .iter()
        .find_map(|path| self.read_hash_mappings_from_commit(commit_id, path).ok())
        .ok_or_else(|| {
            GitKvError::GitObjectError("Could not find prolly_hash_mappings in commit".to_string())
        })?;

    let root_hash = tree_config.root_hash.ok_or_else(|| {
        GitKvError::GitObjectError("Tree config has no root hash".to_string())
    })?;

    self.collect_keys_from_root_hash(&root_hash, &hash_mappings)
}
/// Reads `file_path` from the tree of `commit_id` and parses it as a JSON
/// encoded `TreeConfig<N>`.
///
/// # Errors
///
/// Returns a `GitKvError` if the file cannot be read from the commit or its
/// contents are not a valid tree config.
fn read_prolly_config_from_commit(
    &self,
    commit_id: &gix::ObjectId,
    file_path: &str,
) -> Result<TreeConfig<N>, GitKvError> {
    let raw = self.read_file_from_git_commit(commit_id, file_path)?;
    serde_json::from_slice::<TreeConfig<N>>(&raw)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to parse tree config: {e}")))
}
/// Reads the hash-mapping file at `file_path` from `commit_id`.
///
/// Each line is expected to look like `<prolly-hash-hex>:<git-object-id-hex>`.
/// Lines are skipped silently when they have no `:`, when the hash is not
/// valid hex, when the decoded hash is not exactly `N` bytes long, or when
/// the object id does not parse.
///
/// # Errors
///
/// Returns a `GitKvError` only if the file itself cannot be read from the
/// commit.
fn read_hash_mappings_from_commit(
    &self,
    commit_id: &gix::ObjectId,
    file_path: &str,
) -> Result<HashMap<ValueDigest<N>, gix::ObjectId>, GitKvError> {
    let raw = self.read_file_from_git_commit(commit_id, file_path)?;
    let text = String::from_utf8_lossy(&raw);

    let mut mappings = HashMap::new();
    for line in text.lines() {
        let Some((hash_str, object_id_str)) = line.split_once(':') else {
            continue;
        };
        let Ok(hash_bytes) = self.decode_hex(hash_str) else {
            continue;
        };
        if hash_bytes.len() != N {
            continue;
        }
        let prolly_hash = ValueDigest::raw_hash(&hash_bytes);
        let Ok(git_object_id) = gix::ObjectId::from_hex(object_id_str.as_bytes()) else {
            continue;
        };
        mappings.insert(prolly_hash, git_object_id);
    }
    Ok(mappings)
}
/// Returns the contents of `file_path` as stored in the tree of `commit_id`.
///
/// `file_path` is split on `/` and resolved component by component starting
/// at the commit's root tree.
///
/// # Errors
///
/// Returns `GitKvError::GitObjectError` if the commit cannot be found or
/// decoded, is not a commit object, or the path cannot be resolved to a blob.
fn read_file_from_git_commit(
    &self,
    commit_id: &gix::ObjectId,
    file_path: &str,
) -> Result<Vec<u8>, GitKvError> {
    let mut raw = Vec::new();
    let object = self
        .store
        .git_repo()
        .objects
        .find(commit_id, &mut raw)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to find commit: {e}")))?;
    let decoded = object
        .decode()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to decode commit: {e}")))?;
    let commit_ref = decoded
        .into_commit()
        .ok_or_else(|| GitKvError::GitObjectError("Object is not a commit".to_string()))?;

    let root_tree = commit_ref.tree();
    let components: Vec<&str> = file_path.split('/').collect();
    self.find_file_in_tree(&root_tree, &components, 0)
}
/// Resolves `path_parts[depth..]` inside the tree `tree_id` and returns the
/// bytes of the blob it names.
///
/// The tree is searched for an entry called `path_parts[depth]`. For the
/// last component that entry must be a blob, whose data is returned; for any
/// earlier component it must be a tree, which is descended into
/// recursively with `depth + 1`.
///
/// # Errors
///
/// Returns `GitKvError::GitObjectError` if `depth` is past the end of
/// `path_parts`, an object cannot be found or decoded, an entry has the
/// wrong kind for its position in the path, or no entry with the requested
/// name exists.
fn find_file_in_tree(
    &self,
    tree_id: &gix::ObjectId,
    path_parts: &[&str],
    depth: usize,
) -> Result<Vec<u8>, GitKvError> {
    if depth >= path_parts.len() {
        return Err(GitKvError::GitObjectError(
            "Path traversal error".to_string(),
        ));
    }
    let current_part = path_parts[depth];
    // Safe subtraction: the guard above guarantees at least one component.
    let is_final = depth == path_parts.len() - 1;

    let mut tree_buffer = Vec::new();
    let tree_ref = self
        .store
        .git_repo()
        .objects
        .find(tree_id, &mut tree_buffer)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to find tree: {e}")))?
        .decode()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to decode tree: {e}")))?
        .into_tree()
        .ok_or_else(|| GitKvError::GitObjectError("Object is not a tree".to_string()))?;

    // Only the first entry with a matching name is ever considered.
    let matching_entry = tree_ref
        .entries
        .into_iter()
        .find(|entry| entry.filename == current_part.as_bytes());
    let Some(entry) = matching_entry else {
        return Err(GitKvError::GitObjectError(format!(
            "Path component '{}' not found in tree (depth: {}, full path: {})",
            current_part,
            depth,
            path_parts.join("/")
        )));
    };

    if !is_final {
        // Intermediate component: must be a directory to descend into.
        if !entry.mode.is_tree() {
            return Err(GitKvError::GitObjectError(format!(
                "Expected directory but found file: {current_part}"
            )));
        }
        let subtree_id = gix::ObjectId::from(entry.oid);
        return self.find_file_in_tree(&subtree_id, path_parts, depth + 1);
    }

    // Final component: must be a file.
    if !entry.mode.is_blob() {
        return Err(GitKvError::GitObjectError(format!(
            "Expected file but found directory: {current_part}"
        )));
    }

    let mut blob_buffer = Vec::new();
    let blob_id = gix::ObjectId::from(entry.oid);
    let blob_ref = self
        .store
        .git_repo()
        .objects
        .find(&blob_id, &mut blob_buffer)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to find blob: {e}")))?
        .decode()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to decode blob: {e}")))?
        .into_blob()
        .ok_or_else(|| GitKvError::GitObjectError("Object is not a blob".to_string()))?;
    Ok(blob_ref.data.to_vec())
}
/// Loads the root node identified by `root_hash` and collects every
/// key-value pair reachable from it.
///
/// `hash_mappings` translates prolly hashes into the Git object ids of the
/// blobs that hold the bincode-serialized nodes.
///
/// # Errors
///
/// Returns `GitKvError::GitObjectError` if the root hash has no mapping, the
/// root blob cannot be found, decoded or deserialized, or collecting from a
/// descendant node fails.
fn collect_keys_from_root_hash(
    &self,
    root_hash: &ValueDigest<N>,
    hash_mappings: &HashMap<ValueDigest<N>, gix::ObjectId>,
) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
    let Some(root_git_id) = hash_mappings.get(root_hash) else {
        return Err(GitKvError::GitObjectError(
            "Root hash not found in mappings".to_string(),
        ));
    };

    let mut raw = Vec::new();
    let object = self
        .store
        .git_repo()
        .objects
        .find(root_git_id, &mut raw)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to find root node: {e}")))?;
    let decoded = object
        .decode()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to decode root node: {e}")))?;
    let blob_ref = decoded
        .into_blob()
        .ok_or_else(|| GitKvError::GitObjectError("Root object is not a blob".to_string()))?;
    let root_node: ProllyNode<N> = bincode::deserialize(blob_ref.data).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to deserialize root node: {e}"))
    })?;

    let mut collected = HashMap::new();
    self.collect_keys_from_node(&root_node, hash_mappings, &mut collected)?;
    Ok(collected)
}
/// Recursively gathers the key-value pairs stored under `node` into
/// `result`.
///
/// A leaf contributes its keys paired positionally with its values. For an
/// internal node each value is turned into a child hash via
/// `ValueDigest::raw_hash`, looked up in `hash_mappings`, and the
/// corresponding blob is deserialized and visited in turn. Children whose
/// hash has no mapping are skipped silently.
///
/// # Errors
///
/// Returns `GitKvError::GitObjectError` if a mapped child blob cannot be
/// found, decoded or deserialized.
fn collect_keys_from_node(
    &self,
    node: &ProllyNode<N>,
    hash_mappings: &HashMap<ValueDigest<N>, gix::ObjectId>,
    result: &mut HashMap<Vec<u8>, Vec<u8>>,
) -> Result<(), GitKvError> {
    if node.is_leaf {
        // `zip` stops at the shorter side, so keys without a value at the
        // same index are left out.
        result.extend(
            node.keys
                .iter()
                .zip(node.values.iter())
                .map(|(key, value)| (key.clone(), value.clone())),
        );
        return Ok(());
    }

    for value in &node.values {
        let child_hash = ValueDigest::raw_hash(value);
        let Some(child_git_id) = hash_mappings.get(&child_hash) else {
            continue;
        };

        let mut raw = Vec::new();
        let object = self
            .store
            .git_repo()
            .objects
            .find(child_git_id, &mut raw)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to find child node: {e}")))?;
        let decoded = object.decode().map_err(|e| {
            GitKvError::GitObjectError(format!("Failed to decode child node: {e}"))
        })?;
        let blob_ref = decoded.into_blob().ok_or_else(|| {
            GitKvError::GitObjectError("Child object is not a blob".to_string())
        })?;
        let child_node: ProllyNode<N> = bincode::deserialize(blob_ref.data).map_err(|e| {
            GitKvError::GitObjectError(format!("Failed to deserialize child node: {e}"))
        })?;

        self.collect_keys_from_node(&child_node, hash_mappings, result)?;
    }
    Ok(())
}
/// Decodes a hexadecimal string (upper or lower case) into bytes.
///
/// Every character must be one of `0-9`, `a-f` or `A-F`. Each byte pair is
/// validated digit by digit, so sign characters such as `+` — which
/// `u8::from_str_radix` would accept — are rejected.
///
/// # Errors
///
/// Returns `GitKvError::GitObjectError` if the input has odd length or
/// contains a character that is not a hex digit.
fn decode_hex(&self, hex_str: &str) -> Result<Vec<u8>, GitKvError> {
    let raw = hex_str.as_bytes();
    if !raw.len().is_multiple_of(2) {
        return Err(GitKvError::GitObjectError(
            "Invalid hex string length".to_string(),
        ));
    }

    // Converts one ASCII hex digit to its value. Bytes >= 0x80 map to
    // non-ASCII chars, for which `to_digit` returns `None`.
    let nibble = |byte: u8| -> Result<u8, GitKvError> {
        (byte as char)
            .to_digit(16)
            // `to_digit(16)` yields at most 15, so narrowing cannot truncate.
            .map(|digit| digit as u8)
            .ok_or_else(|| GitKvError::GitObjectError("Invalid hex digit".to_string()))
    };

    raw.chunks_exact(2)
        .map(|pair| -> Result<u8, GitKvError> {
            Ok((nibble(pair[0])? << 4) | nibble(pair[1])?)
        })
        .collect()
}
/// Returns the key-value state currently held by this instance's own store,
/// by forwarding to [`Self::get_current_kv_state_from_store`].
fn get_current_kv_state(&self) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
self.get_current_kv_state_from_store(&self.store)
}
/// Snapshots every key-value pair currently visible in `store`.
///
/// Keys come from `store.list_keys()`; a key for which `store.get` returns
/// `None` is left out of the result. With the current implementation this
/// function always returns `Ok`.
fn get_current_kv_state_from_store(
    &self,
    store: &GitVersionedKvStore<N>,
) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
    let snapshot: HashMap<Vec<u8>, Vec<u8>> = store
        .list_keys()
        .into_iter()
        .filter_map(|key| {
            let value = store.get(&key)?;
            Some((key, value))
        })
        .collect();
    Ok(snapshot)
}
/// Resolves a revision string (e.g. `HEAD`, a branch name, or a hash) to an
/// object id using the repository's `rev_parse_single`.
///
/// # Errors
///
/// Returns `GitKvError::GitObjectError` if the revision cannot be resolved.
fn parse_commit_id(&self, commit: &str) -> Result<gix::ObjectId, GitKvError> {
    self.store
        .git_repo()
        .rev_parse_single(commit)
        .map(|object| object.into())
        .map_err(|e| {
            GitKvError::GitObjectError(format!("Cannot resolve commit {commit}: {e}"))
        })
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Initializes a Git repository in a fresh temporary directory, creates a
    /// `dataset` sub-directory with a store in it, and wraps that store in
    /// `GitOperations`.
    ///
    /// The `TempDir` is returned as well so the directory stays alive for as
    /// long as the caller holds on to it.
    fn ops_in_fresh_repo() -> (TempDir, GitOperations<32>) {
        let temp_dir = TempDir::new().unwrap();
        gix::init(temp_dir.path()).unwrap();
        let dataset_dir = temp_dir.path().join("dataset");
        std::fs::create_dir_all(&dataset_dir).unwrap();
        let store = GitVersionedKvStore::<32>::init(&dataset_dir).unwrap();
        (temp_dir, GitOperations::new(store))
    }

    /// Constructing `GitOperations` over a freshly initialized store must not
    /// panic.
    #[test]
    fn test_git_operations_creation() {
        let (_temp_dir, _ops) = ops_in_fresh_repo();
    }

    /// `HEAD` must be resolvable right after the store has been initialized.
    #[test]
    fn test_parse_commit_id() {
        let (_temp_dir, ops) = ops_in_fresh_repo();
        let head_id = ops.parse_commit_id("HEAD");
        assert!(head_id.is_ok());
    }
}