use super::{
HistoricalAccess, HistoricalCommitAccess, TreeConfigSaver, VersionedKvStore,
DEFAULT_TREE_CONFIG_FILENAME,
};
use crate::config::TreeConfig;
use crate::diff::{ConflictResolver, IgnoreConflictsResolver};
use crate::digest::ValueDigest;
use crate::git::metadata::{GitMetadataBackend, MetadataBackend};
use crate::git::types::*;
use crate::storage::{FileNodeStorage, GitNodeStorage, InMemoryNodeStorage};
use crate::tree::{ProllyTree, Tree};
use gix::prelude::*;
use std::collections::HashMap;
use std::path::Path;
#[cfg(feature = "rocksdb_storage")]
use crate::storage::RocksDBNodeStorage;
/// [`TreeConfigSaver`] implementation for the store whose nodes live in `GitNodeStorage`.
impl<const N: usize> TreeConfigSaver<N>
for VersionedKvStore<N, GitNodeStorage<N>, GitMetadataBackend>
{
/// Forwards to the inherent `save_tree_config_to_git` method of this store type and
/// returns its result unchanged.
fn save_tree_config_to_git_internal(&self) -> Result<(), GitKvError> {
self.save_tree_config_to_git()
}
}
impl<const N: usize> VersionedKvStore<N, GitNodeStorage<N>, GitMetadataBackend> {
/// Returns a shared reference to the `gix::Repository` held by the metadata backend.
pub fn git_repo(&self) -> &gix::Repository {
self.metadata.repo()
}
/// Persists the tree configuration and the node-hash mappings as files in the dataset
/// directory reported by the node storage.
///
/// Two files are written:
/// * `self.config_filename` — the tree configuration as pretty-printed JSON;
/// * `prolly_hash_mappings` — one `<hash hex>:<object id hex>` line per mapping, sorted so
///   the file content does not depend on the iteration order of the mapping collection.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when serialization or either file write fails.
fn save_tree_config_to_git(&self) -> Result<(), GitKvError> {
    let dataset_dir = self.tree.storage.dataset_dir();

    // 1. Tree configuration.
    let config_json = serde_json::to_string_pretty(&self.tree.config)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to serialize config: {e}")))?;
    std::fs::write(dataset_dir.join(&self.config_filename), config_json)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to write config file: {e}")))?;

    // 2. Hash mappings, rendered as lowercase hex pairs.
    let mappings = self.tree.storage.get_hash_mappings();
    let mut lines: Vec<(String, String)> = Vec::new();
    for (hash, object_id) in mappings.iter() {
        let hash_hex: String = hash
            .as_bytes()
            .iter()
            .map(|byte| format!("{byte:02x}"))
            .collect();
        lines.push((hash_hex, object_id.to_hex().to_string()));
    }
    lines.sort();

    let mappings_content: String = lines
        .iter()
        .map(|(hash_hex, object_hex)| format!("{hash_hex}:{object_hex}\n"))
        .collect();
    std::fs::write(dataset_dir.join("prolly_hash_mappings"), mappings_content).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to write mappings file: {e}"))
    })?;

    Ok(())
}
/// Switches the store to another branch (or fully-qualified ref) and reloads the tree.
///
/// `branch_or_commit` is treated as a ref name: a value starting with `refs/` is used
/// verbatim, anything else is looked up as `refs/heads/<name>`. Raw commit ids are not
/// resolved here; an unknown name yields `GitKvError::BranchNotFound`.
///
/// On success the staging area is cleared and persisted, `HEAD` is rewritten to point at
/// the target ref, the working tree is synchronized and the in-memory tree is reloaded.
///
/// # Errors
/// * `GitKvError::BranchNotFound` if the ref does not exist. The staging area is left
///   untouched in that case.
/// * `GitKvError::GitObjectError` if `HEAD` cannot be written, plus any error from saving
///   the staging area, syncing the working tree or reloading the tree.
pub fn checkout(&mut self, branch_or_commit: &str) -> Result<(), GitKvError> {
    let target_ref = if branch_or_commit.starts_with("refs/") {
        branch_or_commit.to_string()
    } else {
        format!("refs/heads/{branch_or_commit}")
    };

    // Validate the target before discarding anything: a typo in the branch name must not
    // wipe the caller's staged changes.
    if self.metadata.repo().refs.find(&target_ref).is_err() {
        return Err(GitKvError::BranchNotFound(branch_or_commit.to_string()));
    }

    self.staging_area.clear();
    self.save_staging_area()?;

    // Store the short branch name. Other methods build `refs/heads/{current_branch}`, so
    // keeping a `refs/heads/` prefix here would produce a doubled, non-existent ref.
    self.current_branch = target_ref
        .strip_prefix("refs/heads/")
        .unwrap_or(branch_or_commit)
        .to_string();

    let head_file = self.metadata.repo().path().join("HEAD");
    let head_content = format!("ref: {target_ref}\n");
    std::fs::write(&head_file, head_content)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to update HEAD: {e}")))?;

    self.sync_working_tree_to_head()?;
    self.reload_tree_from_head()?;
    Ok(())
}
pub fn merge<R: ConflictResolver>(
&mut self,
source_branch: &str,
resolver: &R,
) -> Result<gix::ObjectId, GitKvError> {
let dest_branch = self.current_branch.clone();
let base_commit = self.find_merge_base(&dest_branch, source_branch)?;
let base_kv = self.collect_keys_at_commit(&base_commit)?;
let source_kv = self.collect_keys_at_commit(&self.get_branch_commit(source_branch)?)?;
let mut dest_kv = HashMap::new();
for key in self.tree.collect_keys() {
if let Some(value) = self.get(&key) {
dest_kv.insert(key, value);
}
}
let mut merge_results = Vec::new();
let mut all_keys = std::collections::HashSet::new();
all_keys.extend(base_kv.keys().cloned());
all_keys.extend(source_kv.keys().cloned());
all_keys.extend(dest_kv.keys().cloned());
for key in all_keys {
let base_value = base_kv.get(&key);
let source_value = source_kv.get(&key);
let dest_value = dest_kv.get(&key);
match (base_value, source_value, dest_value) {
(Some(base), Some(source), Some(dest)) => {
if base == source && base == dest {
continue;
} else if base == dest && base != source {
merge_results.push(crate::diff::MergeResult::Modified(key, source.clone()));
} else if base == source && base != dest {
continue;
} else if source == dest {
continue;
} else {
let conflict = crate::diff::MergeConflict {
key: key.clone(),
base_value: Some(base.clone()),
source_value: Some(source.clone()),
destination_value: Some(dest.clone()),
};
merge_results.push(crate::diff::MergeResult::Conflict(conflict));
}
}
(None, Some(source), None) => {
merge_results.push(crate::diff::MergeResult::Added(key, source.clone()));
}
(None, None, Some(_dest)) => {
continue;
}
(None, Some(source), Some(dest)) => {
if source == dest {
continue;
} else {
let conflict = crate::diff::MergeConflict {
key: key.clone(),
base_value: None,
source_value: Some(source.clone()),
destination_value: Some(dest.clone()),
};
merge_results.push(crate::diff::MergeResult::Conflict(conflict));
}
}
(Some(_base), None, Some(_dest)) => {
merge_results.push(crate::diff::MergeResult::Removed(key));
}
(Some(_base), Some(_source), None) => {
continue;
}
(Some(_base), None, None) => {
continue;
}
_ => continue,
}
}
let mut resolved_results = Vec::new();
let mut unresolved_conflicts = Vec::new();
for result in merge_results {
match result {
crate::diff::MergeResult::Conflict(conflict) => {
if let Some(resolved_result) = resolver.resolve_conflict(&conflict) {
resolved_results.push(resolved_result);
} else {
unresolved_conflicts.push(conflict);
}
}
other => resolved_results.push(other),
}
}
if !unresolved_conflicts.is_empty() {
return Err(GitKvError::MergeConflictError(unresolved_conflicts));
}
for result in resolved_results {
match result {
crate::diff::MergeResult::Added(key, value) => {
self.tree.insert(key, value);
}
crate::diff::MergeResult::Modified(key, value) => {
self.tree.insert(key, value); }
crate::diff::MergeResult::Removed(key) => {
self.tree.delete(&key);
}
crate::diff::MergeResult::Conflict(_) => {
unreachable!("Conflicts should have been resolved");
}
}
}
self.staging_area.clear();
self.save_staging_area()?;
let message = format!("Merge branch '{source_branch}' into '{dest_branch}'");
let merge_commit_id = self.create_merge_commit(&message, source_branch)?;
Ok(merge_commit_id)
}
/// Merges `source_branch` into the current branch using `IgnoreConflictsResolver` as the
/// conflict resolver; see `merge` for the merge procedure and the returned commit id.
pub fn merge_ignore_conflicts(
&mut self,
source_branch: &str,
) -> Result<gix::ObjectId, GitKvError> {
self.merge(source_branch, &IgnoreConflictsResolver)
}
/// Finds a common ancestor of the tips of `branch1` and `branch2`.
///
/// All commits reachable from `branch1` are collected first; then the history of
/// `branch2` is walked breadth-first and the first commit that is also reachable from
/// `branch1` is returned.
///
/// Commits whose parents cannot be read are treated as having no parents.
///
/// # Errors
/// Propagates branch lookup errors and returns `GitKvError::GitObjectError` when the two
/// histories share no commit.
fn find_merge_base(&self, branch1: &str, branch2: &str) -> Result<gix::ObjectId, GitKvError> {
    use std::collections::{HashSet, VecDeque};

    let tip1 = self.get_branch_commit(branch1)?;
    let tip2 = self.get_branch_commit(branch2)?;

    // Phase 1: everything reachable from the first branch.
    let mut reachable_from_first = HashSet::new();
    let mut pending = VecDeque::from([tip1]);
    while let Some(current) = pending.pop_front() {
        // `insert` returns false when the commit was already visited.
        if !reachable_from_first.insert(current) {
            continue;
        }
        if let Ok(parents) = self.get_commit_parents(&current) {
            pending.extend(
                parents
                    .into_iter()
                    .filter(|parent| !reachable_from_first.contains(parent)),
            );
        }
    }

    // Phase 2: breadth-first walk from the second branch until the histories meet.
    let mut seen = HashSet::new();
    let mut pending = VecDeque::from([tip2]);
    while let Some(current) = pending.pop_front() {
        if !seen.insert(current) {
            continue;
        }
        if reachable_from_first.contains(&current) {
            return Ok(current);
        }
        if let Ok(parents) = self.get_commit_parents(&current) {
            pending.extend(parents.into_iter().filter(|parent| !seen.contains(parent)));
        }
    }

    Err(GitKvError::GitObjectError(
        "No common ancestor found".to_string(),
    ))
}
/// Resolves the short branch name `branch` (looked up as `refs/heads/<branch>`) to the
/// object id its ref points at.
///
/// # Errors
/// * `GitKvError::BranchNotFound` if the ref lookup fails.
/// * `GitKvError::GitObjectError` if the ref has no direct object id target.
fn get_branch_commit(&self, branch: &str) -> Result<gix::ObjectId, GitKvError> {
    let branch_ref = format!("refs/heads/{branch}");
    let reference = self
        .metadata
        .repo()
        .refs
        .find(&branch_ref)
        .map_err(|_| GitKvError::BranchNotFound(branch.to_string()))?;

    let target_id = reference.target.try_id().map(|id| id.to_owned());
    target_id.ok_or_else(|| {
        GitKvError::GitObjectError(format!("Branch {branch} does not point to a commit"))
    })
}
/// Returns the parent ids of the commit identified by `commit_id`.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when the object cannot be found in the object
/// database, or when it does not decode as a commit.
fn get_commit_parents(
    &self,
    commit_id: &gix::ObjectId,
) -> Result<Vec<gix::ObjectId>, GitKvError> {
    // Scratch buffer the object database decodes into; the object borrows from it.
    let mut scratch = Vec::new();
    let object = self
        .metadata
        .repo()
        .objects
        .find(commit_id, &mut scratch)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to find commit: {e}")))?;

    let parents = if let Ok(gix::objs::ObjectRef::Commit(commit)) = object.decode() {
        commit.parents().collect()
    } else {
        return Err(GitKvError::GitObjectError(
            "Object is not a commit".to_string(),
        ));
    };
    Ok(parents)
}
/// Writes a two-parent merge commit for the current tree state and advances the current
/// branch (and `HEAD`) to it.
///
/// The first parent is the commit `HEAD` currently points at, the second is the tip of
/// `source_branch`. Before the commit is built the tree root is persisted, the tree
/// configuration is saved and written to the dataset directory, and the git root is
/// staged into a tree object.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when the configuration cannot be saved, the
/// dataset directory or git root is unknown, the system clock is before the Unix epoch,
/// or the commit object cannot be written; also propagates errors from the metadata
/// backend.
pub(crate) fn create_merge_commit(
    &mut self,
    message: &str,
    source_branch: &str,
) -> Result<gix::ObjectId, GitKvError> {
    // Resolve both parents up front, before anything is written.
    let first_parent = self.metadata.head_commit_id()?;
    let second_parent = self.get_branch_commit(source_branch)?;

    // Flush the in-memory tree and its configuration so they end up in the staged tree.
    self.tree.persist_root();
    self.tree
        .save_config()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to save config: {e}")))?;
    self.save_tree_config_to_git_internal()?;

    let dataset_dir = self
        .dataset_dir
        .as_ref()
        .ok_or_else(|| GitKvError::GitObjectError("Dataset directory not set".into()))?;
    let git_root = match self.metadata.work_dir() {
        Some(root) => root,
        None => Self::find_git_root(dataset_dir)
            .ok_or_else(|| GitKvError::GitObjectError("Could not find git root".into()))?,
    };
    let tree_id = self.metadata.stage_and_write_tree(&git_root)?;

    // Author and committer share one signature stamped with the current time (offset 0).
    let seconds = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_err(|e| GitKvError::GitObjectError(format!("System time error: {e}")))?
        .as_secs() as i64;
    let (name, email) = self.metadata.user_config();
    let signature = gix::actor::Signature {
        name: name.into(),
        email: email.into(),
        time: gix::date::Time { seconds, offset: 0 },
    };

    let merge_commit = gix::objs::Commit {
        tree: tree_id,
        parents: vec![first_parent, second_parent].into(),
        author: signature.clone(),
        committer: signature,
        encoding: None,
        message: message.as_bytes().into(),
        extra_headers: vec![],
    };
    let commit_id = self
        .metadata
        .repo()
        .objects
        .write(&merge_commit)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to write commit: {e}")))?;

    self.metadata
        .update_branch(&self.current_branch, commit_id)?;
    self.metadata.update_head(&self.current_branch)?;
    Ok(commit_id)
}
/// Rebuilds the in-memory tree from the commit `HEAD` points at.
///
/// The key/value pairs stored at that commit are collected, a new `ProllyTree` is created
/// on a clone of the current storage (using the current configuration, with `root_hash`
/// taken from the commit's configuration when it can be read), and every pair is
/// inserted into it.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when `HEAD` cannot be read or does not point to a
/// commit, and propagates errors from `collect_keys_at_commit`.
fn reload_tree_from_head(&mut self) -> Result<(), GitKvError> {
    let head = self
        .metadata
        .repo()
        .head()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to get HEAD: {e}")))?;
    let head_object_id = head
        .id()
        .ok_or_else(|| {
            GitKvError::GitObjectError("HEAD does not point to a commit".to_string())
        })?
        .detach();

    let snapshot = self.collect_keys_at_commit(&head_object_id)?;

    // A failure to read the commit's configuration is tolerated: the current
    // configuration is then used unchanged.
    let mut config = self.tree.config.clone();
    if let Ok(commit_config) = self.read_tree_config_from_commit(&head_object_id) {
        config.root_hash = commit_config.root_hash;
    }

    self.tree = ProllyTree::new(self.tree.storage.clone(), config);
    snapshot.into_iter().for_each(|(key, value)| {
        self.tree.insert(key, value);
    });
    Ok(())
}
/// Initializes a store at `path` using `DEFAULT_TREE_CONFIG_FILENAME` as the config file
/// name; see `init_with_config_filename`.
pub fn init<P: AsRef<Path>>(path: P) -> Result<Self, GitKvError> {
Self::init_with_config_filename(path, DEFAULT_TREE_CONFIG_FILENAME)
}
/// Creates a Git-backed store in the dataset directory `path`, or opens the existing one.
///
/// `path` must be a subdirectory of a git repository, not the repository root itself. The
/// directory is created if missing. If it already contains `config_filename`, the default
/// config file or `prolly_hash_mappings`, the call is forwarded to
/// `open_with_config_filename`. Otherwise a fresh tree with the default configuration is
/// created on branch `main` and `commit("Initial commit")` is called on it.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when `path` is the git root, is not inside a git
/// repository, or cannot be created; `GitKvError::GitOpenError` when the repository
/// cannot be opened; and propagates storage and commit errors.
pub fn init_with_config_filename<P: AsRef<Path>>(
    path: P,
    config_filename: &str,
) -> Result<Self, GitKvError> {
    let dataset_path = path.as_ref();

    if Self::is_in_git_root(dataset_path)? {
        return Err(GitKvError::GitObjectError(
            "Cannot initialize git-prolly in git root directory. \
            Please use a subdirectory to create a dataset, or the commit operation \
            may accidentally stage all files in the repository."
                .to_string(),
        ));
    }
    let git_root = match Self::find_git_root(dataset_path) {
        Some(root) => root,
        None => {
            return Err(GitKvError::GitObjectError(
                "Not inside a git repository. Please run from within a git repository."
                    .to_string(),
            ))
        }
    };

    let dataset_dir = dataset_path.to_path_buf();
    std::fs::create_dir_all(&dataset_dir).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to create dataset directory: {e}"))
    })?;

    // Any of these files marks a dataset that was initialized before: open it instead of
    // overwriting it.
    let already_initialized = [
        config_filename,
        DEFAULT_TREE_CONFIG_FILENAME,
        "prolly_hash_mappings",
    ]
    .iter()
    .any(|file_name| dataset_dir.join(file_name).exists());
    if already_initialized {
        return Self::open_with_config_filename(dataset_path, config_filename);
    }

    let git_repo = gix::open(&git_root).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?;
    let storage = GitNodeStorage::new(git_repo.clone(), dataset_dir.clone())?;
    let tree = ProllyTree::new(storage, TreeConfig::<N>::default());

    let mut store = VersionedKvStore {
        tree,
        metadata: GitMetadataBackend::new(git_repo),
        staging_area: HashMap::new(),
        current_branch: "main".to_string(),
        storage_backend: StorageBackend::Git,
        dataset_dir: Some(dataset_dir),
        config_filename: config_filename.to_string(),
    };

    // Best effort: a failure to save the configuration here is deliberately ignored.
    let _ = store.tree.save_config();
    store.commit("Initial commit")?;
    Ok(store)
}
/// Opens an existing store at `path` using `DEFAULT_TREE_CONFIG_FILENAME` as the config
/// file name; see `open_with_config_filename`.
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, GitKvError> {
Self::open_with_config_filename(path, DEFAULT_TREE_CONFIG_FILENAME)
}
/// Opens an existing Git-backed store located in the dataset directory `path`.
///
/// The configuration is read from `config_filename` inside the dataset directory; when
/// that file is missing and a different name than the default was requested, the default
/// config file is used as a fallback. The tree is loaded from storage using that
/// configuration; if loading fails a new tree is created with the same configuration
/// (with a warning on stderr when the configuration carries a root hash). The current
/// branch is taken from the repository's `HEAD` ref, defaulting to `main`, and the staging
/// area is loaded.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when `path` is the git root, is not inside a git
/// repository, or the config file is missing, unreadable or unparsable;
/// `GitKvError::GitOpenError` when the repository cannot be opened; and propagates
/// storage and staging-area errors.
pub fn open_with_config_filename<P: AsRef<Path>>(
    path: P,
    config_filename: &str,
) -> Result<Self, GitKvError> {
    let dataset_path = path.as_ref();

    if Self::is_in_git_root(dataset_path)? {
        return Err(GitKvError::GitObjectError(
            "Cannot open git-prolly in git root directory. \
            Please use a subdirectory for your dataset, or the commit operation \
            may accidentally stage all files in the repository."
                .to_string(),
        ));
    }
    let git_root = match Self::find_git_root(dataset_path) {
        Some(root) => root,
        None => {
            return Err(GitKvError::GitObjectError(
                "Not inside a git repository. Please run from within a git repository."
                    .to_string(),
            ))
        }
    };

    let dataset_dir = dataset_path.to_path_buf();
    let git_repo = gix::open(&git_root).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?;
    let storage = GitNodeStorage::new(git_repo.clone(), dataset_dir.clone())?;

    // Prefer the requested config file; fall back to the default name only when a
    // different name was requested.
    let requested_config = dataset_dir.join(config_filename);
    let resolved_path = if requested_config.exists() {
        requested_config
    } else {
        let fallback_config = dataset_dir.join(DEFAULT_TREE_CONFIG_FILENAME);
        let fallback_usable =
            fallback_config.exists() && config_filename != DEFAULT_TREE_CONFIG_FILENAME;
        if !fallback_usable {
            return Err(GitKvError::GitObjectError(format!(
                "Config file not found ({} or {}). The store may not be \
                initialized. Call init() to create a new store.",
                config_filename, DEFAULT_TREE_CONFIG_FILENAME,
            )));
        }
        fallback_config
    };

    let config_data = std::fs::read_to_string(&resolved_path)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to read config file: {e}")))?;
    let config: TreeConfig<N> = serde_json::from_str(&config_data)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to parse config file: {e}")))?;

    let tree = match ProllyTree::load_from_storage(storage.clone(), config.clone()) {
        Some(existing_tree) => existing_tree,
        None => {
            if config.root_hash.is_some() {
                eprintln!("Warning: Failed to load tree from saved root hash. This may indicate missing git objects or corrupted hash mappings.");
                eprintln!("Attempting to create tree with saved config to avoid data loss...");
            }
            ProllyTree::new(storage, config)
        }
    };

    let head_ref = git_repo
        .head_ref()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to get head ref: {e}")))?;
    let current_branch = match head_ref {
        Some(reference) => reference.name().shorten().to_string(),
        None => "main".to_string(),
    };

    let mut store = VersionedKvStore {
        tree,
        metadata: GitMetadataBackend::new(git_repo),
        staging_area: HashMap::new(),
        current_branch,
        storage_backend: StorageBackend::Git,
        dataset_dir: Some(dataset_dir),
        config_filename: config_filename.to_string(),
    };
    store.load_staging_area()?;
    Ok(store)
}
/// Returns a shared reference to the underlying `ProllyTree`.
pub fn tree(&self) -> &ProllyTree<N, GitNodeStorage<N>> {
&self.tree
}
/// Returns a mutable reference to the underlying `ProllyTree`.
pub fn tree_mut(&mut self) -> &mut ProllyTree<N, GitNodeStorage<N>> {
&mut self.tree
}
/// Reconstructs the key/value pairs stored in this dataset at the commit `commit_id`.
///
/// The dataset's config file and its `prolly_hash_mappings` file are read from the commit.
/// The mappings file is interpreted line by line in one of two formats:
///
/// * `key:<key hex>:<value hex>` — a direct key/value listing. If at least one such line
///   is present, the decoded pairs are returned as-is.
/// * `<hash hex>:<object id hex>` — a node-hash to git-object mapping. These are used to
///   build a temporary `GitNodeStorage` from which the tree is loaded and enumerated.
///
/// Malformed lines are skipped.
///
/// # Returns
/// The key/value map at that commit. An empty map is returned when either file cannot be
/// read from the commit, or when no usable mapping line is found.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when the git root cannot be determined, the
/// dataset directory is not below it, the config cannot be deserialized, the mappings are
/// not valid UTF-8, or the tree cannot be loaded from the reconstructed storage.
pub(crate) fn collect_keys_at_commit(
    &self,
    commit_id: &gix::ObjectId,
) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
    let dataset_dir = self.tree.storage.dataset_dir();
    let git_root = self
        .metadata
        .work_dir()
        .or_else(|| Self::find_git_root(dataset_dir))
        .ok_or_else(|| GitKvError::GitObjectError("Could not find git root".to_string()))?;

    // Paths inside a commit are repository-relative and always '/'-separated.
    let dataset_relative_path = dataset_dir
        .strip_prefix(&git_root)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to get relative path: {e}")))?;
    let relative_path_str = dataset_relative_path
        .components()
        .map(|c| c.as_os_str().to_string_lossy())
        .collect::<Vec<_>>()
        .join("/");
    let config_path = format!("{}/{}", relative_path_str, self.config_filename);
    let mapping_path = format!("{}/prolly_hash_mappings", relative_path_str);

    // A commit that lacks either file simply has no data for this dataset.
    let (config_data, mapping_data) = match (
        self.metadata.read_file_at_commit(commit_id, &config_path),
        self.metadata.read_file_at_commit(commit_id, &mapping_path),
    ) {
        (Ok(config_data), Ok(mapping_data)) => (config_data, mapping_data),
        _ => return Ok(HashMap::new()),
    };

    let config: TreeConfig<N> = serde_json::from_slice(&config_data).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to deserialize config: {e}"))
    })?;
    let mapping_str = String::from_utf8(mapping_data)
        .map_err(|e| GitKvError::GitObjectError(format!("Invalid UTF-8 in mappings: {e}")))?;

    let mut key_values = HashMap::new();
    let mut hash_mappings = HashMap::new();
    let mut is_simple_mapping = false;
    for line in mapping_str.lines() {
        let (prefix, rest) = match line.split_once(':') {
            Some(parts) => parts,
            None => continue,
        };

        if prefix == "key" {
            is_simple_mapping = true;
            if let Some((key_hex, value_hex)) = rest.split_once(':') {
                if let (Ok(key), Ok(value)) = (hex::decode(key_hex), hex::decode(value_hex)) {
                    key_values.insert(key, value);
                }
            }
            continue;
        }

        // `<hash hex>:<object id hex>`. Decode the whole hash with `hex::decode` rather
        // than slicing the string two bytes at a time: byte-index slicing panics when a
        // corrupted line contains multi-byte UTF-8, and per-byte radix parsing would
        // accept sign characters. Anything that is not exactly N bytes is skipped.
        let hash_bytes = match hex::decode(prefix) {
            Ok(bytes) if bytes.len() == N => bytes,
            _ => continue,
        };
        if let Ok(object_id) = gix::ObjectId::from_hex(rest.as_bytes()) {
            hash_mappings.insert(ValueDigest::raw_hash(&hash_bytes), object_id);
        }
    }

    if is_simple_mapping {
        return Ok(key_values);
    }
    if hash_mappings.is_empty() {
        return Ok(HashMap::new());
    }

    // Load the tree as it was at the commit through a storage that only knows the
    // commit's own hash mappings.
    let temp_storage = GitNodeStorage::with_mappings(
        self.metadata.clone_repo(),
        self.tree.storage.dataset_dir().to_path_buf(),
        hash_mappings,
    )?;
    let tree = ProllyTree::load_from_storage(temp_storage, config).ok_or_else(|| {
        GitKvError::GitObjectError("Failed to load tree from storage".to_string())
    })?;

    let mut result_key_values = HashMap::new();
    for key in tree.collect_keys() {
        if let Some(node) = tree.find(&key) {
            if let Some(index) = node.keys.iter().position(|k| k == &key) {
                result_key_values.insert(key, node.values[index].clone());
            }
        }
    }
    Ok(result_key_values)
}
}
/// [`HistoricalAccess`] implementation for the store backed by `GitNodeStorage`.
impl<const N: usize> HistoricalAccess<N>
for VersionedKvStore<N, GitNodeStorage<N>, GitMetadataBackend>
{
/// Resolves `reference` to a commit id with `resolve_commit` and returns the key/value
/// pairs that `collect_keys_at_commit` reports for that commit.
fn get_keys_at_ref(&self, reference: &str) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
let commit_id = self.resolve_commit(reference)?;
self.collect_keys_at_commit(&commit_id)
}
}
/// [`HistoricalCommitAccess`] implementation for the store backed by `GitNodeStorage`.
impl<const N: usize> HistoricalCommitAccess<N>
    for VersionedKvStore<N, GitNodeStorage<N>, GitMetadataBackend>
{
    /// Returns the commits in which the value of `key` differs from its value in the
    /// preceding commit of the history (including the commit that first introduces it).
    ///
    /// The history is walked in the reverse of the order returned by
    /// `get_commit_history`, and the result is returned in the history's original order.
    /// Every commit's full key/value state is reconstructed via `collect_keys_at_commit`.
    fn get_commits_for_key(&self, key: &[u8]) -> Result<Vec<CommitInfo>, GitKvError> {
        let history = self.get_commit_history()?;

        let mut changed = Vec::new();
        let mut last_seen: Option<Vec<u8>> = None;
        for commit in history.into_iter().rev() {
            let value_here = self.collect_keys_at_commit(&commit.id)?.get(key).cloned();
            if value_here != last_seen {
                changed.push(commit);
            }
            last_seen = value_here;
        }

        // Restore the ordering of `get_commit_history`.
        changed.reverse();
        Ok(changed)
    }

    /// Returns the commit history as reported by `log`.
    fn get_commit_history(&self) -> Result<Vec<CommitInfo>, GitKvError> {
        self.log()
    }
}
/// Constructors for the store that keeps tree nodes in memory and metadata in git.
impl<const N: usize> VersionedKvStore<N, InMemoryNodeStorage<N>, GitMetadataBackend> {
    /// Creates a store with in-memory node storage for the dataset directory `path`.
    ///
    /// `path` must be a subdirectory of a git repository, not the repository root itself;
    /// it is created if missing. The store starts with an empty tree using the default
    /// configuration on branch `main`, and `commit("Initial commit")` is called on it.
    ///
    /// # Errors
    /// Returns `GitKvError::GitObjectError` when `path` is the git root, is not inside a
    /// git repository, or cannot be created; `GitKvError::GitOpenError` when the
    /// repository cannot be opened; and propagates commit errors.
    pub fn init<P: AsRef<Path>>(path: P) -> Result<Self, GitKvError> {
        let dataset_path = path.as_ref();

        if Self::is_in_git_root(dataset_path)? {
            return Err(GitKvError::GitObjectError(
                "Cannot initialize in-memory store in git root directory. \
                Please use a subdirectory to create a dataset, or the commit operation \
                may accidentally stage all files in the repository."
                    .to_string(),
            ));
        }
        let git_root = match Self::find_git_root(dataset_path) {
            Some(root) => root,
            None => {
                return Err(GitKvError::GitObjectError(
                    "Not inside a git repository. Please run from within a git repository."
                        .to_string(),
                ))
            }
        };
        let git_repo =
            gix::open(&git_root).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?;

        let dataset_dir = dataset_path.to_path_buf();
        std::fs::create_dir_all(&dataset_dir).map_err(|e| {
            GitKvError::GitObjectError(format!("Failed to create dataset directory: {e}"))
        })?;

        let tree = ProllyTree::new(InMemoryNodeStorage::<N>::new(), TreeConfig::<N>::default());
        let mut store = VersionedKvStore {
            tree,
            metadata: GitMetadataBackend::new(git_repo),
            staging_area: HashMap::new(),
            current_branch: "main".to_string(),
            storage_backend: StorageBackend::InMemory,
            dataset_dir: Some(dataset_dir),
            config_filename: DEFAULT_TREE_CONFIG_FILENAME.to_string(),
        };
        store.commit("Initial commit")?;
        Ok(store)
    }

    /// Equivalent to [`Self::init`]: the call is forwarded unchanged, so a new empty
    /// in-memory tree is created and `commit("Initial commit")` is called again.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, GitKvError> {
        Self::init(path)
    }
}
/// [`HistoricalAccess`] implementation for the store backed by `InMemoryNodeStorage`.
impl<const N: usize> HistoricalAccess<N>
for VersionedKvStore<N, InMemoryNodeStorage<N>, GitMetadataBackend>
{
/// Resolves `reference` to a commit id, reads the tree configuration recorded at that
/// commit, and returns the key/value pairs `collect_keys_from_config` yields for it.
fn get_keys_at_ref(&self, reference: &str) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
let commit_id = self.resolve_commit(reference)?;
let tree_config = self.read_tree_config_from_commit(&commit_id)?;
self.collect_keys_from_config(&tree_config)
}
}
/// [`HistoricalCommitAccess`] implementation for the store backed by `InMemoryNodeStorage`;
/// both methods forward to the storage-independent `*_generic` helpers.
impl<const N: usize> HistoricalCommitAccess<N>
for VersionedKvStore<N, InMemoryNodeStorage<N>, GitMetadataBackend>
{
/// Forwards to `get_commits_for_key_generic`.
fn get_commits_for_key(&self, key: &[u8]) -> Result<Vec<CommitInfo>, GitKvError> {
self.get_commits_for_key_generic(key)
}
/// Forwards to `get_commit_history_generic`.
fn get_commit_history(&self) -> Result<Vec<CommitInfo>, GitKvError> {
self.get_commit_history_generic()
}
}
impl<const N: usize> VersionedKvStore<N, FileNodeStorage<N>, GitMetadataBackend> {
/// Creates a store with file-based node storage for the dataset directory `path`.
///
/// `path` must be a subdirectory of a git repository, not the repository root itself; it
/// is created if missing. Nodes are stored under `nodes/files` inside the directory
/// returned by `ensure_prolly_dir` for the git root. The store starts with a tree using
/// the default configuration on branch `main`, and `commit("Initial commit")` is called.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when `path` is the git root, is not inside a git
/// repository, or a directory or the file storage cannot be created;
/// `GitKvError::GitOpenError` when the repository cannot be opened; and propagates
/// commit errors.
pub fn init<P: AsRef<Path>>(path: P) -> Result<Self, GitKvError> {
    let dataset_path = path.as_ref();

    if Self::is_in_git_root(dataset_path)? {
        return Err(GitKvError::GitObjectError(
            "Cannot initialize file store in git root directory. \
            Please use a subdirectory to create a dataset, or the commit operation \
            may accidentally stage all files in the repository."
                .to_string(),
        ));
    }
    let git_root = match Self::find_git_root(dataset_path) {
        Some(root) => root,
        None => {
            return Err(GitKvError::GitObjectError(
                "Not inside a git repository. Please run from within a git repository."
                    .to_string(),
            ))
        }
    };
    let prolly_dir = Self::ensure_prolly_dir(&git_root)?;

    let dataset_dir = dataset_path.to_path_buf();
    std::fs::create_dir_all(&dataset_dir).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to create dataset directory: {e}"))
    })?;
    let git_repo = gix::open(&git_root).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?;

    // Node files live outside the dataset directory, below the prolly directory.
    let nodes_dir = prolly_dir.join("nodes").join("files");
    std::fs::create_dir_all(&nodes_dir).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to create file storage directory: {e}"))
    })?;
    let storage = FileNodeStorage::<N>::new(nodes_dir).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to create file storage: {e}"))
    })?;

    let tree = ProllyTree::new(storage, TreeConfig::<N>::default());
    let mut store = VersionedKvStore {
        tree,
        metadata: GitMetadataBackend::new(git_repo),
        staging_area: HashMap::new(),
        current_branch: "main".to_string(),
        storage_backend: StorageBackend::File,
        dataset_dir: Some(dataset_dir),
        config_filename: DEFAULT_TREE_CONFIG_FILENAME.to_string(),
    };
    store.commit("Initial commit")?;
    Ok(store)
}
/// Opens an existing store with file-based node storage for the dataset directory `path`.
///
/// The dataset directory and the `nodes/files` directory below the prolly directory must
/// already exist. The configuration is read from `prolly_config_tree_config` in the
/// dataset directory and the tree is loaded from storage; if loading fails a new tree is
/// created with the same configuration. The current branch is taken from the
/// repository's `HEAD` ref, defaulting to `main`, and the staging area is loaded.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when `path` is the git root, does not exist, is
/// not inside a git repository, the file store or config file is missing, or the config
/// cannot be read or parsed; `GitKvError::GitOpenError` when the repository cannot be
/// opened; and propagates staging-area errors.
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, GitKvError> {
    let dataset_path = path.as_ref();
    let dataset_dir = dataset_path.to_path_buf();

    if Self::is_in_git_root(dataset_path)? {
        return Err(GitKvError::GitObjectError(
            "Cannot open file store in git root directory. \
            Please use a subdirectory for your dataset, or the commit operation \
            may accidentally stage all files in the repository."
                .to_string(),
        ));
    }
    if !dataset_dir.exists() {
        return Err(GitKvError::GitObjectError(
            "Dataset directory not found. Call init() first to create the store.".to_string(),
        ));
    }
    let git_root = match Self::find_git_root(dataset_path) {
        Some(root) => root,
        None => {
            return Err(GitKvError::GitObjectError(
                "Not inside a git repository. Please run from within a git repository."
                    .to_string(),
            ))
        }
    };

    let prolly_dir = Self::get_prolly_dir(&git_root);
    let git_repo = gix::open(&git_root).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?;

    let nodes_dir = prolly_dir.join("nodes").join("files");
    if !nodes_dir.exists() {
        return Err(GitKvError::GitObjectError(
            "File store not initialized. Call init() first to create the store.".to_string(),
        ));
    }
    let storage = FileNodeStorage::<N>::new(nodes_dir.clone()).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to create file storage: {e}"))
    })?;

    let config_path = dataset_dir.join("prolly_config_tree_config");
    if !config_path.exists() {
        return Err(GitKvError::GitObjectError(
            "Config file not found. The store may not be initialized. \
            Call init() to create a new store."
                .to_string(),
        ));
    }
    let config_data = std::fs::read_to_string(&config_path)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to read config file: {e}")))?;
    let config: TreeConfig<N> = serde_json::from_str(&config_data)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to parse config file: {e}")))?;

    let tree = match ProllyTree::load_from_storage(storage, config.clone()) {
        Some(existing_tree) => existing_tree,
        None => {
            // The first storage handle was consumed by the load attempt; open another
            // one on the same directory for the new tree.
            let fresh_storage = FileNodeStorage::<N>::new(nodes_dir).map_err(|e| {
                GitKvError::GitObjectError(format!("Failed to create file storage: {e}"))
            })?;
            ProllyTree::new(fresh_storage, config)
        }
    };

    let head_ref = git_repo
        .head_ref()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to get head ref: {e}")))?;
    let current_branch = match head_ref {
        Some(reference) => reference.name().shorten().to_string(),
        None => "main".to_string(),
    };

    let mut store = VersionedKvStore {
        tree,
        metadata: GitMetadataBackend::new(git_repo),
        staging_area: HashMap::new(),
        current_branch,
        storage_backend: StorageBackend::File,
        dataset_dir: Some(dataset_dir),
        config_filename: DEFAULT_TREE_CONFIG_FILENAME.to_string(),
    };
    store.load_staging_area()?;
    Ok(store)
}
}
/// [`HistoricalAccess`] implementation for the store backed by `FileNodeStorage`.
impl<const N: usize> HistoricalAccess<N>
for VersionedKvStore<N, FileNodeStorage<N>, GitMetadataBackend>
{
/// Resolves `reference` to a commit id, reads the tree configuration recorded at that
/// commit, and returns the key/value pairs `collect_keys_from_config` yields for it.
fn get_keys_at_ref(&self, reference: &str) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
let commit_id = self.resolve_commit(reference)?;
let tree_config = self.read_tree_config_from_commit(&commit_id)?;
self.collect_keys_from_config(&tree_config)
}
}
/// [`HistoricalCommitAccess`] implementation for the store backed by `FileNodeStorage`;
/// both methods forward to the storage-independent `*_generic` helpers.
impl<const N: usize> HistoricalCommitAccess<N>
for VersionedKvStore<N, FileNodeStorage<N>, GitMetadataBackend>
{
/// Forwards to `get_commits_for_key_generic`.
fn get_commits_for_key(&self, key: &[u8]) -> Result<Vec<CommitInfo>, GitKvError> {
self.get_commits_for_key_generic(key)
}
/// Forwards to `get_commit_history_generic`.
fn get_commit_history(&self) -> Result<Vec<CommitInfo>, GitKvError> {
self.get_commit_history_generic()
}
}
#[cfg(feature = "rocksdb_storage")]
impl<const N: usize> VersionedKvStore<N, RocksDBNodeStorage<N>, GitMetadataBackend> {
/// Creates a store with RocksDB node storage for the dataset directory `path`.
///
/// `path` must be a subdirectory of a git repository, not the repository root itself; it
/// is created if missing. The database lives under `nodes/rocksdb` inside the directory
/// returned by `ensure_prolly_dir` for the git root. The store starts with a tree using
/// the default configuration on branch `main`, and `commit("Initial commit")` is called.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when `path` is the git root, is not inside a git
/// repository, or a directory or the RocksDB storage cannot be created;
/// `GitKvError::GitOpenError` when the repository cannot be opened; and propagates
/// commit errors.
pub fn init<P: AsRef<Path>>(path: P) -> Result<Self, GitKvError> {
    let dataset_path = path.as_ref();

    if Self::is_in_git_root(dataset_path)? {
        return Err(GitKvError::GitObjectError(
            "Cannot initialize RocksDB store in git root directory. \
            Please use a subdirectory to create a dataset, or the commit operation \
            may accidentally stage all files in the repository."
                .to_string(),
        ));
    }
    let git_root = match Self::find_git_root(dataset_path) {
        Some(root) => root,
        None => {
            return Err(GitKvError::GitObjectError(
                "Not inside a git repository. Please run from within a git repository."
                    .to_string(),
            ))
        }
    };
    let prolly_dir = Self::ensure_prolly_dir(&git_root)?;

    let dataset_dir = dataset_path.to_path_buf();
    std::fs::create_dir_all(&dataset_dir).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to create dataset directory: {e}"))
    })?;
    let git_repo = gix::open(&git_root).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?;

    // The database lives outside the dataset directory, below the prolly directory.
    let database_dir = prolly_dir.join("nodes").join("rocksdb");
    std::fs::create_dir_all(&database_dir).map_err(|e| {
        GitKvError::GitObjectError(format!("Failed to create RocksDB directory: {e}"))
    })?;
    let storage = RocksDBNodeStorage::<N>::new(database_dir)
        .map_err(|e| GitKvError::GitObjectError(format!("RocksDB creation failed: {e}")))?;

    let tree = ProllyTree::new(storage, TreeConfig::<N>::default());
    let mut store = VersionedKvStore {
        tree,
        metadata: GitMetadataBackend::new(git_repo),
        staging_area: HashMap::new(),
        current_branch: "main".to_string(),
        storage_backend: StorageBackend::RocksDB,
        dataset_dir: Some(dataset_dir),
        config_filename: DEFAULT_TREE_CONFIG_FILENAME.to_string(),
    };
    store.commit("Initial commit")?;
    Ok(store)
}
/// Opens an existing store with RocksDB node storage for the dataset directory `path`.
///
/// The dataset directory and the `nodes/rocksdb` directory below the prolly directory
/// must already exist. The configuration is read from `prolly_config_tree_config` in the
/// dataset directory and the tree is loaded from storage; if loading fails a new tree is
/// created with the same storage and configuration. The current branch is taken from the
/// repository's `HEAD` ref, defaulting to `main`, and the staging area is loaded.
///
/// # Errors
/// Returns `GitKvError::GitObjectError` when `path` is the git root, does not exist, is
/// not inside a git repository, the RocksDB store or config file is missing, or the
/// config cannot be read or parsed; `GitKvError::GitOpenError` when the repository cannot
/// be opened; and propagates staging-area errors.
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, GitKvError> {
    let dataset_path = path.as_ref();
    let dataset_dir = dataset_path.to_path_buf();

    if Self::is_in_git_root(dataset_path)? {
        return Err(GitKvError::GitObjectError(
            "Cannot open RocksDB store in git root directory. \
            Please use a subdirectory for your dataset, or the commit operation \
            may accidentally stage all files in the repository."
                .to_string(),
        ));
    }
    if !dataset_dir.exists() {
        return Err(GitKvError::GitObjectError(
            "Dataset directory not found. Call init() first to create the store.".to_string(),
        ));
    }
    let git_root = match Self::find_git_root(dataset_path) {
        Some(root) => root,
        None => {
            return Err(GitKvError::GitObjectError(
                "Not inside a git repository. Please run from within a git repository."
                    .to_string(),
            ))
        }
    };

    let prolly_dir = Self::get_prolly_dir(&git_root);
    let git_repo = gix::open(&git_root).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?;

    let database_dir = prolly_dir.join("nodes").join("rocksdb");
    if !database_dir.exists() {
        return Err(GitKvError::GitObjectError(
            "RocksDB store not initialized. Call init() first to create the store.".to_string(),
        ));
    }
    let storage = RocksDBNodeStorage::<N>::new(database_dir)
        .map_err(|e| GitKvError::GitObjectError(format!("RocksDB creation failed: {e}")))?;

    let config_path = dataset_dir.join("prolly_config_tree_config");
    if !config_path.exists() {
        return Err(GitKvError::GitObjectError(
            "Config file not found. The store may not be initialized. \
            Call init() to create a new store."
                .to_string(),
        ));
    }
    let config_data = std::fs::read_to_string(&config_path)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to read config file: {e}")))?;
    let config: TreeConfig<N> = serde_json::from_str(&config_data)
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to parse config file: {e}")))?;

    // Fall back to a new tree on the same storage when nothing can be loaded.
    let tree = match ProllyTree::load_from_storage(storage.clone(), config.clone()) {
        Some(existing_tree) => existing_tree,
        None => ProllyTree::new(storage, config),
    };

    let head_ref = git_repo
        .head_ref()
        .map_err(|e| GitKvError::GitObjectError(format!("Failed to get head ref: {e}")))?;
    let current_branch = match head_ref {
        Some(reference) => reference.name().shorten().to_string(),
        None => "main".to_string(),
    };

    let mut store = VersionedKvStore {
        tree,
        metadata: GitMetadataBackend::new(git_repo),
        staging_area: HashMap::new(),
        current_branch,
        storage_backend: StorageBackend::RocksDB,
        dataset_dir: Some(dataset_dir),
        config_filename: DEFAULT_TREE_CONFIG_FILENAME.to_string(),
    };
    store.load_staging_area()?;
    Ok(store)
}
}
#[cfg(feature = "rocksdb_storage")]
/// [`HistoricalAccess`] implementation for the store backed by `RocksDBNodeStorage`;
/// only compiled when the `rocksdb_storage` feature is enabled.
impl<const N: usize> HistoricalAccess<N>
for VersionedKvStore<N, RocksDBNodeStorage<N>, GitMetadataBackend>
{
/// Resolves `reference` to a commit id, reads the tree configuration recorded at that
/// commit, and returns the key/value pairs `collect_keys_from_config` yields for it.
fn get_keys_at_ref(&self, reference: &str) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
let commit_id = self.resolve_commit(reference)?;
let tree_config = self.read_tree_config_from_commit(&commit_id)?;
self.collect_keys_from_config(&tree_config)
}
}
#[cfg(feature = "rocksdb_storage")]
/// [`HistoricalCommitAccess`] implementation for the store backed by `RocksDBNodeStorage`;
/// only compiled when the `rocksdb_storage` feature is enabled. Both methods forward to
/// the storage-independent `*_generic` helpers.
impl<const N: usize> HistoricalCommitAccess<N>
for VersionedKvStore<N, RocksDBNodeStorage<N>, GitMetadataBackend>
{
/// Forwards to `get_commits_for_key_generic`.
fn get_commits_for_key(&self, key: &[u8]) -> Result<Vec<CommitInfo>, GitKvError> {
self.get_commits_for_key_generic(key)
}
/// Forwards to `get_commit_history_generic`.
fn get_commit_history(&self) -> Result<Vec<CommitInfo>, GitKvError> {
self.get_commit_history_generic()
}
}
/// [`TreeConfigSaver`] implementation for the store backed by `InMemoryNodeStorage`.
impl<const N: usize> TreeConfigSaver<N>
for VersionedKvStore<N, InMemoryNodeStorage<N>, GitMetadataBackend>
{
/// Forwards to the inherent `save_tree_config_to_git` method of this store type and
/// returns its result unchanged.
fn save_tree_config_to_git_internal(&self) -> Result<(), GitKvError> {
self.save_tree_config_to_git()
}
}
/// Configuration persistence for the store backed by `InMemoryNodeStorage`.
impl<const N: usize> VersionedKvStore<N, InMemoryNodeStorage<N>, GitMetadataBackend> {
    /// Writes the tree configuration as pretty-printed JSON into the dataset directory.
    ///
    /// The file is named after `self.config_filename`, the same field the
    /// `GitNodeStorage` variant of this method uses, instead of a hard-coded name, so the
    /// file that is written always matches the name the store was configured with.
    ///
    /// # Errors
    /// Returns `GitKvError::GitObjectError` when the dataset directory is not set, the
    /// configuration cannot be serialized, or the file cannot be written.
    fn save_tree_config_to_git(&self) -> Result<(), GitKvError> {
        let dataset_dir = self
            .dataset_dir
            .as_ref()
            .ok_or_else(|| GitKvError::GitObjectError("Dataset directory not set".to_string()))?;

        let config_json = serde_json::to_string_pretty(&self.tree.config)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to serialize config: {e}")))?;

        let config_path = dataset_dir.join(&self.config_filename);
        std::fs::write(&config_path, config_json)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to write config file: {e}")))?;
        Ok(())
    }
}
/// [`TreeConfigSaver`] implementation for the store backed by `FileNodeStorage`.
impl<const N: usize> TreeConfigSaver<N>
for VersionedKvStore<N, FileNodeStorage<N>, GitMetadataBackend>
{
/// Forwards to the inherent `save_tree_config_to_git` method of this store type and
/// returns its result unchanged.
fn save_tree_config_to_git_internal(&self) -> Result<(), GitKvError> {
self.save_tree_config_to_git()
}
}
/// Configuration persistence for the store backed by `FileNodeStorage`.
impl<const N: usize> VersionedKvStore<N, FileNodeStorage<N>, GitMetadataBackend> {
    /// Writes the tree configuration as pretty-printed JSON into the dataset directory.
    ///
    /// The file is named after `self.config_filename`, the same field the
    /// `GitNodeStorage` variant of this method uses, instead of a hard-coded name, so the
    /// file that is written always matches the name the store was configured with.
    ///
    /// # Errors
    /// Returns `GitKvError::GitObjectError` when the dataset directory is not set, the
    /// configuration cannot be serialized, or the file cannot be written.
    fn save_tree_config_to_git(&self) -> Result<(), GitKvError> {
        let dataset_dir = self
            .dataset_dir
            .as_ref()
            .ok_or_else(|| GitKvError::GitObjectError("Dataset directory not set".to_string()))?;

        let config_json = serde_json::to_string_pretty(&self.tree.config)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to serialize config: {e}")))?;

        let config_path = dataset_dir.join(&self.config_filename);
        std::fs::write(&config_path, config_json)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to write config file: {e}")))?;
        Ok(())
    }
}
#[cfg(feature = "rocksdb_storage")]
/// [`TreeConfigSaver`] implementation for the store backed by `RocksDBNodeStorage`;
/// only compiled when the `rocksdb_storage` feature is enabled.
impl<const N: usize> TreeConfigSaver<N>
for VersionedKvStore<N, RocksDBNodeStorage<N>, GitMetadataBackend>
{
/// Forwards to the inherent `save_tree_config_to_git` method of this store type and
/// returns its result unchanged.
fn save_tree_config_to_git_internal(&self) -> Result<(), GitKvError> {
self.save_tree_config_to_git()
}
}
#[cfg(feature = "rocksdb_storage")]
/// Configuration persistence for the store backed by `RocksDBNodeStorage`; only compiled
/// when the `rocksdb_storage` feature is enabled.
impl<const N: usize> VersionedKvStore<N, RocksDBNodeStorage<N>, GitMetadataBackend> {
    /// Writes the tree configuration as pretty-printed JSON into the dataset directory.
    ///
    /// The file is named after `self.config_filename`, the same field the
    /// `GitNodeStorage` variant of this method uses, instead of a hard-coded name, so the
    /// file that is written always matches the name the store was configured with.
    ///
    /// # Errors
    /// Returns `GitKvError::GitObjectError` when the dataset directory is not set, the
    /// configuration cannot be serialized, or the file cannot be written.
    fn save_tree_config_to_git(&self) -> Result<(), GitKvError> {
        let dataset_dir = self
            .dataset_dir
            .as_ref()
            .ok_or_else(|| GitKvError::GitObjectError("Dataset directory not set".to_string()))?;

        let config_json = serde_json::to_string_pretty(&self.tree.config)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to serialize config: {e}")))?;

        let config_path = dataset_dir.join(&self.config_filename);
        std::fs::write(&config_path, config_json)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to write config file: {e}")))?;
        Ok(())
    }
}