prollytree 0.3.2

A prolly (probabilistic) tree for efficient storage, retrieval, and modification of ordered data.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

use crate::config::TreeConfig;
use crate::digest::ValueDigest;
use crate::git::types::*;
use crate::git::versioned_store::GitVersionedKvStore;
use crate::node::ProllyNode;
use gix::prelude::*;
use std::collections::HashMap;

/// Git operations for versioned KV store
///
/// Wraps a [`GitVersionedKvStore`] and layers branch-level operations on top
/// of it: `merge`, `diff`, `show` and `revert`. The const parameter `N` is
/// forwarded unchanged to the store and to the digests read back from git.
pub struct GitOperations<const N: usize> {
    /// Store that every operation reads from; `merge` and `revert` also mutate it.
    store: GitVersionedKvStore<N>,
}

impl<const N: usize> GitOperations<N> {
    pub fn new(store: GitVersionedKvStore<N>) -> Self {
        GitOperations { store }
    }

    /// Merge `other_branch` into the current branch.
    ///
    /// Only trivial merges are performed; three-way merges are not supported.
    /// The outcome depends on how the two branch tips relate:
    ///
    /// * both resolve to the same commit: nothing to do, returns
    ///   `MergeResult::FastForward` with that commit;
    /// * the current tip is an ancestor of the other tip: `other_branch` is
    ///   checked out and `MergeResult::FastForward` with the other commit is
    ///   returned;
    /// * the other tip is an ancestor of the current tip (already up to
    ///   date): nothing to do, returns `MergeResult::FastForward` with the
    ///   current commit;
    /// * the histories have diverged: returns `MergeResult::Conflict` holding
    ///   a single placeholder conflict under the key `<merge>`.
    ///
    /// # Errors
    ///
    /// Returns an error if either branch cannot be resolved to a commit or if
    /// the checkout performed for a fast-forward fails.
    pub fn merge(&mut self, other_branch: &str) -> Result<MergeResult, GitKvError> {
        // Resolve both branch tips up front.
        let current_branch = self.store.current_branch();
        let current_commit = self.get_branch_commit(current_branch)?;
        let other_commit = self.get_branch_commit(other_branch)?;

        // Identical tips: nothing to merge.
        if current_commit == other_commit {
            return Ok(MergeResult::FastForward(current_commit));
        }

        if self.is_fast_forward_possible(&current_commit, &other_commit)? {
            // NOTE(review): this switches to `other_branch` instead of advancing
            // the current branch's ref — confirm that is the intended semantics.
            self.store.checkout(other_branch)?;
            return Ok(MergeResult::FastForward(other_commit));
        }

        // The other branch is already contained in the current history, so the
        // merge is a no-op rather than a conflict ("already up to date").
        if self.is_ancestor(&other_commit, &current_commit)? {
            return Ok(MergeResult::FastForward(current_commit));
        }

        // For now, we don't support three-way merges.
        // Return a placeholder conflict indicating a manual merge is needed.
        let conflicts = vec![crate::git::types::KvConflict {
            key: b"<merge>".to_vec(),
            base_value: None,
            our_value: Some(b"Cannot automatically merge - manual merge required".to_vec()),
            their_value: Some(b"Use 'git merge' or resolve conflicts manually".to_vec()),
        }];

        Ok(MergeResult::Conflict(conflicts))
    }

    /// Report whether moving from `current_commit` to `other_commit` is a
    /// pure fast-forward.
    ///
    /// That holds exactly when `current_commit` is an ancestor of (or equal
    /// to) `other_commit`, so the question is delegated to `is_ancestor`.
    fn is_fast_forward_possible(
        &self,
        current_commit: &gix::ObjectId,
        other_commit: &gix::ObjectId,
    ) -> Result<bool, GitKvError> {
        let current_is_behind = self.is_ancestor(current_commit, other_commit)?;
        Ok(current_is_behind)
    }

    /// Check whether `ancestor` is reachable from `descendant` by following
    /// parent links.
    ///
    /// A commit counts as its own ancestor. The history is explored
    /// breadth-first starting at `descendant`; objects that cannot be loaded
    /// or that do not decode as commits contribute no parents and are simply
    /// skipped. Returns `Ok(false)` once the reachable history is exhausted.
    fn is_ancestor(
        &self,
        ancestor: &gix::ObjectId,
        descendant: &gix::ObjectId,
    ) -> Result<bool, GitKvError> {
        // Trivial case: a commit is considered an ancestor of itself.
        if ancestor == descendant {
            return Ok(true);
        }

        let mut seen = std::collections::HashSet::new();
        let mut pending = std::collections::VecDeque::from([*descendant]);

        while let Some(candidate) = pending.pop_front() {
            // `insert` returns false when the commit was already processed.
            if !seen.insert(candidate) {
                continue;
            }

            if candidate == *ancestor {
                return Ok(true);
            }

            // Load the commit; anything unreadable ends this branch of the walk.
            let mut raw = Vec::new();
            let Ok(object) = self.store.git_repo().objects.find(&candidate, &mut raw) else {
                continue;
            };
            let Ok(gix::objs::ObjectRef::Commit(commit)) = object.decode() else {
                continue;
            };

            // Schedule every parent that has not been processed yet.
            pending.extend(commit.parents().filter(|parent| !seen.contains(parent)));
        }

        // The whole reachable history was searched without finding `ancestor`.
        Ok(false)
    }

    /// Generate a diff between two branches or commits.
    ///
    /// `from` and `to` are resolved with `parse_commit_id`, the key-value
    /// state of each commit is loaded, and one [`KvDiff`] is produced for
    /// every key that differs:
    ///
    /// * present only in `to`: `DiffOperation::Added`;
    /// * present only in `from`: `DiffOperation::Removed`;
    /// * present in both with different values: `DiffOperation::Modified`.
    ///
    /// Unchanged keys are omitted. The returned entries are sorted by key so
    /// the output is deterministic.
    ///
    /// # Errors
    ///
    /// Returns an error if either revision cannot be resolved or if the state
    /// of either commit cannot be loaded.
    pub fn diff(&self, from: &str, to: &str) -> Result<Vec<KvDiff>, GitKvError> {
        let from_commit_id = self.parse_commit_id(from)?;
        let to_commit_id = self.parse_commit_id(to)?;

        let from_state = self.get_kv_state_at_commit(&from_commit_id)?;
        let to_state = self.get_kv_state_at_commit(&to_commit_id)?;

        // Union of the keys of both states, borrowed and ordered. A BTreeSet
        // keeps the result order stable (HashMap iteration order is not) and
        // avoids cloning keys that turn out to be unchanged.
        let all_keys: std::collections::BTreeSet<&Vec<u8>> =
            from_state.keys().chain(to_state.keys()).collect();

        let mut diffs = Vec::new();
        for key in all_keys {
            let operation = match (from_state.get(key), to_state.get(key)) {
                (None, Some(value)) => DiffOperation::Added(value.clone()),
                (Some(value), None) => DiffOperation::Removed(value.clone()),
                (Some(old), Some(new)) if old != new => DiffOperation::Modified {
                    old: old.clone(),
                    new: new.clone(),
                },
                // Unchanged value; (None, None) cannot occur because every key
                // comes from one of the two states.
                (Some(_), Some(_)) | (None, None) => continue,
            };

            diffs.push(KvDiff {
                key: key.clone(),
                operation,
            });
        }

        Ok(diffs)
    }

    /// Describe a single commit: its metadata, its parents and the key-value
    /// changes it introduced.
    ///
    /// The changes are the diff against the first parent. A commit without
    /// parents reports every key in its state as `DiffOperation::Added`.
    ///
    /// # Errors
    ///
    /// Returns an error if `commit` cannot be resolved, the object cannot be
    /// found or is not a commit, or the key-value state cannot be loaded.
    pub fn show(&self, commit: &str) -> Result<CommitDetails, GitKvError> {
        let commit_id = self.parse_commit_id(commit)?;

        // Load and decode the commit object.
        let mut raw = Vec::new();
        let object = self
            .store
            .git_repo()
            .objects
            .find(&commit_id, &mut raw)
            .map_err(|e| GitKvError::GitObjectError(format!("Commit not found: {e}")))?;

        let Ok(gix::objs::ObjectRef::Commit(commit_ref)) = object.decode() else {
            return Err(GitKvError::GitObjectError(
                "Object is not a commit".to_string(),
            ));
        };

        let parent_ids: Vec<gix::ObjectId> = commit_ref.parents().collect();

        let info = CommitInfo {
            id: commit_id,
            author: commit_ref.author().name.to_string(),
            committer: commit_ref.committer().name.to_string(),
            message: commit_ref.message().title.to_string(),
            timestamp: commit_ref.time().seconds,
        };

        let changes = match parent_ids.first() {
            // Regular commit: diff against the first parent.
            Some(parent_id) => self.diff(&parent_id.to_string(), &commit_id.to_string())?,
            // Root commit: everything it contains was added by it.
            None => self
                .get_kv_state_at_commit(&commit_id)?
                .into_iter()
                .map(|(key, value)| KvDiff {
                    key,
                    operation: DiffOperation::Added(value),
                })
                .collect(),
        };

        Ok(CommitDetails {
            info,
            changes,
            parent_ids,
        })
    }

    /// Revert a commit
    pub fn revert(&mut self, commit: &str) -> Result<(), GitKvError> {
        let _commit_id = self.parse_commit_id(commit)?;

        // Get the changes in the commit
        let details = self.show(commit)?;

        // Apply the reverse of each change
        for diff in details.changes {
            match diff.operation {
                DiffOperation::Added(_) => {
                    // If it was added, delete it
                    self.store.delete(&diff.key)?;
                }
                DiffOperation::Removed(value) => {
                    // If it was removed, add it back
                    self.store.insert(diff.key, value)?;
                }
                DiffOperation::Modified { old, new: _ } => {
                    // If it was modified, revert to old value
                    self.store.insert(diff.key, old)?;
                }
            }
        }

        // Commit the revert
        let message = format!("Revert \"{}\"", details.info.message);
        self.store.commit(&message)?;

        Ok(())
    }

    /// Resolve `branch` to the commit it points at.
    ///
    /// Names that do not already start with `refs/` are looked up under
    /// `refs/heads/`. When no such reference can be found, `branch` is handed
    /// to rev-parse so that commit IDs and other revision specs work too.
    ///
    /// # Errors
    ///
    /// Returns an error if the reference exists but does not point directly
    /// at an object ID, or if neither lookup succeeds.
    fn get_branch_commit(&self, branch: &str) -> Result<gix::ObjectId, GitKvError> {
        let full_ref_name = if branch.starts_with("refs/") {
            branch.to_string()
        } else {
            format!("refs/heads/{branch}")
        };

        let repo = self.store.git_repo();

        // No usable reference: fall back to treating the input as a revision.
        let Ok(reference) = repo.refs.find(&full_ref_name) else {
            return repo
                .rev_parse_single(branch)
                .map(|id| id.into())
                .map_err(|e| {
                    GitKvError::GitObjectError(format!(
                        "Cannot resolve branch/commit {branch}: {e}"
                    ))
                });
        };

        // The reference must carry an object ID as its target.
        reference
            .target
            .try_id()
            .map(|id| id.to_owned())
            .ok_or_else(|| {
                GitKvError::GitObjectError(format!(
                    "Branch {branch} does not point to a commit"
                ))
            })
    }

    /// Load the full key-value state belonging to `commit_id`.
    ///
    /// When `commit_id` is the repository's current HEAD the state is read
    /// from the live store; any other commit is reconstructed from the git
    /// object database.
    ///
    /// # Errors
    ///
    /// Returns an error if HEAD cannot be determined or if loading the state
    /// fails.
    fn get_kv_state_at_commit(
        &self,
        commit_id: &gix::ObjectId,
    ) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
        let head = self
            .store
            .git_repo()
            .head_id()
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to get HEAD: {e}")))?;

        // NOTE(review): the live store may hold changes that are not part of
        // HEAD yet — confirm that using it for HEAD is intended.
        let wants_head = *commit_id == head;
        if wants_head {
            self.get_current_kv_state()
        } else {
            self.reconstruct_kv_state_from_commit(commit_id)
        }
    }

    /// Rebuild the key-value state stored in `commit_id` without checking the
    /// commit out.
    ///
    /// The work is delegated to `reconstruct_state_from_git_objects`, which
    /// reads the needed files straight from the git object database.
    fn reconstruct_kv_state_from_commit(
        &self,
        commit_id: &gix::ObjectId,
    ) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
        let state = self.reconstruct_state_from_git_objects(commit_id)?;
        Ok(state)
    }

    /// Reconstruct the key-value state of `commit_id` from git tree and blob
    /// objects.
    ///
    /// Two files are read from the commit: `prolly_config_tree_config` (the
    /// tree configuration, including the root hash) and
    /// `prolly_hash_mappings` (prolly hash to git object ID). Each file is
    /// looked up first under the dataset directory — the process's current
    /// directory relative to the repository work dir — and then at the
    /// repository root. The prolly tree is then walked from the root hash.
    ///
    /// # Errors
    ///
    /// Returns an error if the current directory cannot be determined or is
    /// outside the work tree, if the repository has no work dir, if either
    /// file cannot be read and parsed from the commit, if the config has no
    /// root hash, or if walking the tree fails.
    fn reconstruct_state_from_git_objects(
        &self,
        commit_id: &gix::ObjectId,
    ) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
        let current_dir = std::env::current_dir()
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to get current dir: {e}")))?;

        let git_root =
            self.store.git_repo().work_dir().ok_or_else(|| {
                GitKvError::GitObjectError("Not in a working directory".to_string())
            })?;

        let relative_path = current_dir.strip_prefix(git_root).map_err(|_| {
            GitKvError::GitObjectError("Current directory not within git repository".to_string())
        })?;

        // Paths inside a git tree always use '/', so join the components
        // explicitly instead of rendering the OS path (which uses '\' on
        // Windows and would never match a tree entry).
        let dataset_name = relative_path
            .components()
            .map(|component| component.as_os_str().to_string_lossy())
            .collect::<Vec<_>>()
            .join("/");

        // Candidate locations, most specific first.
        let config_paths = [
            format!("{dataset_name}/prolly_config_tree_config"),
            "prolly_config_tree_config".to_string(),
        ];
        let mapping_paths = [
            format!("{dataset_name}/prolly_hash_mappings"),
            "prolly_hash_mappings".to_string(),
        ];

        // Use the first candidate that can be read and parsed.
        let tree_config = config_paths
            .iter()
            .find_map(|path| self.read_prolly_config_from_commit(commit_id, path).ok())
            .ok_or_else(|| {
                GitKvError::GitObjectError(
                    "Could not find prolly_config_tree_config in commit".to_string(),
                )
            })?;

        let hash_mappings = mapping_paths
            .iter()
            .find_map(|path| self.read_hash_mappings_from_commit(commit_id, path).ok())
            .ok_or_else(|| {
                GitKvError::GitObjectError(
                    "Could not find prolly_hash_mappings in commit".to_string(),
                )
            })?;

        let root_hash = tree_config.root_hash.ok_or_else(|| {
            GitKvError::GitObjectError("Tree config has no root hash".to_string())
        })?;

        // Walk the prolly tree from its root and gather every key-value pair.
        self.collect_keys_from_root_hash(&root_hash, &hash_mappings)
    }

    /// Load the file at `file_path` from `commit_id` and parse it as a JSON
    /// encoded [`TreeConfig`].
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read from the commit or its
    /// content is not a valid tree config.
    fn read_prolly_config_from_commit(
        &self,
        commit_id: &gix::ObjectId,
        file_path: &str,
    ) -> Result<TreeConfig<N>, GitKvError> {
        let raw = self.read_file_from_git_commit(commit_id, file_path)?;
        serde_json::from_slice::<TreeConfig<N>>(&raw)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to parse tree config: {e}")))
    }

    /// Load the hash-mapping file at `file_path` from `commit_id`.
    ///
    /// Each line is expected to look like `<prolly hash hex>:<git object id hex>`.
    /// Lines are skipped without error when they have no `:`, when the
    /// hash part is not valid hex or does not decode to exactly `N` bytes,
    /// or when the object ID part does not parse.
    ///
    /// # Errors
    ///
    /// Returns an error only if the file itself cannot be read from the commit.
    fn read_hash_mappings_from_commit(
        &self,
        commit_id: &gix::ObjectId,
        file_path: &str,
    ) -> Result<HashMap<ValueDigest<N>, gix::ObjectId>, GitKvError> {
        let raw = self.read_file_from_git_commit(commit_id, file_path)?;
        let text = String::from_utf8_lossy(&raw);

        let mut mappings = HashMap::new();
        for line in text.lines() {
            let Some((hash_hex, oid_hex)) = line.split_once(':') else {
                continue;
            };

            // Left-hand side: the prolly hash, which must be exactly N bytes.
            let Ok(hash_bytes) = self.decode_hex(hash_hex) else {
                continue;
            };
            if hash_bytes.len() != N {
                continue;
            }
            let prolly_hash = ValueDigest::raw_hash(&hash_bytes);

            // Right-hand side: the git object ID the hash maps to.
            if let Ok(git_object_id) = gix::ObjectId::from_hex(oid_hex.as_bytes()) {
                mappings.insert(prolly_hash, git_object_id);
            }
        }

        Ok(mappings)
    }

    /// Return the content of `file_path` as stored in `commit_id`.
    ///
    /// `file_path` is split on `/` and resolved component by component from
    /// the commit's root tree, so nested paths such as
    /// `dataset/prolly_config_tree_config` are supported.
    ///
    /// # Errors
    ///
    /// Returns an error if the commit cannot be found or decoded, if the
    /// object is not a commit, or if the path cannot be resolved to a blob.
    fn read_file_from_git_commit(
        &self,
        commit_id: &gix::ObjectId,
        file_path: &str,
    ) -> Result<Vec<u8>, GitKvError> {
        // Load the commit object itself.
        let mut commit_bytes = Vec::new();
        let commit_data = self
            .store
            .git_repo()
            .objects
            .find(commit_id, &mut commit_bytes)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to find commit: {e}")))?;

        let decoded = commit_data
            .decode()
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to decode commit: {e}")))?;
        let Some(commit_ref) = decoded.into_commit() else {
            return Err(GitKvError::GitObjectError(
                "Object is not a commit".to_string(),
            ));
        };

        // Walk from the commit's root tree down the path components.
        let components: Vec<&str> = file_path.split('/').collect();
        let root_tree = commit_ref.tree();
        self.find_file_in_tree(&root_tree, &components, 0)
    }

    /// Resolve `path_parts[depth..]` inside the tree `tree_id` and return the
    /// content of the blob it names.
    ///
    /// The component at `depth` is looked up among the tree's entries.
    /// Intermediate components must be trees and are descended into
    /// recursively; the last component must be a blob.
    ///
    /// # Errors
    ///
    /// Returns an error if `depth` is past the end of `path_parts`, if any
    /// object cannot be found or decoded, if a component is missing from its
    /// tree, or if an entry has the wrong kind for its position in the path.
    fn find_file_in_tree(
        &self,
        tree_id: &gix::ObjectId,
        path_parts: &[&str],
        depth: usize,
    ) -> Result<Vec<u8>, GitKvError> {
        let Some(&current_part) = path_parts.get(depth) else {
            return Err(GitKvError::GitObjectError(
                "Path traversal error".to_string(),
            ));
        };
        let is_final = depth + 1 == path_parts.len();

        // Load and decode the tree for this level.
        let mut tree_buffer = Vec::new();
        let tree_data = self
            .store
            .git_repo()
            .objects
            .find(tree_id, &mut tree_buffer)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to find tree: {e}")))?;

        let tree_ref = tree_data
            .decode()
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to decode tree: {e}")))?
            .into_tree()
            .ok_or_else(|| GitKvError::GitObjectError("Object is not a tree".to_string()))?;

        // The first entry whose name matches the current component decides.
        let matching_entry = tree_ref
            .entries
            .into_iter()
            .find(|entry| entry.filename == current_part.as_bytes());
        let Some(entry) = matching_entry else {
            return Err(GitKvError::GitObjectError(format!(
                "Path component '{}' not found in tree (depth: {}, full path: {})",
                current_part,
                depth,
                path_parts.join("/")
            )));
        };

        if !is_final {
            // Intermediate component: must be a directory to descend into.
            if !entry.mode.is_tree() {
                return Err(GitKvError::GitObjectError(format!(
                    "Expected directory but found file: {current_part}"
                )));
            }
            let subtree_oid = gix::ObjectId::from(entry.oid);
            return self.find_file_in_tree(&subtree_oid, path_parts, depth + 1);
        }

        // Last component: must be a file whose content is returned.
        if !entry.mode.is_blob() {
            return Err(GitKvError::GitObjectError(format!(
                "Expected file but found directory: {current_part}"
            )));
        }

        let blob_oid = gix::ObjectId::from(entry.oid);
        let mut blob_buffer = Vec::new();
        let blob_data = self
            .store
            .git_repo()
            .objects
            .find(&blob_oid, &mut blob_buffer)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to find blob: {e}")))?;

        let blob_ref = blob_data
            .decode()
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to decode blob: {e}")))?
            .into_blob()
            .ok_or_else(|| GitKvError::GitObjectError("Object is not a blob".to_string()))?;

        Ok(blob_ref.data.to_vec())
    }

    /// Collect every key-value pair of the prolly tree rooted at `root_hash`.
    ///
    /// `hash_mappings` translates prolly hashes to the git blobs holding the
    /// bincode-serialized nodes. The root node is loaded here and the rest of
    /// the tree is walked by `collect_keys_from_node`.
    ///
    /// # Errors
    ///
    /// Returns an error if the root hash has no mapping, if the root object
    /// cannot be found, decoded or is not a blob, if it does not deserialize
    /// as a node, or if walking the tree fails.
    fn collect_keys_from_root_hash(
        &self,
        root_hash: &ValueDigest<N>,
        hash_mappings: &HashMap<ValueDigest<N>, gix::ObjectId>,
    ) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
        let Some(root_git_id) = hash_mappings.get(root_hash) else {
            return Err(GitKvError::GitObjectError(
                "Root hash not found in mappings".to_string(),
            ));
        };

        // Fetch the blob that stores the serialized root node.
        let mut raw = Vec::new();
        let root_data = self
            .store
            .git_repo()
            .objects
            .find(root_git_id, &mut raw)
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to find root node: {e}")))?;

        let decoded = root_data
            .decode()
            .map_err(|e| GitKvError::GitObjectError(format!("Failed to decode root node: {e}")))?;
        let Some(blob_ref) = decoded.into_blob() else {
            return Err(GitKvError::GitObjectError(
                "Root object is not a blob".to_string(),
            ));
        };

        let root_node: ProllyNode<N> = bincode::deserialize(blob_ref.data).map_err(|e| {
            GitKvError::GitObjectError(format!("Failed to deserialize root node: {e}"))
        })?;

        // Gather the pairs of the whole tree into one map.
        let mut collected = HashMap::new();
        self.collect_keys_from_node(&root_node, hash_mappings, &mut collected)?;
        Ok(collected)
    }

    /// Add every key-value pair stored at or below `node` to `result`.
    ///
    /// A leaf contributes its keys paired positionally with its values. For
    /// an internal node each value is treated as a child hash, resolved
    /// through `hash_mappings`, loaded from git and visited recursively.
    /// Children whose hash has no mapping are skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if a mapped child object cannot be found, decoded, is
    /// not a blob, or does not deserialize as a node.
    fn collect_keys_from_node(
        &self,
        node: &ProllyNode<N>,
        hash_mappings: &HashMap<ValueDigest<N>, gix::ObjectId>,
        result: &mut HashMap<Vec<u8>, Vec<u8>>,
    ) -> Result<(), GitKvError> {
        if node.is_leaf {
            // Pair each key with the value at the same position; keys without
            // a corresponding value are ignored.
            for (key, value) in node.keys.iter().zip(&node.values) {
                result.insert(key.clone(), value.clone());
            }
            return Ok(());
        }

        for child_ref in &node.values {
            let child_hash = ValueDigest::raw_hash(child_ref);
            // NOTE(review): an unmapped child is skipped silently, which
            // yields a partial state — confirm this is intended.
            let Some(child_git_id) = hash_mappings.get(&child_hash) else {
                continue;
            };

            // Load the blob that stores the serialized child node.
            let mut raw = Vec::new();
            let child_data = self
                .store
                .git_repo()
                .objects
                .find(child_git_id, &mut raw)
                .map_err(|e| {
                    GitKvError::GitObjectError(format!("Failed to find child node: {e}"))
                })?;

            let decoded = child_data.decode().map_err(|e| {
                GitKvError::GitObjectError(format!("Failed to decode child node: {e}"))
            })?;
            let Some(blob_ref) = decoded.into_blob() else {
                return Err(GitKvError::GitObjectError(
                    "Child object is not a blob".to_string(),
                ));
            };

            let child_node: ProllyNode<N> = bincode::deserialize(blob_ref.data).map_err(|e| {
                GitKvError::GitObjectError(format!("Failed to deserialize child node: {e}"))
            })?;

            self.collect_keys_from_node(&child_node, hash_mappings, result)?;
        }

        Ok(())
    }

    /// Simple hex decoder (replaces need for hex crate dependency).
    ///
    /// Decodes `hex_str` two characters at a time into bytes. Both upper- and
    /// lower-case digits are accepted; an empty string decodes to an empty
    /// vector.
    ///
    /// # Errors
    ///
    /// Returns an error if the string has an odd length or if any pair is not
    /// made of two ASCII hex digits.
    fn decode_hex(&self, hex_str: &str) -> Result<Vec<u8>, GitKvError> {
        if !hex_str.len().is_multiple_of(2) {
            return Err(GitKvError::GitObjectError(
                "Invalid hex string length".to_string(),
            ));
        }

        let mut bytes = Vec::with_capacity(hex_str.len() / 2);
        for pair in hex_str.as_bytes().chunks_exact(2) {
            // A pair can split a multi-byte character, so it may not be UTF-8.
            let text = std::str::from_utf8(pair)
                .map_err(|_| GitKvError::GitObjectError("Invalid hex characters".to_string()))?;

            // `from_str_radix` accepts a leading '+', so "+f" would otherwise
            // decode as a byte; require two real hex digits first.
            if !pair.iter().all(u8::is_ascii_hexdigit) {
                return Err(GitKvError::GitObjectError(
                    "Invalid hex digit".to_string(),
                ));
            }

            let byte = u8::from_str_radix(text, 16)
                .map_err(|_| GitKvError::GitObjectError("Invalid hex digit".to_string()))?;
            bytes.push(byte);
        }

        Ok(bytes)
    }

    /// Snapshot the key-value pairs currently held by this instance's store.
    ///
    /// Delegates to `get_current_kv_state_from_store` with the wrapped store.
    fn get_current_kv_state(&self) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
        let store = &self.store;
        self.get_current_kv_state_from_store(store)
    }

    /// Snapshot the key-value pairs currently held by `store`.
    ///
    /// Every key reported by `list_keys` is looked up with `get`; keys for
    /// which `get` returns `None` are left out of the result.
    fn get_current_kv_state_from_store(
        &self,
        store: &GitVersionedKvStore<N>,
    ) -> Result<HashMap<Vec<u8>, Vec<u8>>, GitKvError> {
        let snapshot: HashMap<Vec<u8>, Vec<u8>> = store
            .list_keys()
            .into_iter()
            .filter_map(|key| store.get(&key).map(|value| (key, value)))
            .collect();

        Ok(snapshot)
    }

    /// Resolve a revision string to an object ID.
    ///
    /// `commit` is handed to the repository's rev-parse, so anything it
    /// understands (for example `HEAD`, as exercised by the tests) is accepted.
    ///
    /// # Errors
    ///
    /// Returns an error if the revision cannot be resolved.
    fn parse_commit_id(&self, commit: &str) -> Result<gix::ObjectId, GitKvError> {
        self.store
            .git_repo()
            .rev_parse_single(commit)
            .map(|id| id.into())
            .map_err(|e| {
                GitKvError::GitObjectError(format!("Cannot resolve commit {commit}: {e}"))
            })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Create a regular (non-bare) git repository in a temporary directory,
    /// initialise a store in its `dataset` subdirectory and wrap it.
    ///
    /// The `TempDir` is returned so the directory outlives the operations
    /// object for the duration of the test.
    fn ops_in_fresh_repo() -> (TempDir, GitOperations<32>) {
        let temp_dir = TempDir::new().unwrap();
        gix::init(temp_dir.path()).unwrap();

        let dataset_dir = temp_dir.path().join("dataset");
        std::fs::create_dir_all(&dataset_dir).unwrap();

        let store = GitVersionedKvStore::<32>::init(&dataset_dir).unwrap();
        (temp_dir, GitOperations::new(store))
    }

    #[test]
    fn test_git_operations_creation() {
        // Construction alone must succeed.
        let (_temp_dir, _ops) = ops_in_fresh_repo();
    }

    #[test]
    fn test_parse_commit_id() {
        let (_temp_dir, ops) = ops_in_fresh_repo();

        // HEAD must resolve once the store has been initialised.
        let head_id = ops.parse_commit_id("HEAD");
        assert!(head_id.is_ok());
    }
}