hashtree_git/
storage.rs

1//! Hashtree-backed git object and ref storage using LMDB persistence
2//!
3//! Stores git objects and refs in a hashtree merkle tree with working tree:
4//!   root/
5//!     .git/
6//!       HEAD -> "ref: refs/heads/main"
7//!       refs/
8//!         heads/main -> <commit-sha1>
9//!         tags/v1.0 -> <tag-sha1>
10//!       objects/
11//!         <sha1> -> zlib-compressed loose object
12//!     README.md -> actual file content (from HEAD)
13//!     src/main.rs -> actual file content
14//!
15//! The working tree is extracted from HEAD commit for direct browsing.
16//! The root hash (SHA-256) is the content-addressed identifier for the entire repo state.
17//! All hashtree nodes are persisted to LMDB via LmdbBlobStore.
18
19use flate2::read::ZlibDecoder;
20use flate2::write::ZlibEncoder;
21use flate2::Compression;
22use hashtree_core::{sha256, HashTree, HashTreeConfig, DirEntry, Store, Cid};
23use hashtree_lmdb::LmdbBlobStore;
24use std::collections::HashMap;
25use std::io::{Read, Write};
26use std::path::Path;
27use std::sync::{Arc, RwLock};
28use tokio::runtime::{Handle, Runtime};
29
30use crate::object::{GitObject, ObjectId, ObjectType, parse_tree};
31use crate::refs::{validate_ref_name, NamedRef, Ref};
32use crate::{Error, Result};
33
/// One file of the working tree, as extracted from a commit's tree.
#[derive(Clone, Debug)]
pub struct WorkingTreeEntry {
    /// Slash-separated path of the file, relative to the directory currently
    /// being processed (the tree root right after extraction).
    pub path: String,
    /// Mode value copied from the git tree entry.
    pub mode: u32,
    /// Raw content of the blob the tree entry points to.
    pub content: Vec<u8>,
}
41
/// Interior mutable state for GitStorage.
///
/// Kept behind the `RwLock` in `GitStorage` so that the `&self` methods can
/// read and modify it.
struct GitStorageState {
    /// Git objects: SHA-1 hex -> zlib-compressed loose object (cached in memory)
    objects: HashMap<String, Vec<u8>>,
    /// Refs: name -> value ("ref: <target>" for symbolic, or SHA-1 hex)
    refs: HashMap<String, String>,
    /// Cached root hash; reset to `None` when objects or refs change and
    /// filled in again by `build_tree` / `load_from_root`.
    root_hash: Option<[u8; 32]>,
}
51
52/// Runtime executor for GitStorage - either owns a runtime or reuses an existing one
53enum RuntimeExecutor {
54    /// Owned runtime (for standalone use outside async context)
55    Owned(Runtime),
56    /// Handle to existing runtime (for use within async context)
57    Handle(Handle),
58}
59
60impl RuntimeExecutor {
61    /// Run a future to completion
62    fn block_on<F: std::future::Future>(&self, f: F) -> F::Output {
63        match self {
64            RuntimeExecutor::Owned(rt) => rt.block_on(f),
65            RuntimeExecutor::Handle(handle) => {
66                // Use block_in_place to allow blocking within async context
67                tokio::task::block_in_place(|| handle.block_on(f))
68            }
69        }
70    }
71}
72
/// Git storage backed by hashtree with LMDB persistence.
///
/// Objects and refs are held in memory (`state`); `build_tree` turns them into
/// a hashtree whose nodes are written to `store`.
pub struct GitStorage {
    /// LMDB blob store the hashtree nodes are persisted to.
    store: Arc<LmdbBlobStore>,
    /// Executor used to run the async hashtree calls from the synchronous API.
    runtime: RuntimeExecutor,
    /// In-memory objects, refs and the cached root hash.
    state: RwLock<GitStorageState>,
}
79
80impl GitStorage {
81    /// Open or create a git storage at the given path
82    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
83        // Try to use existing runtime handle if we're already in an async context,
84        // otherwise create a new runtime. This avoids the "Cannot drop a runtime
85        // in a context where blocking is not allowed" panic when nested.
86        let runtime = match Handle::try_current() {
87            Ok(handle) => RuntimeExecutor::Handle(handle),
88            Err(_) => {
89                let rt = Runtime::new()
90                    .map_err(|e| Error::StorageError(format!("tokio runtime: {}", e)))?;
91                RuntimeExecutor::Owned(rt)
92            }
93        };
94
95        // Use "blobs" subdirectory to match hashtree-cli's HashtreeStore
96        let store_path = path.as_ref().join("blobs");
97        let store = Arc::new(
98            LmdbBlobStore::new(&store_path)
99                .map_err(|e| Error::StorageError(format!("lmdb: {}", e)))?,
100        );
101
102        Ok(Self {
103            store,
104            runtime,
105            state: RwLock::new(GitStorageState {
106                objects: HashMap::new(),
107                refs: HashMap::new(),
108                root_hash: None,
109            }),
110        })
111    }
112
113    // === Object operations ===
114
115    /// Check if an object exists
116    pub fn has_object(&self, oid: &ObjectId) -> Result<bool> {
117        let state = self.state.read().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
118        Ok(state.objects.contains_key(&oid.to_hex()))
119    }
120
121    /// Read an object by ID
122    pub fn read_object(&self, oid: &ObjectId) -> Result<GitObject> {
123        let key = oid.to_hex();
124        let state = self.state.read().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
125        let compressed = state
126            .objects
127            .get(&key)
128            .ok_or_else(|| Error::ObjectNotFound(key.clone()))?;
129
130        // Decompress
131        let mut decoder = ZlibDecoder::new(compressed.as_slice());
132        let mut data = Vec::new();
133        decoder.read_to_end(&mut data)?;
134
135        GitObject::from_loose_format(&data)
136    }
137
138    /// Write an object, returning its ID
139    pub fn write_object(&self, obj: &GitObject) -> Result<ObjectId> {
140        let oid = obj.id();
141        let key = oid.to_hex();
142
143        // Compress
144        let loose = obj.to_loose_format();
145        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
146        encoder.write_all(&loose)?;
147        let compressed = encoder.finish()?;
148
149        let mut state = self.state.write().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
150        state.objects.insert(key, compressed);
151        state.root_hash = None; // Invalidate cache
152
153        Ok(oid)
154    }
155
156    /// Write a blob, returning its ID
157    pub fn write_blob(&self, content: &[u8]) -> Result<ObjectId> {
158        let obj = GitObject::new(ObjectType::Blob, content.to_vec());
159        self.write_object(&obj)
160    }
161
162    /// Write a tree, returning its ID
163    pub fn write_tree(&self, content: &[u8]) -> Result<ObjectId> {
164        let obj = GitObject::new(ObjectType::Tree, content.to_vec());
165        self.write_object(&obj)
166    }
167
168    /// Write a commit, returning its ID
169    pub fn write_commit(&self, content: &[u8]) -> Result<ObjectId> {
170        let obj = GitObject::new(ObjectType::Commit, content.to_vec());
171        self.write_object(&obj)
172    }
173
174    /// Write a tag, returning its ID
175    pub fn write_tag(&self, content: &[u8]) -> Result<ObjectId> {
176        let obj = GitObject::new(ObjectType::Tag, content.to_vec());
177        self.write_object(&obj)
178    }
179
180    /// Write raw object data (type + content already parsed)
181    pub fn write_raw_object(&self, obj_type: ObjectType, content: &[u8]) -> Result<ObjectId> {
182        let obj = GitObject::new(obj_type, content.to_vec());
183        self.write_object(&obj)
184    }
185
186    /// List all object IDs
187    pub fn list_objects(&self) -> Result<Vec<ObjectId>> {
188        let state = self.state.read().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
189        let mut oids = Vec::new();
190        for key in state.objects.keys() {
191            if let Some(oid) = ObjectId::from_hex(key) {
192                oids.push(oid);
193            }
194        }
195        Ok(oids)
196    }
197
198    // === Ref operations ===
199
200    /// Read a ref
201    pub fn read_ref(&self, name: &str) -> Result<Ref> {
202        let state = self.state.read().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
203        let value = state
204            .refs
205            .get(name)
206            .ok_or_else(|| Error::RefNotFound(name.into()))?;
207
208        if let Some(target) = value.strip_prefix("ref: ") {
209            Ok(Ref::Symbolic(target.to_string()))
210        } else {
211            let oid = ObjectId::from_hex(value)
212                .ok_or_else(|| Error::InvalidObjectFormat("invalid oid in ref".into()))?;
213            Ok(Ref::Direct(oid))
214        }
215    }
216
217    /// Write a ref
218    pub fn write_ref(&self, name: &str, target: &Ref) -> Result<()> {
219        validate_ref_name(name)?;
220
221        let value = match target {
222            Ref::Direct(oid) => oid.to_hex(),
223            Ref::Symbolic(target) => format!("ref: {}", target),
224        };
225
226        let mut state = self.state.write().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
227        state.refs.insert(name.to_string(), value);
228        state.root_hash = None;
229
230        Ok(())
231    }
232
233    /// Delete a ref
234    pub fn delete_ref(&self, name: &str) -> Result<bool> {
235        let mut state = self.state.write().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
236        let deleted = state.refs.remove(name).is_some();
237        state.root_hash = None;
238        Ok(deleted)
239    }
240
241    /// Resolve a ref to its final object ID (follows symbolic refs)
242    pub fn resolve_ref(&self, name: &str) -> Result<ObjectId> {
243        let mut current = name.to_string();
244        let mut depth = 0;
245        const MAX_DEPTH: usize = 10;
246
247        loop {
248            if depth >= MAX_DEPTH {
249                return Err(Error::RefNotFound(format!(
250                    "symbolic ref loop or too deep: {}",
251                    name
252                )));
253            }
254
255            match self.read_ref(&current)? {
256                Ref::Direct(oid) => return Ok(oid),
257                Ref::Symbolic(target) => {
258                    current = target;
259                    depth += 1;
260                }
261            }
262        }
263    }
264
265    /// List all refs
266    pub fn list_refs(&self) -> Result<Vec<NamedRef>> {
267        let state = self.state.read().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
268        let mut named_refs = Vec::new();
269
270        for (name, value) in &state.refs {
271            let reference = if let Some(target) = value.strip_prefix("ref: ") {
272                Ref::Symbolic(target.to_string())
273            } else if let Some(oid) = ObjectId::from_hex(value) {
274                Ref::Direct(oid)
275            } else {
276                continue;
277            };
278            named_refs.push(NamedRef::new(name.clone(), reference));
279        }
280
281        Ok(named_refs)
282    }
283
284    /// List refs matching a prefix (e.g., "refs/heads/")
285    pub fn list_refs_with_prefix(&self, prefix: &str) -> Result<Vec<NamedRef>> {
286        let state = self.state.read().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
287        let mut named_refs = Vec::new();
288
289        for (name, value) in &state.refs {
290            if !name.starts_with(prefix) {
291                continue;
292            }
293            let reference = if let Some(target) = value.strip_prefix("ref: ") {
294                Ref::Symbolic(target.to_string())
295            } else if let Some(oid) = ObjectId::from_hex(value) {
296                Ref::Direct(oid)
297            } else {
298                continue;
299            };
300            named_refs.push(NamedRef::new(name.clone(), reference));
301        }
302
303        Ok(named_refs)
304    }
305
306    /// Update a ref atomically, checking the old value
307    pub fn compare_and_swap_ref(
308        &self,
309        name: &str,
310        expected: Option<&ObjectId>,
311        new_value: Option<&ObjectId>,
312    ) -> Result<bool> {
313        validate_ref_name(name)?;
314
315        let mut state = self.state.write().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
316
317        // Check current value
318        let current = state.refs.get(name);
319        let current_oid = current.and_then(|v| ObjectId::from_hex(v));
320
321        match (expected, current_oid.as_ref()) {
322            (None, None) => {}                         // Creating new ref
323            (Some(exp), Some(cur)) if exp == cur => {} // Expected matches
324            (None, Some(_)) => return Ok(false),       // Expected empty but exists
325            (Some(_), None) => return Ok(false),       // Expected value but empty
326            (Some(_), Some(_)) => return Ok(false),    // Values don't match
327        }
328
329        match new_value {
330            Some(oid) => {
331                state.refs.insert(name.to_string(), oid.to_hex());
332            }
333            None => {
334                state.refs.remove(name);
335            }
336        }
337        state.root_hash = None;
338
339        Ok(true)
340    }
341
342    // === Hashtree operations ===
343
344    /// Build the merkle tree and return root hash (SHA-256)
345    /// Includes .git/ directory and working tree from HEAD
346    /// Also persists all nodes to LMDB
347    pub fn build_tree(&mut self) -> Result<[u8; 32]> {
348        {
349            let state = self.state.read().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
350            if let Some(hash) = state.root_hash {
351                return Ok(hash);
352            }
353        }
354
355        // Determine HEAD and extract working tree
356        let default_branch = self.determine_default_branch();
357        let working_tree = if let Some(ref branch) = default_branch {
358            if let Ok(commit_oid) = self.resolve_ref(branch) {
359                self.extract_working_tree(&commit_oid).ok()
360            } else {
361                None
362            }
363        } else {
364            None
365        };
366
367        let (objects, refs) = {
368            let state = self.state.read().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
369            (state.objects.clone(), state.refs.clone())
370        };
371        let store = self.store.clone();
372
373        let tree = HashTree::new(HashTreeConfig::new(store.clone()).public());
374
375        let root_hash = self.runtime.block_on(async {
376            // Build .git directory
377            let git_dir_hash = build_git_dir(&tree, &store, &objects, &refs, &default_branch).await?;
378
379            // Build working tree entries
380            let mut root_entries = vec![
381                DirEntry::new(".git", git_dir_hash),
382            ];
383
384            // Add working tree files
385            if let Some(wt) = working_tree {
386                let wt_entries = build_working_tree(&tree, wt).await?;
387                root_entries.extend(wt_entries);
388            }
389
390            let root_cid = tree
391                .put_directory(root_entries)
392                .await
393                .map_err(|e| Error::StorageError(format!("build tree: {}", e)))?;
394
395            Ok::<[u8; 32], Error>(root_cid.hash)
396        })?;
397
398        self.state.write().map_err(|e| Error::StorageError(format!("lock: {}", e)))?.root_hash = Some(root_hash);
399        Ok(root_hash)
400    }
401
402    /// Get root hash as hex string
403    pub fn get_root_hash(&mut self) -> Result<String> {
404        let hash = self.build_tree()?;
405        Ok(hex::encode(hash))
406    }
407
    /// Get the underlying store.
    ///
    /// Returns a reference to the shared `LmdbBlobStore` handle that
    /// `build_tree` and `load_from_root` use for the hashtree nodes.
    pub fn store(&self) -> &Arc<LmdbBlobStore> {
        &self.store
    }
412
413    /// Determine the default branch for HEAD
414    /// Priority: master > main > alphabetically first branch
415    pub fn determine_default_branch(&self) -> Option<String> {
416        let state = self.state.read().ok()?;
417
418        let branches: Vec<&String> = state.refs.keys()
419            .filter(|k| k.starts_with("refs/heads/"))
420            .collect();
421
422        if branches.is_empty() {
423            return None;
424        }
425
426        // Check for master first
427        if branches.iter().any(|b| *b == "refs/heads/master") {
428            return Some("refs/heads/master".to_string());
429        }
430
431        // Then main
432        if branches.iter().any(|b| *b == "refs/heads/main") {
433            return Some("refs/heads/main".to_string());
434        }
435
436        // Fall back to alphabetically first
437        let mut sorted: Vec<_> = branches.into_iter().collect();
438        sorted.sort();
439        sorted.first().map(|s| (*s).clone())
440    }
441
442    /// Extract the working tree from a commit
443    /// Returns a list of (path, mode, content) for all files
444    pub fn extract_working_tree(&self, commit_oid: &ObjectId) -> Result<Vec<WorkingTreeEntry>> {
445        let commit_obj = self.read_object(commit_oid)?;
446        if commit_obj.obj_type != ObjectType::Commit {
447            return Err(Error::InvalidObjectFormat("expected commit".into()));
448        }
449
450        // Parse commit to get tree OID
451        let commit_content = String::from_utf8_lossy(&commit_obj.content);
452        let tree_line = commit_content.lines()
453            .find(|l| l.starts_with("tree "))
454            .ok_or_else(|| Error::InvalidObjectFormat("commit missing tree".into()))?;
455
456        let tree_hex = tree_line.strip_prefix("tree ").unwrap().trim();
457        let tree_oid = ObjectId::from_hex(tree_hex)
458            .ok_or_else(|| Error::InvalidObjectFormat("invalid tree oid".into()))?;
459
460        // Recursively extract files from tree
461        let mut entries = Vec::new();
462        self.extract_tree_recursive(&tree_oid, "", &mut entries)?;
463        Ok(entries)
464    }
465
466    /// Recursively extract files from a git tree object
467    fn extract_tree_recursive(
468        &self,
469        tree_oid: &ObjectId,
470        prefix: &str,
471        entries: &mut Vec<WorkingTreeEntry>,
472    ) -> Result<()> {
473        let tree_obj = self.read_object(tree_oid)?;
474        if tree_obj.obj_type != ObjectType::Tree {
475            return Err(Error::InvalidObjectFormat("expected tree".into()));
476        }
477
478        let tree_entries = parse_tree(&tree_obj.content)?;
479
480        for entry in tree_entries {
481            let path = if prefix.is_empty() {
482                entry.name.clone()
483            } else {
484                format!("{}/{}", prefix, entry.name)
485            };
486
487            if entry.is_tree() {
488                // Recurse into subdirectory
489                self.extract_tree_recursive(&entry.oid, &path, entries)?;
490            } else {
491                // Read blob content
492                let blob_obj = self.read_object(&entry.oid)?;
493                entries.push(WorkingTreeEntry {
494                    path,
495                    mode: entry.mode,
496                    content: blob_obj.content,
497                });
498            }
499        }
500
501        Ok(())
502    }
503
504    /// Load from a root hash (fetches tree structure from LMDB store)
505    pub fn load_from_root(&mut self, root_hash: &str) -> Result<()> {
506        let hash_bytes = hex::decode(root_hash)
507            .map_err(|_| Error::StorageError("invalid root hash hex".into()))?;
508
509        if hash_bytes.len() != 32 {
510            return Err(Error::StorageError("root hash must be 32 bytes".into()));
511        }
512
513        let mut root = [0u8; 32];
514        root.copy_from_slice(&hash_bytes);
515
516        let store = self.store.clone();
517
518        // Load into temporary collections
519        let mut objects = HashMap::new();
520        let mut refs = HashMap::new();
521
522        self.runtime.block_on(async {
523            let tree = HashTree::new(HashTreeConfig::new(store).public());
524            load_tree_recursive(&tree, root, &mut objects, &mut refs).await
525        })?;
526
527        // Merge into state
528        let mut state = self.state.write().map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
529        state.objects.extend(objects);
530        state.refs.extend(refs);
531        state.root_hash = Some(root);
532
533        Ok(())
534    }
535}
536
537/// Build objects/ directory in hashtree
538async fn build_objects_dir<S: Store>(
539    tree: &HashTree<S>,
540    store: &Arc<S>,
541    objects: &HashMap<String, Vec<u8>>,
542) -> Result<[u8; 32]> {
543    let mut entries = Vec::new();
544
545    for (sha1, compressed) in objects {
546        let hash = tree
547            .put_blob(compressed)
548            .await
549            .map_err(|e| Error::StorageError(format!("put blob: {}", e)))?;
550        entries.push(DirEntry::new(sha1.clone(), hash).with_size(compressed.len() as u64));
551    }
552
553    if entries.is_empty() {
554        let hash = sha256(b"");
555        store
556            .put(hash, vec![])
557            .await
558            .map_err(|e| Error::StorageError(format!("put empty: {}", e)))?;
559        return Ok(hash);
560    }
561
562    tree
563        .put_directory(entries)
564        .await
565        .map(|cid| cid.hash)
566        .map_err(|e| Error::StorageError(format!("put objects dir: {}", e)))
567}
568
569/// Build refs/ directory in hashtree
570async fn build_refs_dir<S: Store>(
571    tree: &HashTree<S>,
572    store: &Arc<S>,
573    refs: &HashMap<String, String>,
574) -> Result<[u8; 32]> {
575    // Group refs by category (heads, tags, etc.)
576    let mut groups: HashMap<String, Vec<(String, String)>> = HashMap::new();
577
578    for (ref_name, value) in refs {
579        let parts: Vec<&str> = ref_name.split('/').collect();
580        if parts.len() >= 3 && parts[0] == "refs" {
581            let category = parts[1].to_string();
582            let name = parts[2..].join("/");
583            groups
584                .entry(category)
585                .or_default()
586                .push((name, value.clone()));
587        } else if ref_name == "HEAD" {
588            groups
589                .entry("HEAD".to_string())
590                .or_default()
591                .push(("".to_string(), value.clone()));
592        }
593    }
594
595    let mut ref_entries = Vec::new();
596
597    for (category, refs_in_category) in groups {
598        if category == "HEAD" {
599            if let Some((_, value)) = refs_in_category.first() {
600                let hash = tree
601                    .put_blob(value.as_bytes())
602                    .await
603                    .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
604                ref_entries.push(DirEntry::new("HEAD", hash).with_size(value.len() as u64));
605            }
606        } else {
607            let mut cat_entries = Vec::new();
608            for (name, value) in refs_in_category {
609                let hash = tree
610                    .put_blob(value.as_bytes())
611                    .await
612                    .map_err(|e| Error::StorageError(format!("put ref: {}", e)))?;
613                cat_entries.push(DirEntry::new(name, hash).with_size(value.len() as u64));
614            }
615            let cat_cid = tree
616                .put_directory(cat_entries)
617                .await
618                .map_err(|e| Error::StorageError(format!("put {} dir: {}", category, e)))?;
619            ref_entries.push(DirEntry::new(category, cat_cid.hash));
620        }
621    }
622
623    if ref_entries.is_empty() {
624        let hash = sha256(b"");
625        store
626            .put(hash, vec![])
627            .await
628            .map_err(|e| Error::StorageError(format!("put empty refs: {}", e)))?;
629        return Ok(hash);
630    }
631
632    tree
633        .put_directory(ref_entries)
634        .await
635        .map(|cid| cid.hash)
636        .map_err(|e| Error::StorageError(format!("put refs dir: {}", e)))
637}
638
639/// Build .git/ directory in hashtree (objects, refs, HEAD)
640async fn build_git_dir<S: Store>(
641    tree: &HashTree<S>,
642    store: &Arc<S>,
643    objects: &HashMap<String, Vec<u8>>,
644    refs: &HashMap<String, String>,
645    default_branch: &Option<String>,
646) -> Result<[u8; 32]> {
647    let objects_hash = build_objects_dir(tree, store, objects).await?;
648    let refs_hash = build_refs_dir(tree, store, refs).await?;
649
650    let mut git_entries = vec![
651        DirEntry::new("objects", objects_hash),
652        DirEntry::new("refs", refs_hash),
653    ];
654
655    // Add HEAD pointing to default branch
656    if let Some(branch) = default_branch {
657        let head_content = format!("ref: {}", branch);
658        let head_hash = tree
659            .put_blob(head_content.as_bytes())
660            .await
661            .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
662        git_entries.push(DirEntry::new("HEAD", head_hash).with_size(head_content.len() as u64));
663    }
664
665    tree
666        .put_directory(git_entries)
667        .await
668        .map(|cid| cid.hash)
669        .map_err(|e| Error::StorageError(format!("put .git dir: {}", e)))
670}
671
672/// Build working tree entries from extracted files
673/// Returns DirEntry items for the root directory
674async fn build_working_tree<S: Store>(
675    tree: &HashTree<S>,
676    entries: Vec<WorkingTreeEntry>,
677) -> Result<Vec<DirEntry>> {
678    // Group entries by top-level directory
679    let mut dirs: HashMap<String, Vec<WorkingTreeEntry>> = HashMap::new();
680    let mut root_files: Vec<WorkingTreeEntry> = Vec::new();
681
682    for entry in entries {
683        if let Some(slash_pos) = entry.path.find('/') {
684            let top_dir = entry.path[..slash_pos].to_string();
685            let rest = entry.path[slash_pos + 1..].to_string();
686            dirs.entry(top_dir).or_default().push(WorkingTreeEntry {
687                path: rest,
688                mode: entry.mode,
689                content: entry.content,
690            });
691        } else {
692            root_files.push(entry);
693        }
694    }
695
696    let mut result = Vec::new();
697
698    // Add root-level files
699    for file in root_files {
700        let hash = tree
701            .put_blob(&file.content)
702            .await
703            .map_err(|e| Error::StorageError(format!("put file {}: {}", file.path, e)))?;
704        result.push(DirEntry::new(file.path, hash).with_size(file.content.len() as u64));
705    }
706
707    // Recursively build subdirectories
708    for (dir_name, sub_entries) in dirs {
709        let sub_dir_entries = build_working_tree_recursive(tree, sub_entries).await?;
710        let dir_cid = tree
711            .put_directory(sub_dir_entries)
712            .await
713            .map_err(|e| Error::StorageError(format!("put dir {}: {}", dir_name, e)))?;
714        result.push(DirEntry::new(dir_name, dir_cid.hash));
715    }
716
717    Ok(result)
718}
719
720/// Recursively build a subdirectory's entries
721async fn build_working_tree_recursive<S: Store>(
722    tree: &HashTree<S>,
723    entries: Vec<WorkingTreeEntry>,
724) -> Result<Vec<DirEntry>> {
725    let mut dirs: HashMap<String, Vec<WorkingTreeEntry>> = HashMap::new();
726    let mut files: Vec<WorkingTreeEntry> = Vec::new();
727
728    for entry in entries {
729        if let Some(slash_pos) = entry.path.find('/') {
730            let top_dir = entry.path[..slash_pos].to_string();
731            let rest = entry.path[slash_pos + 1..].to_string();
732            dirs.entry(top_dir).or_default().push(WorkingTreeEntry {
733                path: rest,
734                mode: entry.mode,
735                content: entry.content,
736            });
737        } else {
738            files.push(entry);
739        }
740    }
741
742    let mut result = Vec::new();
743
744    // Add files
745    for file in files {
746        let hash = tree
747            .put_blob(&file.content)
748            .await
749            .map_err(|e| Error::StorageError(format!("put file {}: {}", file.path, e)))?;
750        result.push(DirEntry::new(file.path, hash).with_size(file.content.len() as u64));
751    }
752
753    // Recurse into subdirectories
754    for (dir_name, sub_entries) in dirs {
755        let sub_dir_entries = Box::pin(build_working_tree_recursive(tree, sub_entries)).await?;
756        let dir_cid = tree
757            .put_directory(sub_dir_entries)
758            .await
759            .map_err(|e| Error::StorageError(format!("put dir {}: {}", dir_name, e)))?;
760        result.push(DirEntry::new(dir_name, dir_cid.hash));
761    }
762
763    Ok(result)
764}
765
766/// Recursively load tree from hashtree using HashTree walk
767/// Supports both old format (objects/, refs/) and new format (.git/objects/, .git/refs/)
768async fn load_tree_recursive<S: Store>(
769    tree: &HashTree<S>,
770    root: [u8; 32],
771    objects: &mut HashMap<String, Vec<u8>>,
772    refs: &mut HashMap<String, String>,
773) -> Result<()> {
774    // Walk the entire tree (public mode - no encryption key)
775    let root_cid = Cid::public(root, 0);
776    let entries = tree
777        .walk(&root_cid, "")
778        .await
779        .map_err(|e| Error::StorageError(format!("walk tree: {}", e)))?;
780
781    for entry in entries {
782        // Skip directory entries, only process files
783        if entry.link_type.is_tree() {
784            continue;
785        }
786
787        // Read the file content
788        let data = tree
789            .read_file(&entry.hash)
790            .await
791            .map_err(|e| Error::StorageError(format!("read file: {}", e)))?
792            .ok_or_else(|| Error::StorageError("file not found".into()))?;
793
794        // Determine if this is an object or ref based on path
795        // Support both .git/ prefix (new) and no prefix (old format)
796        let path = &entry.path;
797
798        if let Some(rest) = path.strip_prefix(".git/objects/") {
799            objects.insert(rest.to_string(), data);
800        } else if let Some(rest) = path.strip_prefix(".git/refs/") {
801            refs.insert(format!("refs/{}", rest), String::from_utf8_lossy(&data).to_string());
802        } else if path == ".git/HEAD" {
803            refs.insert("HEAD".to_string(), String::from_utf8_lossy(&data).to_string());
804        } else if let Some(rest) = path.strip_prefix("objects/") {
805            // Old format compatibility
806            objects.insert(rest.to_string(), data);
807        } else if let Some(rest) = path.strip_prefix("refs/") {
808            // Old format compatibility
809            refs.insert(format!("refs/{}", rest), String::from_utf8_lossy(&data).to_string());
810        } else if path == "HEAD" {
811            // Old format compatibility
812            refs.insert("HEAD".to_string(), String::from_utf8_lossy(&data).to_string());
813        }
814        // Skip working tree files (not in .git/)
815    }
816
817    Ok(())
818}
819
820#[cfg(test)]
821mod tests {
822    use super::*;
823    use tempfile::tempdir;
824
825    #[test]
826    fn test_object_roundtrip() {
827        let dir = tempdir().unwrap();
828        let storage = GitStorage::open(dir.path().join("git")).unwrap();
829
830        let content = b"hello world\n";
831        let oid = storage.write_blob(content).unwrap();
832
833        // Known hash for "hello world\n"
834        assert_eq!(oid.to_hex(), "3b18e512dba79e4c8300dd08aeb37f8e728b8dad");
835
836        let obj = storage.read_object(&oid).unwrap();
837        assert_eq!(obj.content, content);
838    }
839
840    #[test]
841    fn test_ref_operations() {
842        let dir = tempdir().unwrap();
843        let storage = GitStorage::open(dir.path().join("git")).unwrap();
844
845        let oid = storage.write_blob(b"test").unwrap();
846
847        // Write direct ref
848        storage
849            .write_ref("refs/heads/main", &Ref::Direct(oid))
850            .unwrap();
851
852        // Read it back
853        let resolved = storage.resolve_ref("refs/heads/main").unwrap();
854        assert_eq!(resolved, oid);
855
856        // Write symbolic ref
857        storage
858            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".into()))
859            .unwrap();
860
861        // Resolve through symbolic
862        let head_resolved = storage.resolve_ref("HEAD").unwrap();
863        assert_eq!(head_resolved, oid);
864
865        // List refs
866        let refs = storage.list_refs().unwrap();
867        assert_eq!(refs.len(), 2);
868    }
869
870    #[test]
871    fn test_has_object() {
872        let dir = tempdir().unwrap();
873        let storage = GitStorage::open(dir.path().join("git")).unwrap();
874
875        let oid = storage.write_blob(b"test").unwrap();
876        assert!(storage.has_object(&oid).unwrap());
877
878        let fake_oid = ObjectId::from_hex("0000000000000000000000000000000000000000").unwrap();
879        assert!(!storage.has_object(&fake_oid).unwrap());
880    }
881
882    #[test]
883    fn test_build_tree_and_persist() {
884        let dir = tempdir().unwrap();
885        let mut storage = GitStorage::open(dir.path().join("git")).unwrap();
886
887        // Add some data
888        storage.write_blob(b"hello").unwrap();
889        storage
890            .write_ref(
891                "refs/heads/main",
892                &Ref::Direct(
893                    ObjectId::from_hex("abc123def456abc123def456abc123def456abc1").unwrap(),
894                ),
895            )
896            .unwrap();
897
898        // Build tree (persists to LMDB)
899        let root_hash = storage.build_tree().unwrap();
900        assert_eq!(root_hash.len(), 32);
901
902        // Verify data is in LMDB
903        let stats = storage.store().stats().unwrap();
904        assert!(stats.count > 0, "should have stored hashtree nodes in LMDB");
905
906        // Get hex
907        let hex = storage.get_root_hash().unwrap();
908        assert_eq!(hex.len(), 64);
909    }
910
911    #[test]
912    fn test_load_from_root() {
913        let dir = tempdir().unwrap();
914
915        // Create and populate storage
916        let root_hex = {
917            let mut storage = GitStorage::open(dir.path().join("git")).unwrap();
918            storage.write_blob(b"test content").unwrap();
919            storage
920                .write_ref(
921                    "refs/heads/main",
922                    &Ref::Direct(
923                        ObjectId::from_hex("abc123def456abc123def456abc123def456abc1").unwrap(),
924                    ),
925                )
926                .unwrap();
927            storage.get_root_hash().unwrap()
928        };
929
930        // Load from root in new storage instance
931        let mut storage2 = GitStorage::open(dir.path().join("git")).unwrap();
932        storage2.load_from_root(&root_hex).unwrap();
933
934        // Verify refs loaded (now includes HEAD pointing to default branch)
935        let refs = storage2.list_refs().unwrap();
936        // Should have refs/heads/main and HEAD
937        assert!(refs.iter().any(|r| r.name == "refs/heads/main"));
938    }
939
940    #[test]
941    fn test_determine_default_branch_master_first() {
942        let dir = tempdir().unwrap();
943        let storage = GitStorage::open(dir.path().join("git")).unwrap();
944
945        let oid = ObjectId::from_hex("abc123def456abc123def456abc123def456abc1").unwrap();
946
947        // Add both main and master
948        storage.write_ref("refs/heads/main", &Ref::Direct(oid)).unwrap();
949        storage.write_ref("refs/heads/master", &Ref::Direct(oid)).unwrap();
950        storage.write_ref("refs/heads/develop", &Ref::Direct(oid)).unwrap();
951
952        // master should win
953        assert_eq!(
954            storage.determine_default_branch(),
955            Some("refs/heads/master".to_string())
956        );
957    }
958
959    #[test]
960    fn test_determine_default_branch_main_second() {
961        let dir = tempdir().unwrap();
962        let storage = GitStorage::open(dir.path().join("git")).unwrap();
963
964        let oid = ObjectId::from_hex("abc123def456abc123def456abc123def456abc1").unwrap();
965
966        // Add main and others, but not master
967        storage.write_ref("refs/heads/main", &Ref::Direct(oid)).unwrap();
968        storage.write_ref("refs/heads/develop", &Ref::Direct(oid)).unwrap();
969
970        // main should win
971        assert_eq!(
972            storage.determine_default_branch(),
973            Some("refs/heads/main".to_string())
974        );
975    }
976
977    #[test]
978    fn test_determine_default_branch_alphabetical() {
979        let dir = tempdir().unwrap();
980        let storage = GitStorage::open(dir.path().join("git")).unwrap();
981
982        let oid = ObjectId::from_hex("abc123def456abc123def456abc123def456abc1").unwrap();
983
984        // No main or master
985        storage.write_ref("refs/heads/develop", &Ref::Direct(oid)).unwrap();
986        storage.write_ref("refs/heads/feature", &Ref::Direct(oid)).unwrap();
987
988        // Alphabetically first should win
989        assert_eq!(
990            storage.determine_default_branch(),
991            Some("refs/heads/develop".to_string())
992        );
993    }
994
995    #[test]
996    fn test_determine_default_branch_empty() {
997        let dir = tempdir().unwrap();
998        let storage = GitStorage::open(dir.path().join("git")).unwrap();
999
1000        // No branches
1001        assert_eq!(storage.determine_default_branch(), None);
1002    }
1003
1004    #[test]
1005    fn test_working_tree_extraction() {
1006        use crate::object::serialize_tree;
1007
1008        let dir = tempdir().unwrap();
1009        let mut storage = GitStorage::open(dir.path().join("git")).unwrap();
1010
1011        // Create a simple repo structure:
1012        // README.md
1013        // src/main.rs
1014
1015        // 1. Create blobs
1016        let readme_content = b"# Test Repo\n\nThis is a test.";
1017        let main_rs_content = b"fn main() {\n    println!(\"Hello\");\n}";
1018
1019        let readme_oid = storage.write_blob(readme_content).unwrap();
1020        let main_rs_oid = storage.write_blob(main_rs_content).unwrap();
1021
1022        // 2. Create src/ tree
1023        let src_tree_content = serialize_tree(&[
1024            crate::object::TreeEntry::new(0o100644, "main.rs".to_string(), main_rs_oid),
1025        ]);
1026        let src_tree_oid = storage.write_tree(&src_tree_content).unwrap();
1027
1028        // 3. Create root tree
1029        let root_tree_content = serialize_tree(&[
1030            crate::object::TreeEntry::new(0o100644, "README.md".to_string(), readme_oid),
1031            crate::object::TreeEntry::new(0o40000, "src".to_string(), src_tree_oid),
1032        ]);
1033        let root_tree_oid = storage.write_tree(&root_tree_content).unwrap();
1034
1035        // 4. Create commit
1036        let commit_content = format!(
1037            "tree {}\nauthor Test <test@test.com> 1234567890 +0000\ncommitter Test <test@test.com> 1234567890 +0000\n\nInitial commit\n",
1038            root_tree_oid.to_hex()
1039        );
1040        let commit_oid = storage.write_commit(commit_content.as_bytes()).unwrap();
1041
1042        // 5. Create ref
1043        storage.write_ref("refs/heads/main", &Ref::Direct(commit_oid)).unwrap();
1044
1045        // 6. Extract working tree
1046        let working_tree = storage.extract_working_tree(&commit_oid).unwrap();
1047
1048        // Verify we got both files
1049        assert_eq!(working_tree.len(), 2);
1050
1051        let readme = working_tree.iter().find(|e| e.path == "README.md").unwrap();
1052        assert_eq!(readme.content, readme_content);
1053        assert_eq!(readme.mode, 0o100644);
1054
1055        let main_rs = working_tree.iter().find(|e| e.path == "src/main.rs").unwrap();
1056        assert_eq!(main_rs.content, main_rs_content);
1057    }
1058
1059    #[test]
1060    fn test_build_tree_with_working_tree() {
1061        use crate::object::serialize_tree;
1062
1063        let dir = tempdir().unwrap();
1064        let mut storage = GitStorage::open(dir.path().join("git")).unwrap();
1065
1066        // Create a repo with files
1067        let readme_content = b"# Hello World";
1068        let readme_oid = storage.write_blob(readme_content).unwrap();
1069
1070        let root_tree_content = serialize_tree(&[
1071            crate::object::TreeEntry::new(0o100644, "README.md".to_string(), readme_oid),
1072        ]);
1073        let root_tree_oid = storage.write_tree(&root_tree_content).unwrap();
1074
1075        let commit_content = format!(
1076            "tree {}\nauthor Test <test@test.com> 1234567890 +0000\ncommitter Test <test@test.com> 1234567890 +0000\n\nTest\n",
1077            root_tree_oid.to_hex()
1078        );
1079        let commit_oid = storage.write_commit(commit_content.as_bytes()).unwrap();
1080
1081        storage.write_ref("refs/heads/main", &Ref::Direct(commit_oid)).unwrap();
1082
1083        // Build the tree
1084        let root_hash = storage.build_tree().unwrap();
1085        assert_eq!(root_hash.len(), 32);
1086
1087        // Verify we can load it back
1088        let root_hex = hex::encode(root_hash);
1089
1090        // Create new storage and load
1091        let mut storage2 = GitStorage::open(dir.path().join("git")).unwrap();
1092        storage2.load_from_root(&root_hex).unwrap();
1093
1094        // Should have the ref
1095        let refs = storage2.list_refs().unwrap();
1096        assert!(refs.iter().any(|r| r.name == "refs/heads/main"));
1097
1098        // Should be able to read the blob
1099        assert!(storage2.has_object(&readme_oid).unwrap());
1100    }
1101
1102    #[test]
1103    fn test_full_push_simulation() {
1104        use crate::object::serialize_tree;
1105
1106        let dir = tempdir().unwrap();
1107        let mut storage = GitStorage::open(dir.path().join("git")).unwrap();
1108
1109        // Simulate what git-remote-htree does on push:
1110        // 1. Receive objects from git
1111        // 2. Store them
1112        // 3. Update refs
1113        // 4. Build tree (which now includes working tree)
1114
1115        // Create files
1116        let files = vec![
1117            ("README.md", b"# My Project\n".as_slice()),
1118            ("Cargo.toml", b"[package]\nname = \"test\"\n".as_slice()),
1119            ("src/lib.rs", b"pub fn hello() {}\n".as_slice()),
1120            ("src/main.rs", b"fn main() { hello(); }\n".as_slice()),
1121        ];
1122
1123        // Store blobs
1124        let mut blob_oids = Vec::new();
1125        for (_, content) in &files {
1126            blob_oids.push(storage.write_blob(content).unwrap());
1127        }
1128
1129        // Create src/ tree
1130        let src_tree = serialize_tree(&[
1131            crate::object::TreeEntry::new(0o100644, "lib.rs".to_string(), blob_oids[2]),
1132            crate::object::TreeEntry::new(0o100644, "main.rs".to_string(), blob_oids[3]),
1133        ]);
1134        let src_tree_oid = storage.write_tree(&src_tree).unwrap();
1135
1136        // Create root tree
1137        let root_tree = serialize_tree(&[
1138            crate::object::TreeEntry::new(0o100644, "Cargo.toml".to_string(), blob_oids[1]),
1139            crate::object::TreeEntry::new(0o100644, "README.md".to_string(), blob_oids[0]),
1140            crate::object::TreeEntry::new(0o40000, "src".to_string(), src_tree_oid),
1141        ]);
1142        let root_tree_oid = storage.write_tree(&root_tree).unwrap();
1143
1144        // Create commit
1145        let commit = format!(
1146            "tree {}\nauthor Dev <dev@example.com> 1700000000 +0000\ncommitter Dev <dev@example.com> 1700000000 +0000\n\nAdd project files\n",
1147            root_tree_oid.to_hex()
1148        );
1149        let commit_oid = storage.write_commit(commit.as_bytes()).unwrap();
1150
1151        // Update ref (this is what push does)
1152        storage.write_ref("refs/heads/main", &Ref::Direct(commit_oid)).unwrap();
1153
1154        // Build the hashtree (this should now include working tree)
1155        let root_hash = storage.get_root_hash().unwrap();
1156        println!("Root hash: {}", root_hash);
1157
1158        // Verify: load and check structure
1159        let store = storage.store().clone();
1160        let tree = HashTree::new(HashTreeConfig::new(store).public());
1161
1162        let root_bytes: [u8; 32] = hex::decode(&root_hash).unwrap().try_into().unwrap();
1163        let root_cid = Cid::public(root_bytes, 0);
1164
1165        // Walk the tree and collect paths
1166        let rt = tokio::runtime::Runtime::new().unwrap();
1167        let entries = rt.block_on(async {
1168            tree.walk(&root_cid, "").await.unwrap()
1169        });
1170
1171        let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
1172        println!("Paths in tree: {:?}", paths);
1173
1174        // Should have .git directory
1175        assert!(paths.iter().any(|p| p.starts_with(".git/")), "Missing .git/ directory");
1176
1177        // Should have .git/objects
1178        assert!(paths.iter().any(|p| p.starts_with(".git/objects/")), "Missing .git/objects/");
1179
1180        // Should have .git/refs
1181        assert!(paths.iter().any(|p| p.starts_with(".git/refs/")), "Missing .git/refs/");
1182
1183        // Should have .git/HEAD
1184        assert!(paths.iter().any(|p| p == ".git/HEAD"), "Missing .git/HEAD");
1185
1186        // Should have working tree files at root
1187        assert!(paths.iter().any(|p| p == "README.md"), "Missing README.md in working tree");
1188        assert!(paths.iter().any(|p| p == "Cargo.toml"), "Missing Cargo.toml in working tree");
1189        assert!(paths.iter().any(|p| p == "src/lib.rs"), "Missing src/lib.rs in working tree");
1190        assert!(paths.iter().any(|p| p == "src/main.rs"), "Missing src/main.rs in working tree");
1191
1192        println!("All checks passed!");
1193    }
1194}