// git_remote_htree/git/storage.rs
1//! Hashtree-backed git object and ref storage with configurable persistence
2//!
3//! Stores git objects and refs in a hashtree merkle tree:
4//!   root/
5//!     .git/
6//!       HEAD -> "ref: refs/heads/main"
7//!       refs/heads/main -> <commit-sha1>
8//!       info/refs -> dumb-HTTP ref advertisement
9//!       objects/XX/YYYY... -> zlib-compressed loose object (standard git layout)
10//!       objects/info/packs -> dumb-HTTP pack advertisement
11//!
12//! The root hash (SHA-256) is the content-addressed identifier for the entire repo state.
13
14use flate2::read::ZlibDecoder;
15use flate2::write::ZlibEncoder;
16use flate2::Compression;
17use hashtree_config::{Config, StorageBackend};
18use hashtree_core::store::{Store, StoreError, StoreStats};
19use hashtree_core::types::Hash;
20use hashtree_core::{Cid, DirEntry, HashTree, HashTreeConfig, LinkType};
21use hashtree_fs::FsBlobStore;
22#[cfg(feature = "lmdb")]
23use hashtree_lmdb::LmdbBlobStore;
24use sha1::{Digest, Sha1};
25use std::collections::{BTreeMap, HashMap};
26use std::io::{Read, Write};
27use std::path::Path;
28use std::sync::Arc;
29use tokio::runtime::{Handle, Runtime};
30use tracing::{debug, info};
31
32use super::object::{parse_tree, GitObject, ObjectId, ObjectType};
33use super::refs::{validate_ref_name, Ref};
34use super::{Error, Result};
35
/// Box type for async recursion
///
/// Pinned, heap-allocated, `Send` future. Used by
/// `build_working_tree_entries_boxed` so an async fn can call itself
/// indirectly (direct async recursion would give an infinitely-sized
/// future type).
type BoxFuture<'a, T> = std::pin::Pin<Box<dyn std::future::Future<Output = T> + Send + 'a>>;
38
/// A nested directory of refs: leaf `files` map ref names to their raw
/// stored value, `dirs` holds subdirectories keyed by path segment.
#[derive(Default)]
struct RefDirectory {
    files: BTreeMap<String, String>,
    dirs: BTreeMap<String, RefDirectory>,
}

impl RefDirectory {
    /// Insert `value` at the slash-split path `parts`, creating
    /// intermediate directories on demand. An empty path is a no-op.
    fn insert(&mut self, parts: &[&str], value: String) {
        match parts {
            [] => {}
            [leaf] => {
                self.files.insert((*leaf).to_string(), value);
            }
            [dir, rest @ ..] => {
                self.dirs
                    .entry((*dir).to_string())
                    .or_default()
                    .insert(rest, value);
            }
        }
    }
}
61
/// Runtime executor - either owns a dedicated tokio runtime or borrows
/// the handle of an already-running one (see `open_with_backend_and_max_bytes`).
enum RuntimeExecutor {
    /// Runtime created by us because no ambient runtime was available.
    Owned(Runtime),
    /// Handle to the ambient runtime we were constructed inside.
    Handle(Handle),
}

impl RuntimeExecutor {
    /// Drive the future `f` to completion synchronously.
    ///
    /// For a borrowed handle this goes through `block_in_place` so that
    /// blocking the current worker thread is signalled to the runtime;
    /// per tokio docs this requires a multi-threaded runtime — TODO confirm
    /// callers never construct this on a current-thread runtime.
    fn block_on<F: std::future::Future>(&self, f: F) -> F::Output {
        match self {
            RuntimeExecutor::Owned(rt) => rt.block_on(f),
            RuntimeExecutor::Handle(handle) => tokio::task::block_in_place(|| handle.block_on(f)),
        }
    }
}
76
/// Local blob store - wraps either FsBlobStore or LmdbBlobStore
pub enum LocalStore {
    /// Blob storage as loose files on the local filesystem.
    Fs(FsBlobStore),
    /// LMDB-backed blob storage (compiled only with the `lmdb` feature).
    #[cfg(feature = "lmdb")]
    Lmdb(LmdbBlobStore),
}
83
84impl LocalStore {
85    fn new_for_backend<P: AsRef<Path>>(
86        path: P,
87        backend: StorageBackend,
88        max_bytes: u64,
89    ) -> std::result::Result<Self, StoreError> {
90        match backend {
91            StorageBackend::Fs => {
92                if max_bytes > 0 {
93                    Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
94                        path, max_bytes,
95                    )?))
96                } else {
97                    Ok(LocalStore::Fs(FsBlobStore::new(path)?))
98                }
99            }
100            #[cfg(feature = "lmdb")]
101            StorageBackend::Lmdb => {
102                if max_bytes > 0 {
103                    Ok(LocalStore::Lmdb(LmdbBlobStore::with_max_bytes(
104                        path, max_bytes,
105                    )?))
106                } else {
107                    Ok(LocalStore::Lmdb(LmdbBlobStore::new(path)?))
108                }
109            }
110            #[cfg(not(feature = "lmdb"))]
111            StorageBackend::Lmdb => {
112                warn!(
113                    "LMDB backend requested but lmdb feature not enabled, using filesystem storage"
114                );
115                if max_bytes > 0 {
116                    Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
117                        path, max_bytes,
118                    )?))
119                } else {
120                    Ok(LocalStore::Fs(FsBlobStore::new(path)?))
121                }
122            }
123        }
124    }
125
126    /// Create a new local store based on config
127    pub fn new<P: AsRef<Path>>(path: P) -> std::result::Result<Self, StoreError> {
128        Self::new_with_max_bytes(path, 0)
129    }
130
131    /// Create a new local store based on config with an optional byte limit.
132    pub fn new_with_max_bytes<P: AsRef<Path>>(
133        path: P,
134        max_bytes: u64,
135    ) -> std::result::Result<Self, StoreError> {
136        let config = Config::load_or_default();
137        Self::new_for_backend(path, config.storage.backend, max_bytes)
138    }
139
140    /// List all hashes in the store
141    pub fn list(&self) -> std::result::Result<Vec<Hash>, StoreError> {
142        match self {
143            LocalStore::Fs(store) => store.list(),
144            #[cfg(feature = "lmdb")]
145            LocalStore::Lmdb(store) => store.list(),
146        }
147    }
148
149    /// Sync get operation
150    pub fn get_sync(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
151        match self {
152            LocalStore::Fs(store) => store.get_sync(hash),
153            #[cfg(feature = "lmdb")]
154            LocalStore::Lmdb(store) => store.get_sync(hash),
155        }
156    }
157}
158
#[async_trait::async_trait]
impl Store for LocalStore {
    // Every trait method simply dispatches to the active backend variant.

    async fn put(&self, hash: Hash, data: Vec<u8>) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.put(hash, data).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.put(hash, data).await,
        }
    }

    async fn get(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get(hash).await,
        }
    }

    async fn has(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.has(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.has(hash).await,
        }
    }

    async fn delete(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.delete(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.delete(hash).await,
        }
    }

    fn set_max_bytes(&self, max: u64) {
        match self {
            LocalStore::Fs(store) => store.set_max_bytes(max),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.set_max_bytes(max),
        }
    }

    fn max_bytes(&self) -> Option<u64> {
        match self {
            LocalStore::Fs(store) => store.max_bytes(),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.max_bytes(),
        }
    }

    // Backend stats errors are swallowed and reported as empty stats
    // rather than propagated (this trait method is infallible).
    async fn stats(&self) -> StoreStats {
        match self {
            LocalStore::Fs(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    pinned_count: stats.pinned_count as u64,
                    pinned_bytes: stats.pinned_bytes,
                },
                Err(_) => StoreStats::default(),
            },
            // NOTE(review): the LMDB arm reports pinned_count/pinned_bytes
            // as 0 while the FS arm forwards real values — presumably the
            // LMDB backend lacks pin tracking; confirm against its impl.
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    pinned_count: 0,
                    pinned_bytes: 0,
                },
                Err(_) => StoreStats::default(),
            },
        }
    }

    async fn evict_if_needed(&self) -> std::result::Result<u64, StoreError> {
        match self {
            LocalStore::Fs(store) => store.evict_if_needed().await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.evict_if_needed().await,
        }
    }
}
241
/// Git storage backed by HashTree with configurable persistence
///
/// Objects and refs are accumulated in in-memory maps; `build_tree`
/// materializes them into the hashtree layout described in the module
/// docs. Mutating methods clear `root_cid` so the next build is fresh.
pub struct GitStorage {
    // Shared local blob store; also held (cloned Arc) by `tree`.
    store: Arc<LocalStore>,
    // Hashtree used to materialize the repo layout.
    tree: HashTree<LocalStore>,
    // Sync/async bridge for the async tree and store APIs.
    runtime: RuntimeExecutor,
    /// In-memory state for the current session
    // Map of hex object id -> zlib-compressed loose object bytes.
    objects: std::sync::RwLock<HashMap<String, Vec<u8>>>,
    // Map of ref name -> raw value (hex oid or "ref: <target>").
    refs: std::sync::RwLock<HashMap<String, String>>,
    /// Cached root CID (hash + encryption key)
    root_cid: std::sync::RwLock<Option<Cid>>,
}
253
254impl GitStorage {
255    /// Open or create a git storage at the given path
256    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
257        let config = Config::load_or_default();
258        let max_size_bytes = config
259            .storage
260            .max_size_gb
261            .saturating_mul(1024 * 1024 * 1024);
262        Self::open_with_max_bytes(path, max_size_bytes)
263    }
264
265    /// Open or create a git storage at the given path with an explicit byte limit.
266    pub fn open_with_max_bytes(path: impl AsRef<Path>, max_size_bytes: u64) -> Result<Self> {
267        let config = Config::load_or_default();
268        Self::open_with_backend_and_max_bytes(path, config.storage.backend, max_size_bytes)
269    }
270
271    pub fn open_with_backend_and_max_bytes(
272        path: impl AsRef<Path>,
273        backend: StorageBackend,
274        max_size_bytes: u64,
275    ) -> Result<Self> {
276        let runtime = match Handle::try_current() {
277            Ok(handle) => RuntimeExecutor::Handle(handle),
278            Err(_) => {
279                let rt = Runtime::new()
280                    .map_err(|e| Error::StorageError(format!("tokio runtime: {}", e)))?;
281                RuntimeExecutor::Owned(rt)
282            }
283        };
284
285        let store_path = path.as_ref().join("blobs");
286        let store = Arc::new(
287            LocalStore::new_for_backend(&store_path, backend, max_size_bytes)
288                .map_err(|e| Error::StorageError(format!("local store: {}", e)))?,
289        );
290
291        // Use encrypted mode (default) - blossom servers require encrypted data
292        let tree = HashTree::new(HashTreeConfig::new(store.clone()));
293
294        Ok(Self {
295            store,
296            tree,
297            runtime,
298            objects: std::sync::RwLock::new(HashMap::new()),
299            refs: std::sync::RwLock::new(HashMap::new()),
300            root_cid: std::sync::RwLock::new(None),
301        })
302    }
303
    /// Evict old local blobs if storage is over the configured limit.
    ///
    /// Blocks on the store's async eviction pass; returns the `u64` the
    /// store reports (presumably bytes or blobs evicted — confirm against
    /// the backend `Store::evict_if_needed` impl).
    pub fn evict_if_needed(&self) -> Result<u64> {
        self.runtime
            .block_on(self.store.evict_if_needed())
            .map_err(|e| Error::StorageError(format!("evict: {}", e)))
    }
310
311    /// Write an object, returning its ID
312    fn write_object(&self, obj: &GitObject) -> Result<ObjectId> {
313        let oid = obj.id();
314        let key = oid.to_hex();
315
316        let loose = obj.to_loose_format();
317        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
318        encoder.write_all(&loose)?;
319        let compressed = encoder.finish()?;
320
321        let mut objects = self
322            .objects
323            .write()
324            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
325        objects.insert(key, compressed);
326
327        // Invalidate cached root
328        if let Ok(mut root) = self.root_cid.write() {
329            *root = None;
330        }
331
332        Ok(oid)
333    }
334
335    /// Write raw object data (type + content already parsed)
336    pub fn write_raw_object(&self, obj_type: ObjectType, content: &[u8]) -> Result<ObjectId> {
337        let obj = GitObject::new(obj_type, content.to_vec());
338        self.write_object(&obj)
339    }
340
341    /// Read an object by ID from in-memory cache
342    #[allow(dead_code)]
343    fn read_object(&self, oid: &ObjectId) -> Result<GitObject> {
344        let key = oid.to_hex();
345        let objects = self
346            .objects
347            .read()
348            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
349        let compressed = objects
350            .get(&key)
351            .ok_or_else(|| Error::ObjectNotFound(key.clone()))?;
352
353        let mut decoder = ZlibDecoder::new(compressed.as_slice());
354        let mut data = Vec::new();
355        decoder.read_to_end(&mut data)?;
356
357        GitObject::from_loose_format(&data)
358    }
359
360    /// Write a ref
361    pub fn write_ref(&self, name: &str, target: &Ref) -> Result<()> {
362        validate_ref_name(name)?;
363
364        let value = match target {
365            Ref::Direct(oid) => oid.to_hex(),
366            Ref::Symbolic(target) => format!("ref: {}", target),
367        };
368
369        let mut refs = self
370            .refs
371            .write()
372            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
373        refs.insert(name.to_string(), value);
374
375        // Invalidate cached root
376        if let Ok(mut root) = self.root_cid.write() {
377            *root = None;
378        }
379
380        Ok(())
381    }
382
383    /// Read a ref
384    #[allow(dead_code)]
385    pub fn read_ref(&self, name: &str) -> Result<Option<Ref>> {
386        let refs = self
387            .refs
388            .read()
389            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
390
391        match refs.get(name) {
392            Some(value) => {
393                if let Some(target) = value.strip_prefix("ref: ") {
394                    Ok(Some(Ref::Symbolic(target.to_string())))
395                } else {
396                    let oid = ObjectId::from_hex(value)
397                        .ok_or_else(|| Error::StorageError(format!("invalid ref: {}", value)))?;
398                    Ok(Some(Ref::Direct(oid)))
399                }
400            }
401            None => Ok(None),
402        }
403    }
404
    /// List all refs
    ///
    /// Returns a snapshot copy of the in-memory ref map
    /// (name -> raw stored value: hex oid or "ref: <target>").
    #[allow(dead_code)]
    pub fn list_refs(&self) -> Result<HashMap<String, String>> {
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        Ok(refs.clone())
    }
414
415    /// Delete a ref
416    pub fn delete_ref(&self, name: &str) -> Result<bool> {
417        let mut refs = self
418            .refs
419            .write()
420            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
421        let existed = refs.remove(name).is_some();
422
423        // Invalidate cached root
424        if let Ok(mut root) = self.root_cid.write() {
425            *root = None;
426        }
427
428        Ok(existed)
429    }
430
431    /// Import a raw git object (already in loose format, zlib compressed)
432    /// Used when fetching existing objects from remote before push
433    pub fn import_compressed_object(&self, oid: &str, compressed_data: Vec<u8>) -> Result<()> {
434        let mut objects = self
435            .objects
436            .write()
437            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
438        objects.insert(oid.to_string(), compressed_data);
439
440        // Invalidate cached root
441        if let Ok(mut root) = self.root_cid.write() {
442            *root = None;
443        }
444
445        Ok(())
446    }
447
448    /// Import a ref directly (used when loading existing refs from remote)
449    pub fn import_ref(&self, name: &str, value: &str) -> Result<()> {
450        let mut refs = self
451            .refs
452            .write()
453            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
454        refs.insert(name.to_string(), value.to_string());
455
456        // Invalidate cached root
457        if let Ok(mut root) = self.root_cid.write() {
458            *root = None;
459        }
460
461        Ok(())
462    }
463
464    /// Check if a ref exists
465    #[cfg(test)]
466    pub fn has_ref(&self, name: &str) -> Result<bool> {
467        let refs = self
468            .refs
469            .read()
470            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
471        Ok(refs.contains_key(name))
472    }
473
474    /// Get count of objects in storage
475    #[cfg(test)]
476    pub fn object_count(&self) -> Result<usize> {
477        let objects = self
478            .objects
479            .read()
480            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
481        Ok(objects.len())
482    }
483
484    /// Get the cached root CID (returns None if tree hasn't been built)
485    #[allow(dead_code)]
486    pub fn get_root_cid(&self) -> Result<Option<Cid>> {
487        let root = self
488            .root_cid
489            .read()
490            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
491        Ok(root.clone())
492    }
493
494    /// Get the default branch name
495    #[allow(dead_code)]
496    pub fn default_branch(&self) -> Result<Option<String>> {
497        let refs = self
498            .refs
499            .read()
500            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
501
502        if let Some(head) = refs.get("HEAD") {
503            if let Some(target) = head.strip_prefix("ref: ") {
504                return Ok(Some(target.to_string()));
505            }
506        }
507        Ok(None)
508    }
509
510    /// Get the tree SHA from a commit object
511    fn get_commit_tree(
512        &self,
513        commit_oid: &str,
514        objects: &HashMap<String, Vec<u8>>,
515    ) -> Option<String> {
516        let compressed = objects.get(commit_oid)?;
517
518        // Decompress the object
519        let mut decoder = ZlibDecoder::new(&compressed[..]);
520        let mut decompressed = Vec::new();
521        decoder.read_to_end(&mut decompressed).ok()?;
522
523        // Parse git object format: "type size\0content"
524        let null_pos = decompressed.iter().position(|&b| b == 0)?;
525        let content = &decompressed[null_pos + 1..];
526
527        // Parse commit content - first line is "tree <sha>"
528        let content_str = std::str::from_utf8(content).ok()?;
529        let first_line = content_str.lines().next()?;
530        first_line
531            .strip_prefix("tree ")
532            .map(|tree_hash| tree_hash.to_string())
533    }
534
535    /// Get git object content (decompressed, without header)
536    fn get_object_content(
537        &self,
538        oid: &str,
539        objects: &HashMap<String, Vec<u8>>,
540    ) -> Option<(ObjectType, Vec<u8>)> {
541        let compressed = objects.get(oid)?;
542
543        // Decompress the object
544        let mut decoder = ZlibDecoder::new(&compressed[..]);
545        let mut decompressed = Vec::new();
546        decoder.read_to_end(&mut decompressed).ok()?;
547
548        // Parse git object format: "type size\0content"
549        let null_pos = decompressed.iter().position(|&b| b == 0)?;
550        let header = std::str::from_utf8(&decompressed[..null_pos]).ok()?;
551        let obj_type = if header.starts_with("blob") {
552            ObjectType::Blob
553        } else if header.starts_with("tree") {
554            ObjectType::Tree
555        } else if header.starts_with("commit") {
556            ObjectType::Commit
557        } else if header.starts_with("tag") {
558            ObjectType::Tag
559        } else {
560            return None;
561        };
562        let content = decompressed[null_pos + 1..].to_vec();
563        Some((obj_type, content))
564    }
565
566    fn peel_tag_target(&self, oid: &str, objects: &HashMap<String, Vec<u8>>) -> Option<String> {
567        let (obj_type, content) = self.get_object_content(oid, objects)?;
568        if obj_type != ObjectType::Tag {
569            return Some(oid.to_string());
570        }
571
572        let target = std::str::from_utf8(&content)
573            .ok()?
574            .lines()
575            .find_map(|line| line.strip_prefix("object "))
576            .map(str::trim)?
577            .to_string();
578
579        match self.get_object_content(&target, objects)?.0 {
580            ObjectType::Tag => self.peel_tag_target(&target, objects),
581            _ => Some(target),
582        }
583    }
584
585    fn build_info_refs_content(
586        &self,
587        refs: &HashMap<String, String>,
588        objects: &HashMap<String, Vec<u8>>,
589    ) -> String {
590        let mut lines = Vec::new();
591
592        for (name, value) in refs {
593            if name == "HEAD" {
594                continue;
595            }
596
597            let oid = value.trim().to_string();
598            lines.push((name.clone(), oid.clone()));
599
600            if name.starts_with("refs/tags/") {
601                if let Some(peeled) = self.peel_tag_target(&oid, objects) {
602                    if peeled != oid {
603                        lines.push((format!("{}^{{}}", name), peeled));
604                    }
605                }
606            }
607        }
608
609        lines.sort_by(|a, b| a.0.cmp(&b.0));
610
611        let mut content = String::new();
612        for (name, oid) in lines {
613            content.push_str(&oid);
614            content.push('\t');
615            content.push_str(&name);
616            content.push('\n');
617        }
618        content
619    }
620
    /// Build the `.git/info/` directory for dumb-HTTP serving.
    ///
    /// Currently holds a single `refs` file containing the
    /// advertisement produced by `build_info_refs_content`. Returns the
    /// directory's CID.
    async fn build_info_dir(
        &self,
        refs: &HashMap<String, String>,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Cid> {
        let info_refs = self.build_info_refs_content(refs, objects);
        // Store the file content, keeping CID (hash + key) and size.
        let (info_refs_cid, info_refs_size) = self
            .tree
            .put(info_refs.as_bytes())
            .await
            .map_err(|e| Error::StorageError(format!("put info/refs: {}", e)))?;

        self.tree
            .put_directory(vec![
                DirEntry::from_cid("refs", &info_refs_cid).with_size(info_refs_size)
            ])
            .await
            .map_err(|e| Error::StorageError(format!("put info dir: {}", e)))
    }
640
    /// Build the hashtree and return the root CID (hash + encryption key)
    ///
    /// Materializes the in-memory objects/refs into the layout from the
    /// module docs: a root directory with `.git/` (HEAD, optional config
    /// and index, info/, objects/, refs/) plus the working-tree files of
    /// the default branch's commit. The result is cached in `root_cid`;
    /// every mutating method clears the cache, so a hit means state is
    /// unchanged since the last build.
    pub fn build_tree(&self) -> Result<Cid> {
        // Check if we have a cached root
        if let Ok(root) = self.root_cid.read() {
            if let Some(ref cid) = *root {
                return Ok(cid.clone());
            }
        }

        let objects = self
            .objects
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;

        // Get default branch from HEAD or find first branch ref
        let (default_branch, commit_sha) = if let Some(head) = refs.get("HEAD") {
            let branch = head.strip_prefix("ref: ").map(String::from);
            let sha = branch.as_ref().and_then(|b| refs.get(b)).cloned();
            (branch, sha)
        } else {
            // No HEAD ref - find first refs/heads/* ref directly
            // NOTE(review): HashMap iteration order is unspecified, so with
            // several branches and no HEAD the chosen branch is arbitrary.
            let mut branch_info: Option<(String, String)> = None;
            for (ref_name, sha) in refs.iter() {
                if ref_name.starts_with("refs/heads/") {
                    branch_info = Some((ref_name.clone(), sha.clone()));
                    break;
                }
            }
            match branch_info {
                Some((branch, sha)) => (Some(branch), Some(sha)),
                None => (None, None),
            }
        };

        // Get tree SHA from commit
        let tree_sha = commit_sha
            .as_ref()
            .and_then(|sha| self.get_commit_tree(sha, &objects));

        // Clone objects for async block
        let objects_clone = objects.clone();

        let root_cid = self.runtime.block_on(async {
            // Build objects directory
            let objects_cid = self.build_objects_dir(&objects).await?;

            // Build refs directory
            let refs_cid = self.build_refs_dir(&refs).await?;

            // Build dumb-HTTP info directory
            let info_cid = self.build_info_dir(&refs, &objects_clone).await?;

            // Build HEAD file - use default_branch if no explicit HEAD
            // Git expects HEAD to end with newline, so add it if missing
            let head_content = refs.get("HEAD")
                .map(|h| if h.ends_with('\n') { h.clone() } else { format!("{}\n", h) })
                .or_else(|| default_branch.as_ref().map(|b| format!("ref: {}\n", b)))
                .unwrap_or_else(|| "ref: refs/heads/main\n".to_string());
            debug!("HEAD content: {:?}", head_content);
            let (head_cid, head_size) = self.tree.put(head_content.as_bytes()).await
                .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
            debug!("HEAD hash: {}", hex::encode(head_cid.hash));

            // Build .git directory - use from_cid to preserve encryption keys
            let mut git_entries = vec![
                DirEntry::from_cid("HEAD", &head_cid).with_size(head_size),
                DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("objects", &objects_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("refs", &refs_cid).with_link_type(LinkType::Dir),
            ];

            // Add config if we have a default branch
            if let Some(ref branch) = default_branch {
                let config = format!(
                    "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = true\n[init]\n\tdefaultBranch = {}\n",
                    branch.trim_start_matches("refs/heads/")
                );
                let (config_cid, config_size) = self.tree.put(config.as_bytes()).await
                    .map_err(|e| Error::StorageError(format!("put config: {}", e)))?;
                git_entries.push(DirEntry::from_cid("config", &config_cid).with_size(config_size));
            }

            // Build and add index file if we have a tree SHA
            // (index build failure is non-fatal; repo works without it)
            if let Some(ref tree_oid) = tree_sha {
                match self.build_index_file(tree_oid, &objects_clone) {
                    Ok(index_data) => {
                        let (index_cid, index_size) = self.tree.put(&index_data).await
                            .map_err(|e| Error::StorageError(format!("put index: {}", e)))?;
                        git_entries.push(DirEntry::from_cid("index", &index_cid).with_size(index_size));
                        info!("Added git index file ({} bytes)", index_data.len());
                    }
                    Err(e) => {
                        debug!("Failed to build git index file: {} - continuing without index", e);
                    }
                }
            }

            let git_cid = self.tree.put_directory(git_entries).await
                .map_err(|e| Error::StorageError(format!("put .git: {}", e)))?;

            // Build root entries starting with .git
            // Use from_cid to preserve the encryption key
            let mut root_entries = vec![DirEntry::from_cid(".git", &git_cid).with_link_type(LinkType::Dir)];

            // Add working tree files if we have a tree SHA
            if let Some(ref tree_oid) = tree_sha {
                let working_tree_entries = self.build_working_tree_entries(tree_oid, &objects_clone).await?;
                root_entries.extend(working_tree_entries);
                info!("Added {} working tree entries to root", root_entries.len() - 1);
            }

            // Sort entries for deterministic ordering
            root_entries.sort_by(|a, b| a.name.cmp(&b.name));

            let root_cid = self.tree.put_directory(root_entries).await
                .map_err(|e| Error::StorageError(format!("put root: {}", e)))?;

            info!("Built hashtree root: {} (encrypted: {}) (.git dir: {})",
                hex::encode(root_cid.hash),
                root_cid.key.is_some(),
                hex::encode(git_cid.hash));

            Ok::<Cid, Error>(root_cid)
        })?;

        // Cache the root CID
        if let Ok(mut root) = self.root_cid.write() {
            *root = Some(root_cid.clone());
        }

        Ok(root_cid)
    }
777
    /// Build working tree entries from a git tree object
    ///
    /// Recursively converts git tree `tree_oid` into hashtree entries:
    /// subtrees become nested hashtree directories, blobs become
    /// (possibly chunked) files. Entries are sorted by name. Blobs whose
    /// object is missing or not a blob are silently skipped (see the
    /// `if let` in the else branch). Errors if `tree_oid` itself is
    /// missing or not a tree.
    async fn build_working_tree_entries(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<DirEntry>> {
        let mut entries = Vec::new();

        // Get tree content
        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        // Parse tree entries
        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                // Recursively build subdirectory
                // (boxed indirection: async fns can't directly await themselves)
                let sub_entries = self
                    .build_working_tree_entries_boxed(&oid_hex, objects)
                    .await?;

                // Create subdirectory in hashtree
                let dir_cid =
                    self.tree.put_directory(sub_entries).await.map_err(|e| {
                        Error::StorageError(format!("put dir {}: {}", entry.name, e))
                    })?;

                // Use from_cid to preserve encryption key
                entries
                    .push(DirEntry::from_cid(&entry.name, &dir_cid).with_link_type(LinkType::Dir));
            } else {
                // Get blob content
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    // Use put() instead of put_blob() to chunk large files
                    let (cid, size) = self.tree.put(&blob_content).await.map_err(|e| {
                        Error::StorageError(format!("put blob {}: {}", entry.name, e))
                    })?;

                    // Use from_cid to preserve encryption key
                    entries.push(DirEntry::from_cid(&entry.name, &cid).with_size(size));
                }
            }
        }

        // Sort for deterministic ordering
        entries.sort_by(|a, b| a.name.cmp(&b.name));

        Ok(entries)
    }
840
    /// Boxed version for async recursion
    ///
    /// Wraps `build_working_tree_entries` in a pinned boxed future
    /// (`BoxFuture`) so it can recurse into subtrees; a plain async fn
    /// cannot await itself directly.
    fn build_working_tree_entries_boxed<'a>(
        &'a self,
        tree_oid: &'a str,
        objects: &'a HashMap<String, Vec<u8>>,
    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
        Box::pin(self.build_working_tree_entries(tree_oid, objects))
    }
849
850    /// Build the objects directory using HashTree
851    async fn build_objects_dir(&self, objects: &HashMap<String, Vec<u8>>) -> Result<Cid> {
852        let mut top_entries = Vec::new();
853
854        if !objects.is_empty() {
855            // Group objects by first 2 characters of SHA (git loose object structure)
856            // Git expects objects/XX/YYYYYY... where XX is first 2 hex chars
857            let mut buckets: HashMap<String, Vec<(String, Vec<u8>)>> = HashMap::new();
858            for (oid, data) in objects {
859                let prefix = &oid[..2];
860                let suffix = &oid[2..];
861                buckets
862                    .entry(prefix.to_string())
863                    .or_default()
864                    .push((suffix.to_string(), data.clone()));
865            }
866
867            // Build subdirectories for each prefix
868            for (prefix, objs) in buckets {
869                let mut sub_entries = Vec::new();
870                for (suffix, data) in objs {
871                    // Use put() instead of put_blob() to chunk large objects
872                    // Git blobs can be >5MB which exceeds blossom server limits
873                    let (cid, size) = self.tree.put(&data).await.map_err(|e| {
874                        Error::StorageError(format!("put object {}{}: {}", prefix, suffix, e))
875                    })?;
876                    // Use from_cid to preserve encryption key
877                    sub_entries.push(DirEntry::from_cid(suffix, &cid).with_size(size));
878                }
879                // Sort for deterministic ordering
880                sub_entries.sort_by(|a, b| a.name.cmp(&b.name));
881
882                let sub_cid =
883                    self.tree.put_directory(sub_entries).await.map_err(|e| {
884                        Error::StorageError(format!("put objects/{}: {}", prefix, e))
885                    })?;
886                top_entries
887                    .push(DirEntry::from_cid(prefix, &sub_cid).with_link_type(LinkType::Dir));
888            }
889        }
890
891        let (packs_cid, packs_size) = self
892            .tree
893            .put(b"")
894            .await
895            .map_err(|e| Error::StorageError(format!("put objects/info/packs: {}", e)))?;
896        let info_cid = self
897            .tree
898            .put_directory(vec![
899                DirEntry::from_cid("packs", &packs_cid).with_size(packs_size)
900            ])
901            .await
902            .map_err(|e| Error::StorageError(format!("put objects/info: {}", e)))?;
903        top_entries.push(DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir));
904
905        // Sort for deterministic ordering
906        top_entries.sort_by(|a, b| a.name.cmp(&b.name));
907
908        let entry_count = top_entries.len();
909        let cid = self
910            .tree
911            .put_directory(top_entries)
912            .await
913            .map_err(|e| Error::StorageError(format!("put objects dir: {}", e)))?;
914
915        debug!(
916            "Built objects dir with {} entries: {}",
917            entry_count,
918            hex::encode(cid.hash)
919        );
920        Ok(cid)
921    }
922
923    /// Build the refs directory using HashTree
924    async fn build_refs_dir(&self, refs: &HashMap<String, String>) -> Result<Cid> {
925        let mut root = RefDirectory::default();
926
927        for (ref_name, value) in refs {
928            let parts: Vec<&str> = ref_name.split('/').collect();
929            if parts.len() >= 3 && parts[0] == "refs" {
930                root.insert(&parts[1..], value.clone());
931            }
932        }
933
934        let mut ref_entries = self.build_ref_entries_recursive(&root, "refs").await?;
935
936        if ref_entries.is_empty() {
937            // Return empty directory Cid
938            let empty_cid = self
939                .tree
940                .put_directory(vec![])
941                .await
942                .map_err(|e| Error::StorageError(format!("put empty refs: {}", e)))?;
943            return Ok(empty_cid);
944        }
945
946        ref_entries.sort_by(|a, b| a.name.cmp(&b.name));
947
948        let refs_cid = self
949            .tree
950            .put_directory(ref_entries)
951            .await
952            .map_err(|e| Error::StorageError(format!("put refs dir: {}", e)))?;
953        debug!("refs dir -> {}", hex::encode(refs_cid.hash));
954        Ok(refs_cid)
955    }
956
    /// Recursively convert an in-memory `RefDirectory` into hashtree entries.
    ///
    /// Leaf files become small blobs holding the ref target string;
    /// subdirectories (e.g. `heads/`, `tags/`) recurse and are stored as
    /// hashtree directories. `prefix` is only used for debug logging.
    /// Returned as a boxed future because async fns cannot recurse directly.
    fn build_ref_entries_recursive<'a>(
        &'a self,
        dir: &'a RefDirectory,
        prefix: &'a str,
    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
        Box::pin(async move {
            let mut entries = Vec::new();

            // Leaf refs: one blob per file, content is the ref target
            for (name, value) in &dir.files {
                let (cid, size) = self
                    .tree
                    .put(value.as_bytes())
                    .await
                    .map_err(|e| Error::StorageError(format!("put ref: {}", e)))?;
                debug!("{}/{} -> blob {}", prefix, name, hex::encode(cid.hash));
                entries.push(DirEntry::from_cid(name, &cid).with_size(size));
            }

            // Nested namespaces: build each child dir bottom-up, then link it
            for (name, child) in &dir.dirs {
                let child_prefix = format!("{prefix}/{name}");
                let child_entries = self
                    .build_ref_entries_recursive(child, &child_prefix)
                    .await?;
                let child_cid =
                    self.tree.put_directory(child_entries).await.map_err(|e| {
                        Error::StorageError(format!("put {child_prefix} dir: {}", e))
                    })?;
                debug!("{} dir -> {}", child_prefix, hex::encode(child_cid.hash));
                entries.push(DirEntry::from_cid(name, &child_cid).with_link_type(LinkType::Dir));
            }

            // Sort for deterministic ordering (BTreeMap iteration is already
            // sorted per-kind, but files and dirs are interleaved here)
            entries.sort_by(|a, b| a.name.cmp(&b.name));
            Ok(entries)
        })
    }
992
    /// Build git index file from tree entries
    /// Returns the raw binary content of the index file
    ///
    /// Produces a version-2 ("DIRC") index with one entry per blob reachable
    /// from `tree_oid`, ending with the mandatory SHA-1 checksum of the
    /// preceding bytes. Stat fields (ctime/mtime/dev/ino/uid/gid) are
    /// synthetic (current time or zero), so git will presumably treat
    /// entries as stat-dirty until it refreshes them — TODO confirm.
    fn build_index_file(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<u8>> {
        // Collect all file entries from the tree (recursively)
        let mut entries: Vec<(String, [u8; 20], u32, u32)> = Vec::new(); // (path, sha1, mode, size)
        self.collect_tree_entries_for_index(tree_oid, objects, "", &mut entries)?;

        // Sort entries by path (git index requirement)
        entries.sort_by(|a, b| a.0.cmp(&b.0));

        let entry_count = entries.len() as u32;
        debug!("Building git index with {} entries", entry_count);

        // Build index content
        let mut index_data = Vec::new();

        // Header: DIRC + version 2 + entry count
        index_data.extend_from_slice(b"DIRC");
        index_data.extend_from_slice(&2u32.to_be_bytes()); // version 2
        index_data.extend_from_slice(&entry_count.to_be_bytes());

        // Current time for ctime/mtime (doesn't matter much for our use case)
        // NOTE: as u32 truncates after year 2106; acceptable here
        let now_sec = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs() as u32;

        // All multi-byte fields below are big-endian, per the index format
        for (path, sha1, mode, size) in &entries {
            let entry_start = index_data.len();

            // ctime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mtime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // dev, ino (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mode
            index_data.extend_from_slice(&mode.to_be_bytes());
            // uid, gid (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // file size
            index_data.extend_from_slice(&size.to_be_bytes());
            // SHA-1
            index_data.extend_from_slice(sha1);
            // flags: path length (max 0xFFF) in low 12 bits
            // (stage bits stay 0 — every entry is a merged, stage-0 entry)
            let path_len = std::cmp::min(path.len(), 0xFFF) as u16;
            index_data.extend_from_slice(&path_len.to_be_bytes());
            // path (NUL-terminated)
            index_data.extend_from_slice(path.as_bytes());
            index_data.push(0); // NUL terminator

            // Pad to 8-byte boundary relative to entry start
            // (the NUL just pushed counts toward the required padding)
            let entry_len = index_data.len() - entry_start;
            let padding = (8 - (entry_len % 8)) % 8;
            index_data.extend(std::iter::repeat_n(0, padding));
        }

        // Calculate SHA-1 checksum of everything and append
        let mut hasher = Sha1::new();
        hasher.update(&index_data);
        let checksum = hasher.finalize();
        index_data.extend_from_slice(&checksum);

        debug!(
            "Built git index: {} bytes, {} entries",
            index_data.len(),
            entry_count
        );
        Ok(index_data)
    }
1071
1072    /// Collect file entries from a git tree for building the index
1073    fn collect_tree_entries_for_index(
1074        &self,
1075        tree_oid: &str,
1076        objects: &HashMap<String, Vec<u8>>,
1077        prefix: &str,
1078        entries: &mut Vec<(String, [u8; 20], u32, u32)>,
1079    ) -> Result<()> {
1080        let (obj_type, content) = self
1081            .get_object_content(tree_oid, objects)
1082            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;
1083
1084        if obj_type != ObjectType::Tree {
1085            return Err(Error::InvalidObjectType(format!(
1086                "expected tree, got {:?}",
1087                obj_type
1088            )));
1089        }
1090
1091        let tree_entries = parse_tree(&content)?;
1092
1093        for entry in tree_entries {
1094            let path = if prefix.is_empty() {
1095                entry.name.clone()
1096            } else {
1097                format!("{}/{}", prefix, entry.name)
1098            };
1099
1100            let oid_hex = entry.oid.to_hex();
1101
1102            if entry.is_tree() {
1103                // Recursively process subdirectory
1104                self.collect_tree_entries_for_index(&oid_hex, objects, &path, entries)?;
1105            } else {
1106                // Get blob content for size and SHA-1
1107                if let Some((ObjectType::Blob, blob_content)) =
1108                    self.get_object_content(&oid_hex, objects)
1109                {
1110                    // Convert hex SHA to bytes
1111                    let mut sha1_bytes = [0u8; 20];
1112                    if let Ok(bytes) = hex::decode(&oid_hex) {
1113                        if bytes.len() == 20 {
1114                            sha1_bytes.copy_from_slice(&bytes);
1115                        }
1116                    }
1117
1118                    // Mode: use entry.mode or default to regular file
1119                    let mode = entry.mode;
1120                    let size = blob_content.len() as u32;
1121
1122                    entries.push((path, sha1_bytes, mode, size));
1123                }
1124            }
1125        }
1126
1127        Ok(())
1128    }
1129
    /// Get the underlying store
    ///
    /// Shared handle to the local blob store backing the hashtree, for
    /// callers that need raw blob access (e.g. listing hashes for upload).
    pub fn store(&self) -> &Arc<LocalStore> {
        &self.store
    }
1134
    /// Get the HashTree for direct access
    ///
    /// Escape hatch for operations not wrapped by this type
    /// (path resolution, raw gets, ...).
    #[allow(dead_code)]
    pub fn hashtree(&self) -> &HashTree<LocalStore> {
        &self.tree
    }
1140
1141    /// Push all blobs to file servers
1142    #[allow(dead_code)]
1143    pub fn push_to_file_servers(
1144        &self,
1145        blossom: &hashtree_blossom::BlossomClient,
1146    ) -> Result<(usize, usize)> {
1147        let hashes = self
1148            .store
1149            .list()
1150            .map_err(|e| Error::StorageError(format!("list hashes: {}", e)))?;
1151
1152        info!("Pushing {} blobs to file servers", hashes.len());
1153
1154        let mut uploaded = 0;
1155        let mut existed = 0;
1156
1157        self.runtime.block_on(async {
1158            for hash in &hashes {
1159                let hex_hash = hex::encode(hash);
1160                let data = match self.store.get_sync(hash) {
1161                    Ok(Some(d)) => d,
1162                    _ => continue,
1163                };
1164
1165                match blossom.upload_if_missing(&data).await {
1166                    Ok((_, true)) => {
1167                        debug!("Uploaded {}", &hex_hash[..12]);
1168                        uploaded += 1;
1169                    }
1170                    Ok((_, false)) => {
1171                        existed += 1;
1172                    }
1173                    Err(e) => {
1174                        debug!("Failed to upload {}: {}", &hex_hash[..12], e);
1175                    }
1176                }
1177            }
1178        });
1179
1180        info!(
1181            "Upload complete: {} new, {} already existed",
1182            uploaded, existed
1183        );
1184        Ok((uploaded, existed))
1185    }
1186
1187    /// Clear all state (for testing or re-initialization)
1188    #[allow(dead_code)]
1189    pub fn clear(&self) -> Result<()> {
1190        let mut objects = self
1191            .objects
1192            .write()
1193            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1194        let mut refs = self
1195            .refs
1196            .write()
1197            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1198        let mut root = self
1199            .root_cid
1200            .write()
1201            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1202
1203        objects.clear();
1204        refs.clear();
1205        *root = None;
1206        Ok(())
1207    }
1208}
1209
1210#[cfg(test)]
1211mod tests {
1212    use super::*;
1213    use hashtree_core::store::Store;
1214    use hashtree_core::LinkType;
1215    use std::io::{Read, Write};
1216    use std::net::{TcpListener, TcpStream};
1217    use std::path::Path;
1218    use std::process::{Child, Command, Stdio};
1219    use std::time::{Duration, Instant};
1220    use tempfile::TempDir;
1221
    /// Create a GitStorage backed by a fresh temp dir. The TempDir must be
    /// kept alive for the storage's lifetime (dropping it deletes the dir).
    fn create_test_storage() -> (GitStorage, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let storage = GitStorage::open(temp_dir.path()).unwrap();
        (storage, temp_dir)
    }
1227
    /// Like `create_test_storage`, but with an explicit local-store size cap
    /// (in bytes) so eviction behavior can be exercised. Uses the Fs backend.
    fn create_test_storage_with_limit(max_size_bytes: u64) -> (GitStorage, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let storage = GitStorage::open_with_backend_and_max_bytes(
            temp_dir.path(),
            StorageBackend::Fs,
            max_size_bytes,
        )
        .unwrap();
        (storage, temp_dir)
    }
1238
    /// Total bytes currently held by the storage's local blob store,
    /// regardless of which backend (Fs or, if enabled, Lmdb) it uses.
    fn local_total_bytes(storage: &GitStorage) -> u64 {
        match storage.store().as_ref() {
            LocalStore::Fs(store) => store.stats().unwrap().total_bytes,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.stats().unwrap().total_bytes,
        }
    }
1246
1247    fn write_test_commit(storage: &GitStorage) -> ObjectId {
1248        let blob_oid = storage
1249            .write_raw_object(ObjectType::Blob, b"hello from hashtree\n")
1250            .unwrap();
1251
1252        let mut tree_content = Vec::new();
1253        tree_content.extend_from_slice(b"100644 README.md\0");
1254        tree_content.extend_from_slice(&hex::decode(blob_oid.to_hex()).unwrap());
1255        let tree_oid = storage
1256            .write_raw_object(ObjectType::Tree, &tree_content)
1257            .unwrap();
1258
1259        let commit_content = format!(
1260            "tree {}\nauthor Test User <test@example.com> 0 +0000\ncommitter Test User <test@example.com> 0 +0000\n\nInitial commit\n",
1261            tree_oid.to_hex()
1262        );
1263        storage
1264            .write_raw_object(ObjectType::Commit, commit_content.as_bytes())
1265            .unwrap()
1266    }
1267
1268    fn export_tree_to_fs<S: Store>(
1269        runtime: &RuntimeExecutor,
1270        tree: &HashTree<S>,
1271        cid: &Cid,
1272        dst: &Path,
1273    ) {
1274        std::fs::create_dir_all(dst).unwrap();
1275        let entries = runtime.block_on(tree.list_directory(cid)).unwrap();
1276        for entry in entries {
1277            let entry_cid = Cid {
1278                hash: entry.hash,
1279                key: entry.key,
1280            };
1281            let path = dst.join(&entry.name);
1282            match entry.link_type {
1283                LinkType::Dir => export_tree_to_fs(runtime, tree, &entry_cid, &path),
1284                LinkType::Blob | LinkType::File => {
1285                    let data = runtime
1286                        .block_on(tree.get(&entry_cid, None))
1287                        .unwrap()
1288                        .unwrap();
1289                    if let Some(parent) = path.parent() {
1290                        std::fs::create_dir_all(parent).unwrap();
1291                    }
1292                    std::fs::write(path, data).unwrap();
1293                }
1294            }
1295        }
1296    }
1297
1298    fn spawn_http_server(root: &Path, port: u16) -> Child {
1299        Command::new("python3")
1300            .args([
1301                "-m",
1302                "http.server",
1303                &port.to_string(),
1304                "--bind",
1305                "127.0.0.1",
1306            ])
1307            .current_dir(root)
1308            .stdout(Stdio::null())
1309            .stderr(Stdio::null())
1310            .spawn()
1311            .expect("spawn python http server")
1312    }
1313
1314    fn wait_for_http_server(server: &mut Child, port: u16, path: &str) {
1315        let deadline = Instant::now() + Duration::from_secs(5);
1316
1317        loop {
1318            if let Some(status) = server.try_wait().expect("check http server status") {
1319                panic!("python http server exited before becoming ready: {status}");
1320            }
1321
1322            if let Ok(mut stream) = TcpStream::connect(("127.0.0.1", port)) {
1323                stream
1324                    .set_read_timeout(Some(Duration::from_millis(200)))
1325                    .expect("set read timeout");
1326                stream
1327                    .set_write_timeout(Some(Duration::from_millis(200)))
1328                    .expect("set write timeout");
1329                let request =
1330                    format!("GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n");
1331                if stream.write_all(request.as_bytes()).is_ok() {
1332                    let mut response = String::new();
1333                    if stream.read_to_string(&mut response).is_ok()
1334                        && response.starts_with("HTTP/1.0 200")
1335                    {
1336                        return;
1337                    }
1338                }
1339            }
1340
1341            if Instant::now() >= deadline {
1342                panic!("python http server did not become ready on port {port}");
1343            }
1344            std::thread::sleep(Duration::from_millis(50));
1345        }
1346    }
1347
1348    #[test]
1349    fn test_import_ref() {
1350        let (storage, _temp) = create_test_storage();
1351
1352        // Import a ref
1353        storage
1354            .import_ref("refs/heads/main", "abc123def456")
1355            .unwrap();
1356
1357        // Check it exists
1358        assert!(storage.has_ref("refs/heads/main").unwrap());
1359
1360        // Check value via list_refs
1361        let refs = storage.list_refs().unwrap();
1362        assert_eq!(
1363            refs.get("refs/heads/main"),
1364            Some(&"abc123def456".to_string())
1365        );
1366    }
1367
1368    #[test]
1369    fn test_import_multiple_refs_preserves_all() {
1370        let (storage, _temp) = create_test_storage();
1371
1372        // Import multiple refs (simulating loading from remote)
1373        storage.import_ref("refs/heads/main", "sha_main").unwrap();
1374        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
1375        storage
1376            .import_ref("refs/heads/feature", "sha_feature")
1377            .unwrap();
1378
1379        // All should exist
1380        assert!(storage.has_ref("refs/heads/main").unwrap());
1381        assert!(storage.has_ref("refs/heads/dev").unwrap());
1382        assert!(storage.has_ref("refs/heads/feature").unwrap());
1383
1384        // Now write a new ref (simulating push)
1385        storage
1386            .write_ref(
1387                "refs/heads/new-branch",
1388                &Ref::Direct(
1389                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1390                ),
1391            )
1392            .unwrap();
1393
1394        // Original refs should still exist
1395        let refs = storage.list_refs().unwrap();
1396        assert_eq!(refs.len(), 4);
1397        assert!(refs.contains_key("refs/heads/main"));
1398        assert!(refs.contains_key("refs/heads/dev"));
1399        assert!(refs.contains_key("refs/heads/feature"));
1400        assert!(refs.contains_key("refs/heads/new-branch"));
1401    }
1402
1403    #[test]
1404    fn test_import_compressed_object() {
1405        let (storage, _temp) = create_test_storage();
1406
1407        // Create a fake compressed object
1408        let fake_compressed = vec![0x78, 0x9c, 0x01, 0x02, 0x03]; // fake zlib data
1409
1410        storage
1411            .import_compressed_object("abc123def456", fake_compressed.clone())
1412            .unwrap();
1413
1414        // Check object count
1415        assert_eq!(storage.object_count().unwrap(), 1);
1416    }
1417
1418    #[test]
1419    fn test_write_ref_overwrites_imported() {
1420        let (storage, _temp) = create_test_storage();
1421
1422        // Import a ref
1423        storage.import_ref("refs/heads/main", "old_sha").unwrap();
1424
1425        // Write same ref with new value
1426        storage
1427            .write_ref(
1428                "refs/heads/main",
1429                &Ref::Direct(
1430                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1431                ),
1432            )
1433            .unwrap();
1434
1435        // Should have new value
1436        let refs = storage.list_refs().unwrap();
1437        assert_eq!(
1438            refs.get("refs/heads/main"),
1439            Some(&"0123456789abcdef0123456789abcdef01234567".to_string())
1440        );
1441    }
1442
1443    #[test]
1444    fn test_delete_ref_preserves_others() {
1445        let (storage, _temp) = create_test_storage();
1446
1447        // Import multiple refs
1448        storage.import_ref("refs/heads/main", "sha_main").unwrap();
1449        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
1450
1451        // Delete one
1452        storage.delete_ref("refs/heads/dev").unwrap();
1453
1454        // Other should still exist
1455        assert!(storage.has_ref("refs/heads/main").unwrap());
1456        assert!(!storage.has_ref("refs/heads/dev").unwrap());
1457    }
1458
1459    #[test]
1460    fn test_clear_removes_all() {
1461        let (storage, _temp) = create_test_storage();
1462
1463        // Import refs and objects
1464        storage.import_ref("refs/heads/main", "sha_main").unwrap();
1465        storage
1466            .import_compressed_object("obj1", vec![1, 2, 3])
1467            .unwrap();
1468
1469        // Clear
1470        storage.clear().unwrap();
1471
1472        // All gone
1473        assert!(!storage.has_ref("refs/heads/main").unwrap());
1474        assert_eq!(storage.object_count().unwrap(), 0);
1475    }
1476
1477    #[test]
1478    fn test_evict_if_needed_respects_configured_limit() {
1479        let (storage, _temp) = create_test_storage_with_limit(1_024);
1480
1481        storage
1482            .write_raw_object(ObjectType::Blob, &vec![b'a'; 900])
1483            .unwrap();
1484        storage
1485            .write_raw_object(ObjectType::Blob, &vec![b'b'; 900])
1486            .unwrap();
1487        storage
1488            .write_ref(
1489                "refs/heads/main",
1490                &Ref::Direct(
1491                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1492                ),
1493            )
1494            .unwrap();
1495
1496        storage.build_tree().unwrap();
1497
1498        let before = local_total_bytes(&storage);
1499        assert!(before > 1_024);
1500
1501        let freed = storage.evict_if_needed().unwrap();
1502        assert!(freed > 0);
1503
1504        let after = local_total_bytes(&storage);
1505        assert!(after <= 1_024);
1506    }
1507
    /// build_tree must emit the dumb-HTTP metadata files: `.git/info/refs`
    /// (the ref advertisement, with a peeled "^{}" line for annotated tags)
    /// and an empty `.git/objects/info/packs` (all objects are loose).
    #[test]
    fn test_build_tree_adds_dumb_http_metadata() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        // Annotated tag pointing at the commit
        let tag_content = format!(
            "object {}\ntype commit\ntag v1.0.0\ntagger Test User <test@example.com> 0 +0000\n\nrelease\n",
            commit_oid.to_hex()
        );
        let tag_oid = storage
            .write_raw_object(ObjectType::Tag, tag_content.as_bytes())
            .unwrap();

        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(tag_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        // info/refs: "<sha>\t<refname>\n" lines; the tag line is followed by
        // a peeled line resolving the tag to its target commit
        let info_refs_cid = storage
            .runtime
            .block_on(storage.tree.resolve_path(&root_cid, ".git/info/refs"))
            .unwrap()
            .expect("info/refs exists");
        let info_refs = storage
            .runtime
            .block_on(storage.tree.get(&info_refs_cid, None))
            .unwrap()
            .unwrap();
        let info_refs = String::from_utf8(info_refs).unwrap();

        assert_eq!(
            info_refs,
            format!(
                "{commit}\trefs/heads/main\n{tag}\trefs/tags/v1.0.0\n{commit}\trefs/tags/v1.0.0^{{}}\n",
                commit = commit_oid.to_hex(),
                tag = tag_oid.to_hex()
            )
        );

        // objects/info/packs must exist (dumb clients fetch it) but be empty
        let packs_cid = storage
            .runtime
            .block_on(
                storage
                    .tree
                    .resolve_path(&root_cid, ".git/objects/info/packs"),
            )
            .unwrap()
            .expect("objects/info/packs exists");
        let packs = storage
            .runtime
            .block_on(storage.tree.get(&packs_cid, None))
            .unwrap()
            .unwrap();
        assert!(packs.is_empty(), "objects/info/packs should be empty");
    }
1569
    /// build_tree must write each ref as a loose file at its full
    /// `.git/refs/...` path — including nested branch names — with the
    /// target sha as the file content.
    #[test]
    fn test_build_tree_materializes_loose_refs_at_git_paths() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);

        storage
            .write_ref("refs/heads/master", &Ref::Direct(commit_oid))
            .unwrap();
        // Nested branch name exercises multi-level refs/heads subdirectories
        storage
            .write_ref("refs/heads/codex/meshrouter-prod", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/master".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        // Every loose ref file must resolve and contain exactly the sha
        for path in [
            ".git/refs/heads/master",
            ".git/refs/heads/codex/meshrouter-prod",
            ".git/refs/tags/v1.0.0",
        ] {
            let ref_cid = storage
                .runtime
                .block_on(storage.tree.resolve_path(&root_cid, path))
                .unwrap()
                .unwrap_or_else(|| panic!("{path} should exist"));
            let ref_value = storage
                .runtime
                .block_on(storage.tree.get(&ref_cid, None))
                .unwrap()
                .unwrap();
            assert_eq!(
                String::from_utf8(ref_value).unwrap(),
                commit_oid.to_hex(),
                "{path} should contain the ref target",
            );
        }
    }
1612
    /// End-to-end dumb-HTTP check: export the built tree to disk, serve it
    /// with a plain static file server (python http.server), and verify that
    /// `git clone` succeeds and checks out the expected README content.
    /// Requires `git` and `python3` on PATH.
    #[test]
    fn test_materialized_tree_supports_static_http_clone_from_git_dir() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();
        let export_dir = TempDir::new().unwrap();
        let repo_dir = export_dir.path().join("repo");
        export_tree_to_fs(&storage.runtime, &storage.tree, &root_cid, &repo_dir);

        // Bind to port 0 so the OS picks a free port, then release it for the
        // python server (small bind race window; acceptable in a test)
        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
        let port = listener.local_addr().unwrap().port();
        drop(listener);

        let mut server = spawn_http_server(export_dir.path(), port);
        wait_for_http_server(&mut server, port, "/repo/.git/HEAD");

        let clone_dir = TempDir::new().unwrap();
        let clone_path = clone_dir.path().join("clone");
        let output = Command::new("git")
            .args([
                "clone",
                &format!("http://127.0.0.1:{port}/repo/.git", port = port),
                clone_path.to_str().unwrap(),
            ])
            .output()
            .unwrap();

        // Tear the server down before asserting so a failure can't leak it
        let _ = server.kill();
        let _ = server.wait();

        assert!(
            output.status.success(),
            "git clone failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        assert_eq!(
            std::fs::read_to_string(clone_path.join("README.md")).unwrap(),
            "hello from hashtree\n"
        );
    }
1660}