Skip to main content

git_remote_htree/git/
storage.rs

1//! Hashtree-backed git object and ref storage with configurable persistence
2//!
3//! Stores git objects and refs in a hashtree merkle tree:
4//!   root/
5//!     .git/
6//!       HEAD -> "ref: refs/heads/main"
7//!       refs/heads/main -> <commit-sha1>
8//!       info/refs -> dumb-HTTP ref advertisement
9//!       objects/XX/YYYY... -> zlib-compressed loose object (standard git layout)
10//!       objects/info/packs -> dumb-HTTP pack advertisement
11//!
12//! The root hash (SHA-256) is the content-addressed identifier for the entire repo state.
13
14use flate2::read::ZlibDecoder;
15use flate2::write::ZlibEncoder;
16use flate2::Compression;
17use hashtree_config::{Config, StorageBackend};
18use hashtree_core::store::{Store, StoreError, StoreStats};
19use hashtree_core::types::Hash;
20use hashtree_core::{Cid, DirEntry, HashTree, HashTreeConfig, LinkType};
21use hashtree_fs::FsBlobStore;
22#[cfg(feature = "lmdb")]
23use hashtree_lmdb::LmdbBlobStore;
24use sha1::{Digest, Sha1};
25use std::collections::{BTreeMap, HashMap};
26use std::io::{Read, Write};
27use std::path::Path;
28use std::sync::Arc;
29use tokio::runtime::{Handle, Runtime};
30use tracing::{debug, info, warn};
31
32use super::object::{parse_tree, GitObject, ObjectId, ObjectType};
33use super::refs::{validate_ref_name, Ref};
34use super::{Error, Result};
35
/// Box type for async recursion: a pinned, heap-allocated, `Send` future.
/// Required because `build_working_tree_entries` recurses into subtrees,
/// and recursive `async fn`s need an indirection with a nameable type.
type BoxFuture<'a, T> = std::pin::Pin<Box<dyn std::future::Future<Output = T> + Send + 'a>>;
38
/// An in-memory tree of refs: leaf ref values keyed by name, plus nested
/// subdirectories. `BTreeMap` keeps iteration order deterministic.
#[derive(Default)]
struct RefDirectory {
    files: BTreeMap<String, String>,
    dirs: BTreeMap<String, RefDirectory>,
}

impl RefDirectory {
    /// Insert `value` at the path named by `parts`, creating intermediate
    /// directories as needed. An empty path is a no-op.
    fn insert(&mut self, parts: &[&str], value: String) {
        match parts {
            [] => {}
            [leaf] => {
                self.files.insert((*leaf).to_string(), value);
            }
            [dir, rest @ ..] => {
                self.dirs
                    .entry((*dir).to_string())
                    .or_default()
                    .insert(rest, value);
            }
        }
    }
}
61
/// Runtime executor - either owns a runtime or reuses an existing one
///
/// `GitStorage` exposes a synchronous API over async hashtree/store calls,
/// so it needs somewhere to `block_on`. When constructed inside an existing
/// tokio runtime we keep a `Handle` to it; otherwise we create and own one.
enum RuntimeExecutor {
    Owned(Runtime),
    Handle(Handle),
}

impl RuntimeExecutor {
    /// Drive `f` to completion on the wrapped runtime.
    fn block_on<F: std::future::Future>(&self, f: F) -> F::Output {
        match self {
            RuntimeExecutor::Owned(rt) => rt.block_on(f),
            // NOTE(review): `block_in_place` panics on a current-thread tokio
            // runtime — this assumes callers run on a multi-thread runtime.
            // TODO confirm against the binaries that construct GitStorage.
            RuntimeExecutor::Handle(handle) => tokio::task::block_in_place(|| handle.block_on(f)),
        }
    }
}
76
/// Local blob store - wraps either FsBlobStore or LmdbBlobStore
///
/// The LMDB variant exists only when the `lmdb` cargo feature is enabled;
/// every match site gates that arm with `#[cfg(feature = "lmdb")]`.
pub enum LocalStore {
    Fs(FsBlobStore),
    #[cfg(feature = "lmdb")]
    Lmdb(LmdbBlobStore),
}
83
impl LocalStore {
    /// Open a store for the requested backend.
    ///
    /// `max_bytes == 0` means "no size limit". With the `lmdb` feature
    /// enabled, an LMDB open failure recognized by
    /// `should_fallback_from_lmdb_error` falls back to the filesystem
    /// backend; without the feature, an LMDB request is downgraded to the
    /// filesystem backend with a warning.
    pub(crate) fn new_for_backend<P: AsRef<Path>>(
        path: P,
        backend: StorageBackend,
        max_bytes: u64,
    ) -> std::result::Result<Self, StoreError> {
        let path = path.as_ref();
        #[cfg(feature = "lmdb")]
        {
            return Self::new_for_backend_with_openers(
                path,
                backend,
                max_bytes,
                Self::open_fs_store,
                Self::open_lmdb_store,
            );
        }

        // Compiled only when the lmdb feature is off; the block above
        // returns unconditionally when it is on.
        #[cfg(not(feature = "lmdb"))]
        match backend {
            StorageBackend::Fs => Self::open_fs_store(path, max_bytes),
            #[cfg(not(feature = "lmdb"))]
            StorageBackend::Lmdb => {
                warn!(
                    "LMDB backend requested but lmdb feature not enabled, using filesystem storage"
                );
                Self::open_fs_store(path, max_bytes)
            }
        }
    }

    /// Open a filesystem-backed store, applying a byte limit when non-zero.
    fn open_fs_store(path: &Path, max_bytes: u64) -> std::result::Result<Self, StoreError> {
        if max_bytes > 0 {
            Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
                path, max_bytes,
            )?))
        } else {
            Ok(LocalStore::Fs(FsBlobStore::new(path)?))
        }
    }

    /// Open an LMDB-backed store, applying a byte limit when non-zero.
    #[cfg(feature = "lmdb")]
    fn open_lmdb_store(path: &Path, max_bytes: u64) -> std::result::Result<Self, StoreError> {
        if max_bytes > 0 {
            Ok(LocalStore::Lmdb(LmdbBlobStore::with_max_bytes(
                path, max_bytes,
            )?))
        } else {
            Ok(LocalStore::Lmdb(LmdbBlobStore::new(path)?))
        }
    }

    /// Backend selection with injectable openers (enables testing the
    /// LMDB-to-filesystem fallback path without a real LMDB environment).
    #[cfg(feature = "lmdb")]
    fn new_for_backend_with_openers<FS, LMDB>(
        path: &Path,
        backend: StorageBackend,
        max_bytes: u64,
        fs_open: FS,
        lmdb_open: LMDB,
    ) -> std::result::Result<Self, StoreError>
    where
        FS: Fn(&Path, u64) -> std::result::Result<Self, StoreError>,
        LMDB: Fn(&Path, u64) -> std::result::Result<Self, StoreError>,
    {
        match backend {
            StorageBackend::Fs => fs_open(path, max_bytes),
            StorageBackend::Lmdb => match lmdb_open(path, max_bytes) {
                Ok(store) => Ok(store),
                // Only environment-level failures (e.g. ENOSYS) fall back;
                // other errors propagate so misconfiguration stays visible.
                Err(err) if should_fallback_from_lmdb_error(&err) => {
                    warn!(
                        path = %path.display(),
                        "LMDB backend is unsupported in this environment, falling back to filesystem storage"
                    );
                    fs_open(path, max_bytes)
                }
                Err(err) => Err(err),
            },
        }
    }

    /// Create a new local store based on config
    pub fn new<P: AsRef<Path>>(path: P) -> std::result::Result<Self, StoreError> {
        Self::new_with_max_bytes(path, 0)
    }

    /// Create a new local store based on config with an optional byte limit.
    pub fn new_with_max_bytes<P: AsRef<Path>>(
        path: P,
        max_bytes: u64,
    ) -> std::result::Result<Self, StoreError> {
        let config = Config::load_or_default();
        Self::new_for_backend(path, config.storage.backend, max_bytes)
    }

    /// List all hashes in the store
    pub fn list(&self) -> std::result::Result<Vec<Hash>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.list(),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.list(),
        }
    }

    /// Sync get operation
    pub fn get_sync(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get_sync(hash),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get_sync(hash),
        }
    }
}
196
/// Returns true when an LMDB error indicates the environment cannot support
/// LMDB at all (the underlying syscall is unimplemented, ENOSYS), in which
/// case the caller should fall back to filesystem storage.
#[cfg(feature = "lmdb")]
pub(crate) fn should_fallback_from_lmdb_error(error: &StoreError) -> bool {
    match error {
        StoreError::Io(io_error) => io_error.raw_os_error() == Some(libc::ENOSYS),
        _ => false,
    }
}
204
// `Store` implementation: pure delegation to whichever backend variant this
// store wraps. The Lmdb arms only compile with the `lmdb` feature enabled.
#[async_trait::async_trait]
impl Store for LocalStore {
    async fn put(&self, hash: Hash, data: Vec<u8>) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.put(hash, data).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.put(hash, data).await,
        }
    }

    async fn get(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get(hash).await,
        }
    }

    async fn has(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.has(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.has(hash).await,
        }
    }

    async fn delete(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.delete(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.delete(hash).await,
        }
    }

    fn set_max_bytes(&self, max: u64) {
        match self {
            LocalStore::Fs(store) => store.set_max_bytes(max),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.set_max_bytes(max),
        }
    }

    fn max_bytes(&self) -> Option<u64> {
        match self {
            LocalStore::Fs(store) => store.max_bytes(),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.max_bytes(),
        }
    }

    // Backend stats errors are swallowed into a default (zeroed) StoreStats.
    async fn stats(&self) -> StoreStats {
        match self {
            LocalStore::Fs(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    pinned_count: stats.pinned_count as u64,
                    pinned_bytes: stats.pinned_bytes,
                },
                Err(_) => StoreStats::default(),
            },
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => match store.stats() {
                // The LMDB backend reports no pin information here, so
                // pinned counters are fixed at zero.
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    pinned_count: 0,
                    pinned_bytes: 0,
                },
                Err(_) => StoreStats::default(),
            },
        }
    }

    async fn evict_if_needed(&self) -> std::result::Result<u64, StoreError> {
        match self {
            LocalStore::Fs(store) => store.evict_if_needed().await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.evict_if_needed().await,
        }
    }
}
287
/// Git storage backed by HashTree with configurable persistence
pub struct GitStorage {
    /// Persistent local blob store, shared with `tree`.
    store: Arc<LocalStore>,
    /// Hashtree used to build the content-addressed repository layout.
    tree: HashTree<LocalStore>,
    /// Executor for driving async hashtree/store calls from sync methods.
    runtime: RuntimeExecutor,
    /// In-memory state for the current session:
    /// object sha1 hex -> zlib-compressed loose object bytes.
    objects: std::sync::RwLock<HashMap<String, Vec<u8>>>,
    /// Ref name (e.g. "HEAD", "refs/heads/main") -> oid hex or "ref: <target>".
    refs: std::sync::RwLock<HashMap<String, String>>,
    /// Cached root CID (hash + encryption key);
    /// reset to `None` by every object/ref mutation.
    root_cid: std::sync::RwLock<Option<Cid>>,
}
299
300impl GitStorage {
301    /// Open or create a git storage at the given path
302    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
303        let config = Config::load_or_default();
304        let max_size_bytes = config
305            .storage
306            .max_size_gb
307            .saturating_mul(1024 * 1024 * 1024);
308        Self::open_with_max_bytes(path, max_size_bytes)
309    }
310
311    /// Open or create a git storage at the given path with an explicit byte limit.
312    pub fn open_with_max_bytes(path: impl AsRef<Path>, max_size_bytes: u64) -> Result<Self> {
313        let config = Config::load_or_default();
314        Self::open_with_backend_and_max_bytes(path, config.storage.backend, max_size_bytes)
315    }
316
    /// Open or create a git storage with an explicit backend and byte limit.
    ///
    /// Blobs live under `<path>/blobs`. If called from inside a tokio
    /// runtime, that runtime's handle is reused for blocking async calls;
    /// otherwise a dedicated runtime is created and owned.
    pub fn open_with_backend_and_max_bytes(
        path: impl AsRef<Path>,
        backend: StorageBackend,
        max_size_bytes: u64,
    ) -> Result<Self> {
        let runtime = match Handle::try_current() {
            Ok(handle) => RuntimeExecutor::Handle(handle),
            Err(_) => {
                let rt = Runtime::new()
                    .map_err(|e| Error::StorageError(format!("tokio runtime: {}", e)))?;
                RuntimeExecutor::Owned(rt)
            }
        };

        let store_path = path.as_ref().join("blobs");
        let store = Arc::new(
            LocalStore::new_for_backend(&store_path, backend, max_size_bytes)
                .map_err(|e| Error::StorageError(format!("local store: {}", e)))?,
        );

        // Use encrypted mode (default) - blossom servers require encrypted data
        let tree = HashTree::new(HashTreeConfig::new(store.clone()));

        Ok(Self {
            store,
            tree,
            runtime,
            // Session state starts empty; objects/refs are imported or written
            // by callers before build_tree() is invoked.
            objects: std::sync::RwLock::new(HashMap::new()),
            refs: std::sync::RwLock::new(HashMap::new()),
            root_cid: std::sync::RwLock::new(None),
        })
    }
349
350    /// Evict old local blobs if storage is over the configured limit.
351    pub fn evict_if_needed(&self) -> Result<u64> {
352        self.runtime
353            .block_on(self.store.evict_if_needed())
354            .map_err(|e| Error::StorageError(format!("evict: {}", e)))
355    }
356
357    /// Write an object, returning its ID
358    fn write_object(&self, obj: &GitObject) -> Result<ObjectId> {
359        let oid = obj.id();
360        let key = oid.to_hex();
361
362        let loose = obj.to_loose_format();
363        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
364        encoder.write_all(&loose)?;
365        let compressed = encoder.finish()?;
366
367        let mut objects = self
368            .objects
369            .write()
370            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
371        objects.insert(key, compressed);
372
373        // Invalidate cached root
374        if let Ok(mut root) = self.root_cid.write() {
375            *root = None;
376        }
377
378        Ok(oid)
379    }
380
381    /// Write raw object data (type + content already parsed)
382    pub fn write_raw_object(&self, obj_type: ObjectType, content: &[u8]) -> Result<ObjectId> {
383        let obj = GitObject::new(obj_type, content.to_vec());
384        self.write_object(&obj)
385    }
386
387    /// Read an object by ID from in-memory cache
388    #[allow(dead_code)]
389    fn read_object(&self, oid: &ObjectId) -> Result<GitObject> {
390        let key = oid.to_hex();
391        let objects = self
392            .objects
393            .read()
394            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
395        let compressed = objects
396            .get(&key)
397            .ok_or_else(|| Error::ObjectNotFound(key.clone()))?;
398
399        let mut decoder = ZlibDecoder::new(compressed.as_slice());
400        let mut data = Vec::new();
401        decoder.read_to_end(&mut data)?;
402
403        GitObject::from_loose_format(&data)
404    }
405
406    /// Write a ref
407    pub fn write_ref(&self, name: &str, target: &Ref) -> Result<()> {
408        validate_ref_name(name)?;
409
410        let value = match target {
411            Ref::Direct(oid) => oid.to_hex(),
412            Ref::Symbolic(target) => format!("ref: {}", target),
413        };
414
415        let mut refs = self
416            .refs
417            .write()
418            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
419        refs.insert(name.to_string(), value);
420
421        // Invalidate cached root
422        if let Ok(mut root) = self.root_cid.write() {
423            *root = None;
424        }
425
426        Ok(())
427    }
428
429    /// Read a ref
430    #[allow(dead_code)]
431    pub fn read_ref(&self, name: &str) -> Result<Option<Ref>> {
432        let refs = self
433            .refs
434            .read()
435            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
436
437        match refs.get(name) {
438            Some(value) => {
439                if let Some(target) = value.strip_prefix("ref: ") {
440                    Ok(Some(Ref::Symbolic(target.to_string())))
441                } else {
442                    let oid = ObjectId::from_hex(value)
443                        .ok_or_else(|| Error::StorageError(format!("invalid ref: {}", value)))?;
444                    Ok(Some(Ref::Direct(oid)))
445                }
446            }
447            None => Ok(None),
448        }
449    }
450
451    /// List all refs
452    #[allow(dead_code)]
453    pub fn list_refs(&self) -> Result<HashMap<String, String>> {
454        let refs = self
455            .refs
456            .read()
457            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
458        Ok(refs.clone())
459    }
460
461    /// Delete a ref
462    pub fn delete_ref(&self, name: &str) -> Result<bool> {
463        let mut refs = self
464            .refs
465            .write()
466            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
467        let existed = refs.remove(name).is_some();
468
469        // Invalidate cached root
470        if let Ok(mut root) = self.root_cid.write() {
471            *root = None;
472        }
473
474        Ok(existed)
475    }
476
477    /// Import a raw git object (already in loose format, zlib compressed)
478    /// Used when fetching existing objects from remote before push
479    pub fn import_compressed_object(&self, oid: &str, compressed_data: Vec<u8>) -> Result<()> {
480        let mut objects = self
481            .objects
482            .write()
483            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
484        objects.insert(oid.to_string(), compressed_data);
485
486        // Invalidate cached root
487        if let Ok(mut root) = self.root_cid.write() {
488            *root = None;
489        }
490
491        Ok(())
492    }
493
494    /// Import a ref directly (used when loading existing refs from remote)
495    pub fn import_ref(&self, name: &str, value: &str) -> Result<()> {
496        let mut refs = self
497            .refs
498            .write()
499            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
500        refs.insert(name.to_string(), value.to_string());
501
502        // Invalidate cached root
503        if let Ok(mut root) = self.root_cid.write() {
504            *root = None;
505        }
506
507        Ok(())
508    }
509
510    /// Check if a ref exists
511    #[cfg(test)]
512    pub fn has_ref(&self, name: &str) -> Result<bool> {
513        let refs = self
514            .refs
515            .read()
516            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
517        Ok(refs.contains_key(name))
518    }
519
520    /// Get count of objects in storage
521    #[cfg(test)]
522    pub fn object_count(&self) -> Result<usize> {
523        let objects = self
524            .objects
525            .read()
526            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
527        Ok(objects.len())
528    }
529
530    /// Get the cached root CID (returns None if tree hasn't been built)
531    #[allow(dead_code)]
532    pub fn get_root_cid(&self) -> Result<Option<Cid>> {
533        let root = self
534            .root_cid
535            .read()
536            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
537        Ok(root.clone())
538    }
539
540    /// Get the default branch name
541    #[allow(dead_code)]
542    pub fn default_branch(&self) -> Result<Option<String>> {
543        let refs = self
544            .refs
545            .read()
546            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
547
548        if let Some(head) = refs.get("HEAD") {
549            if let Some(target) = head.strip_prefix("ref: ") {
550                return Ok(Some(target.to_string()));
551            }
552        }
553        Ok(None)
554    }
555
556    /// Get the tree SHA from a commit object
557    fn get_commit_tree(
558        &self,
559        commit_oid: &str,
560        objects: &HashMap<String, Vec<u8>>,
561    ) -> Option<String> {
562        let compressed = objects.get(commit_oid)?;
563
564        // Decompress the object
565        let mut decoder = ZlibDecoder::new(&compressed[..]);
566        let mut decompressed = Vec::new();
567        decoder.read_to_end(&mut decompressed).ok()?;
568
569        // Parse git object format: "type size\0content"
570        let null_pos = decompressed.iter().position(|&b| b == 0)?;
571        let content = &decompressed[null_pos + 1..];
572
573        // Parse commit content - first line is "tree <sha>"
574        let content_str = std::str::from_utf8(content).ok()?;
575        let first_line = content_str.lines().next()?;
576        first_line
577            .strip_prefix("tree ")
578            .map(|tree_hash| tree_hash.to_string())
579    }
580
581    /// Get git object content (decompressed, without header)
582    fn get_object_content(
583        &self,
584        oid: &str,
585        objects: &HashMap<String, Vec<u8>>,
586    ) -> Option<(ObjectType, Vec<u8>)> {
587        let compressed = objects.get(oid)?;
588
589        // Decompress the object
590        let mut decoder = ZlibDecoder::new(&compressed[..]);
591        let mut decompressed = Vec::new();
592        decoder.read_to_end(&mut decompressed).ok()?;
593
594        // Parse git object format: "type size\0content"
595        let null_pos = decompressed.iter().position(|&b| b == 0)?;
596        let header = std::str::from_utf8(&decompressed[..null_pos]).ok()?;
597        let obj_type = if header.starts_with("blob") {
598            ObjectType::Blob
599        } else if header.starts_with("tree") {
600            ObjectType::Tree
601        } else if header.starts_with("commit") {
602            ObjectType::Commit
603        } else if header.starts_with("tag") {
604            ObjectType::Tag
605        } else {
606            return None;
607        };
608        let content = decompressed[null_pos + 1..].to_vec();
609        Some((obj_type, content))
610    }
611
    /// Resolve a (possibly annotated) tag oid to the object it ultimately
    /// points at.
    ///
    /// Non-tag objects resolve to themselves. For tag objects the
    /// "object <sha>" header line is followed, and nested tags are peeled
    /// recursively. Returns `None` if any object in the chain is missing
    /// or unparsable.
    ///
    /// NOTE(review): recursion depth equals the tag-chain length; a cycle
    /// would require a sha1 collision, so unbounded recursion is assumed
    /// impossible for genuine objects.
    fn peel_tag_target(&self, oid: &str, objects: &HashMap<String, Vec<u8>>) -> Option<String> {
        let (obj_type, content) = self.get_object_content(oid, objects)?;
        if obj_type != ObjectType::Tag {
            return Some(oid.to_string());
        }

        // Annotated tag bodies begin with an "object <sha>" line.
        let target = std::str::from_utf8(&content)
            .ok()?
            .lines()
            .find_map(|line| line.strip_prefix("object "))
            .map(str::trim)?
            .to_string();

        match self.get_object_content(&target, objects)?.0 {
            ObjectType::Tag => self.peel_tag_target(&target, objects),
            _ => Some(target),
        }
    }
630
631    fn build_info_refs_content(
632        &self,
633        refs: &HashMap<String, String>,
634        objects: &HashMap<String, Vec<u8>>,
635    ) -> String {
636        let mut lines = Vec::new();
637
638        for (name, value) in refs {
639            if name == "HEAD" {
640                continue;
641            }
642
643            let oid = value.trim().to_string();
644            lines.push((name.clone(), oid.clone()));
645
646            if name.starts_with("refs/tags/") {
647                if let Some(peeled) = self.peel_tag_target(&oid, objects) {
648                    if peeled != oid {
649                        lines.push((format!("{}^{{}}", name), peeled));
650                    }
651                }
652            }
653        }
654
655        lines.sort_by(|a, b| a.0.cmp(&b.0));
656
657        let mut content = String::new();
658        for (name, oid) in lines {
659            content.push_str(&oid);
660            content.push('\t');
661            content.push_str(&name);
662            content.push('\n');
663        }
664        content
665    }
666
667    async fn build_info_dir(
668        &self,
669        refs: &HashMap<String, String>,
670        objects: &HashMap<String, Vec<u8>>,
671    ) -> Result<Cid> {
672        let info_refs = self.build_info_refs_content(refs, objects);
673        let (info_refs_cid, info_refs_size) = self
674            .tree
675            .put(info_refs.as_bytes())
676            .await
677            .map_err(|e| Error::StorageError(format!("put info/refs: {}", e)))?;
678
679        self.tree
680            .put_directory(vec![
681                DirEntry::from_cid("refs", &info_refs_cid).with_size(info_refs_size)
682            ])
683            .await
684            .map_err(|e| Error::StorageError(format!("put info dir: {}", e)))
685    }
686
    /// Build the hashtree and return the root CID (hash + encryption key)
    ///
    /// Lays out the repo as described in the module docs: a root directory
    /// containing `.git/` (HEAD, config, index, info/, objects/, refs/) plus
    /// the checked-out working-tree files of the default branch's commit.
    /// The result is cached in `root_cid` until the next object/ref mutation.
    pub fn build_tree(&self) -> Result<Cid> {
        // Check if we have a cached root
        if let Ok(root) = self.root_cid.read() {
            if let Some(ref cid) = *root {
                return Ok(cid.clone());
            }
        }

        // Best-effort eviction before writing new blobs; failure is non-fatal.
        if let Err(err) = self.evict_if_needed() {
            debug!("pre-build eviction skipped: {}", err);
        }

        let objects = self
            .objects
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;

        // Get default branch from HEAD or find first branch ref
        let (default_branch, commit_sha) = if let Some(head) = refs.get("HEAD") {
            let branch = head.strip_prefix("ref: ").map(String::from);
            let sha = branch.as_ref().and_then(|b| refs.get(b)).cloned();
            (branch, sha)
        } else {
            // No HEAD ref - find first refs/heads/* ref directly
            // NOTE(review): HashMap iteration order is unspecified, so with
            // several branches and no HEAD the chosen branch is arbitrary.
            let mut branch_info: Option<(String, String)> = None;
            for (ref_name, sha) in refs.iter() {
                if ref_name.starts_with("refs/heads/") {
                    branch_info = Some((ref_name.clone(), sha.clone()));
                    break;
                }
            }
            match branch_info {
                Some((branch, sha)) => (Some(branch), Some(sha)),
                None => (None, None),
            }
        };

        // Get tree SHA from commit
        let tree_sha = commit_sha
            .as_ref()
            .and_then(|sha| self.get_commit_tree(sha, &objects));

        // Clone objects for async block
        let objects_clone = objects.clone();

        let root_cid = self.runtime.block_on(async {
            // Build objects directory
            let objects_cid = self.build_objects_dir(&objects).await?;

            // Build refs directory
            let refs_cid = self.build_refs_dir(&refs).await?;

            // Build dumb-HTTP info directory
            let info_cid = self.build_info_dir(&refs, &objects_clone).await?;

            // Build HEAD file - use default_branch if no explicit HEAD
            // Git expects HEAD to end with newline, so add it if missing
            let head_content = refs.get("HEAD")
                .map(|h| if h.ends_with('\n') { h.clone() } else { format!("{}\n", h) })
                .or_else(|| default_branch.as_ref().map(|b| format!("ref: {}\n", b)))
                .unwrap_or_else(|| "ref: refs/heads/main\n".to_string());
            debug!("HEAD content: {:?}", head_content);
            let (head_cid, head_size) = self.tree.put(head_content.as_bytes()).await
                .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
            debug!("HEAD hash: {}", hex::encode(head_cid.hash));

            // Build .git directory - use from_cid to preserve encryption keys
            let mut git_entries = vec![
                DirEntry::from_cid("HEAD", &head_cid).with_size(head_size),
                DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("objects", &objects_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("refs", &refs_cid).with_link_type(LinkType::Dir),
            ];

            // Add config if we have a default branch
            if let Some(ref branch) = default_branch {
                let config = format!(
                    "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = true\n[init]\n\tdefaultBranch = {}\n",
                    branch.trim_start_matches("refs/heads/")
                );
                let (config_cid, config_size) = self.tree.put(config.as_bytes()).await
                    .map_err(|e| Error::StorageError(format!("put config: {}", e)))?;
                git_entries.push(DirEntry::from_cid("config", &config_cid).with_size(config_size));
            }

            // Build and add index file if we have a tree SHA.
            // Index failure is non-fatal: the repo works without one.
            if let Some(ref tree_oid) = tree_sha {
                match self.build_index_file(tree_oid, &objects_clone) {
                    Ok(index_data) => {
                        let (index_cid, index_size) = self.tree.put(&index_data).await
                            .map_err(|e| Error::StorageError(format!("put index: {}", e)))?;
                        git_entries.push(DirEntry::from_cid("index", &index_cid).with_size(index_size));
                        info!("Added git index file ({} bytes)", index_data.len());
                    }
                    Err(e) => {
                        debug!("Failed to build git index file: {} - continuing without index", e);
                    }
                }
            }

            let git_cid = self.tree.put_directory(git_entries).await
                .map_err(|e| Error::StorageError(format!("put .git: {}", e)))?;

            // Build root entries starting with .git
            // Use from_cid to preserve the encryption key
            let mut root_entries = vec![DirEntry::from_cid(".git", &git_cid).with_link_type(LinkType::Dir)];

            // Add working tree files if we have a tree SHA
            if let Some(ref tree_oid) = tree_sha {
                let working_tree_entries = self.build_working_tree_entries(tree_oid, &objects_clone).await?;
                root_entries.extend(working_tree_entries);
                info!("Added {} working tree entries to root", root_entries.len() - 1);
            }

            // Sort entries for deterministic ordering
            root_entries.sort_by(|a, b| a.name.cmp(&b.name));

            let root_cid = self.tree.put_directory(root_entries).await
                .map_err(|e| Error::StorageError(format!("put root: {}", e)))?;

            info!("Built hashtree root: {} (encrypted: {}) (.git dir: {})",
                hex::encode(root_cid.hash),
                root_cid.key.is_some(),
                hex::encode(git_cid.hash));

            Ok::<Cid, Error>(root_cid)
        })?;

        // Cache the root CID
        if let Ok(mut root) = self.root_cid.write() {
            *root = Some(root_cid.clone());
        }

        Ok(root_cid)
    }
827
    /// Build working tree entries from a git tree object
    ///
    /// Converts one git tree object into hashtree `DirEntry`s: blobs become
    /// files, subtrees are stored recursively as directories. Entries are
    /// returned sorted by name for deterministic directory hashing.
    ///
    /// NOTE(review): tree entries that are neither trees nor blobs (e.g.
    /// gitlink/submodule entries) are silently skipped by the `if let`
    /// below — confirm this is intended.
    async fn build_working_tree_entries(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<DirEntry>> {
        let mut entries = Vec::new();

        // Get tree content
        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        // Parse tree entries
        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                // Recursively build subdirectory (boxed to allow async recursion)
                let sub_entries = self
                    .build_working_tree_entries_boxed(&oid_hex, objects)
                    .await?;

                // Create subdirectory in hashtree
                let dir_cid =
                    self.tree.put_directory(sub_entries).await.map_err(|e| {
                        Error::StorageError(format!("put dir {}: {}", entry.name, e))
                    })?;

                // Use from_cid to preserve encryption key
                entries
                    .push(DirEntry::from_cid(&entry.name, &dir_cid).with_link_type(LinkType::Dir));
            } else {
                // Get blob content
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    // Use put() instead of put_blob() to chunk large files
                    let (cid, size) = self.tree.put(&blob_content).await.map_err(|e| {
                        Error::StorageError(format!("put blob {}: {}", entry.name, e))
                    })?;

                    // Use from_cid to preserve encryption key
                    entries.push(DirEntry::from_cid(&entry.name, &cid).with_size(size));
                }
            }
        }

        // Sort for deterministic ordering
        entries.sort_by(|a, b| a.name.cmp(&b.name));

        Ok(entries)
    }
890
    /// Boxed version for async recursion
    ///
    /// `build_working_tree_entries` must call itself for subtrees, but an
    /// `async fn` cannot recurse directly (its future type would be
    /// infinitely sized), so the recursive call goes through this pinned,
    /// heap-allocated wrapper instead.
    fn build_working_tree_entries_boxed<'a>(
        &'a self,
        tree_oid: &'a str,
        objects: &'a HashMap<String, Vec<u8>>,
    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
        Box::pin(self.build_working_tree_entries(tree_oid, objects))
    }
899
900    /// Build the objects directory using HashTree
901    async fn build_objects_dir(&self, objects: &HashMap<String, Vec<u8>>) -> Result<Cid> {
902        let mut top_entries = Vec::new();
903
904        if !objects.is_empty() {
905            // Group objects by first 2 characters of SHA (git loose object structure)
906            // Git expects objects/XX/YYYYYY... where XX is first 2 hex chars
907            let mut buckets: HashMap<String, Vec<(String, Vec<u8>)>> = HashMap::new();
908            for (oid, data) in objects {
909                let prefix = &oid[..2];
910                let suffix = &oid[2..];
911                buckets
912                    .entry(prefix.to_string())
913                    .or_default()
914                    .push((suffix.to_string(), data.clone()));
915            }
916
917            // Build subdirectories for each prefix
918            for (prefix, objs) in buckets {
919                let mut sub_entries = Vec::new();
920                for (suffix, data) in objs {
921                    // Use put() instead of put_blob() to chunk large objects
922                    // Git blobs can be >5MB which exceeds blossom server limits
923                    let (cid, size) = self.tree.put(&data).await.map_err(|e| {
924                        Error::StorageError(format!("put object {}{}: {}", prefix, suffix, e))
925                    })?;
926                    // Use from_cid to preserve encryption key
927                    sub_entries.push(DirEntry::from_cid(suffix, &cid).with_size(size));
928                }
929                // Sort for deterministic ordering
930                sub_entries.sort_by(|a, b| a.name.cmp(&b.name));
931
932                let sub_cid =
933                    self.tree.put_directory(sub_entries).await.map_err(|e| {
934                        Error::StorageError(format!("put objects/{}: {}", prefix, e))
935                    })?;
936                top_entries
937                    .push(DirEntry::from_cid(prefix, &sub_cid).with_link_type(LinkType::Dir));
938            }
939        }
940
941        let (packs_cid, packs_size) = self
942            .tree
943            .put(b"")
944            .await
945            .map_err(|e| Error::StorageError(format!("put objects/info/packs: {}", e)))?;
946        let info_cid = self
947            .tree
948            .put_directory(vec![
949                DirEntry::from_cid("packs", &packs_cid).with_size(packs_size)
950            ])
951            .await
952            .map_err(|e| Error::StorageError(format!("put objects/info: {}", e)))?;
953        top_entries.push(DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir));
954
955        // Sort for deterministic ordering
956        top_entries.sort_by(|a, b| a.name.cmp(&b.name));
957
958        let entry_count = top_entries.len();
959        let cid = self
960            .tree
961            .put_directory(top_entries)
962            .await
963            .map_err(|e| Error::StorageError(format!("put objects dir: {}", e)))?;
964
965        debug!(
966            "Built objects dir with {} entries: {}",
967            entry_count,
968            hex::encode(cid.hash)
969        );
970        Ok(cid)
971    }
972
973    /// Build the refs directory using HashTree
974    async fn build_refs_dir(&self, refs: &HashMap<String, String>) -> Result<Cid> {
975        let mut root = RefDirectory::default();
976
977        for (ref_name, value) in refs {
978            let parts: Vec<&str> = ref_name.split('/').collect();
979            if parts.len() >= 3 && parts[0] == "refs" {
980                root.insert(&parts[1..], value.clone());
981            }
982        }
983
984        let mut ref_entries = self.build_ref_entries_recursive(&root, "refs").await?;
985
986        if ref_entries.is_empty() {
987            // Return empty directory Cid
988            let empty_cid = self
989                .tree
990                .put_directory(vec![])
991                .await
992                .map_err(|e| Error::StorageError(format!("put empty refs: {}", e)))?;
993            return Ok(empty_cid);
994        }
995
996        ref_entries.sort_by(|a, b| a.name.cmp(&b.name));
997
998        let refs_cid = self
999            .tree
1000            .put_directory(ref_entries)
1001            .await
1002            .map_err(|e| Error::StorageError(format!("put refs dir: {}", e)))?;
1003        debug!("refs dir -> {}", hex::encode(refs_cid.hash));
1004        Ok(refs_cid)
1005    }
1006
1007    fn build_ref_entries_recursive<'a>(
1008        &'a self,
1009        dir: &'a RefDirectory,
1010        prefix: &'a str,
1011    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
1012        Box::pin(async move {
1013            let mut entries = Vec::new();
1014
1015            for (name, value) in &dir.files {
1016                let (cid, size) = self
1017                    .tree
1018                    .put(value.as_bytes())
1019                    .await
1020                    .map_err(|e| Error::StorageError(format!("put ref: {}", e)))?;
1021                debug!("{}/{} -> blob {}", prefix, name, hex::encode(cid.hash));
1022                entries.push(DirEntry::from_cid(name, &cid).with_size(size));
1023            }
1024
1025            for (name, child) in &dir.dirs {
1026                let child_prefix = format!("{prefix}/{name}");
1027                let child_entries = self
1028                    .build_ref_entries_recursive(child, &child_prefix)
1029                    .await?;
1030                let child_cid =
1031                    self.tree.put_directory(child_entries).await.map_err(|e| {
1032                        Error::StorageError(format!("put {child_prefix} dir: {}", e))
1033                    })?;
1034                debug!("{} dir -> {}", child_prefix, hex::encode(child_cid.hash));
1035                entries.push(DirEntry::from_cid(name, &child_cid).with_link_type(LinkType::Dir));
1036            }
1037
1038            entries.sort_by(|a, b| a.name.cmp(&b.name));
1039            Ok(entries)
1040        })
1041    }
1042
    /// Build git index file from tree entries
    /// Returns the raw binary content of the index file
    ///
    /// Serializes a version-2 git index ("DIRC") covering every file
    /// reachable from `tree_oid`, so a checkout of this repo sees a clean
    /// working tree. All stat fields (ctime/mtime/dev/ino/uid/gid) are
    /// synthesized — there is no real filesystem state behind these entries.
    ///
    /// # Errors
    /// Propagates `ObjectNotFound` / `InvalidObjectType` from walking the
    /// tree via `collect_tree_entries_for_index`.
    fn build_index_file(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<u8>> {
        // Collect all file entries from the tree (recursively)
        let mut entries: Vec<(String, [u8; 20], u32, u32)> = Vec::new(); // (path, sha1, mode, size)
        self.collect_tree_entries_for_index(tree_oid, objects, "", &mut entries)?;

        // Sort entries by path (git index requirement)
        entries.sort_by(|a, b| a.0.cmp(&b.0));

        let entry_count = entries.len() as u32;
        debug!("Building git index with {} entries", entry_count);

        // Build index content
        let mut index_data = Vec::new();

        // Header: DIRC + version 2 + entry count (all integers big-endian)
        index_data.extend_from_slice(b"DIRC");
        index_data.extend_from_slice(&2u32.to_be_bytes()); // version 2
        index_data.extend_from_slice(&entry_count.to_be_bytes());

        // Current time for ctime/mtime (doesn't matter much for our use case;
        // truncation to u32 is the index format's field width)
        let now_sec = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs() as u32;

        for (path, sha1, mode, size) in &entries {
            let entry_start = index_data.len();

            // ctime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mtime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // dev, ino (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mode
            index_data.extend_from_slice(&mode.to_be_bytes());
            // uid, gid (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // file size
            index_data.extend_from_slice(&size.to_be_bytes());
            // SHA-1
            index_data.extend_from_slice(sha1);
            // flags: path length (max 0xFFF) in low 12 bits; git reads the
            // NUL-terminated name when the length saturates at 0xFFF
            let path_len = std::cmp::min(path.len(), 0xFFF) as u16;
            index_data.extend_from_slice(&path_len.to_be_bytes());
            // path (NUL-terminated)
            index_data.extend_from_slice(path.as_bytes());
            index_data.push(0); // NUL terminator

            // Pad to 8-byte boundary relative to entry start. Combined with
            // the single NUL above this matches git's "1-8 NUL bytes while
            // keeping the name NUL-terminated" padding rule.
            let entry_len = index_data.len() - entry_start;
            let padding = (8 - (entry_len % 8)) % 8;
            index_data.extend(std::iter::repeat_n(0, padding));
        }

        // Calculate SHA-1 checksum of everything and append (index trailer)
        let mut hasher = Sha1::new();
        hasher.update(&index_data);
        let checksum = hasher.finalize();
        index_data.extend_from_slice(&checksum);

        debug!(
            "Built git index: {} bytes, {} entries",
            index_data.len(),
            entry_count
        );
        Ok(index_data)
    }
1121
    /// Collect file entries from a git tree for building the index
    ///
    /// Depth-first walk of the tree rooted at `tree_oid`, appending one
    /// `(path, sha1, mode, size)` tuple per blob to `entries`. `prefix` is
    /// the slash-joined path of the enclosing directories ("" at the root).
    /// Blobs whose content cannot be resolved are skipped silently,
    /// mirroring the best-effort behavior of the working-tree builder.
    ///
    /// # Errors
    /// `ObjectNotFound` if `tree_oid` is unknown, `InvalidObjectType` if it
    /// does not resolve to a tree.
    fn collect_tree_entries_for_index(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
        prefix: &str,
        entries: &mut Vec<(String, [u8; 20], u32, u32)>,
    ) -> Result<()> {
        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let path = if prefix.is_empty() {
                entry.name.clone()
            } else {
                format!("{}/{}", prefix, entry.name)
            };

            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                // Recursively process subdirectory
                self.collect_tree_entries_for_index(&oid_hex, objects, &path, entries)?;
            } else {
                // Get blob content for size and SHA-1
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    // Convert hex SHA to bytes; oid_hex comes from
                    // ObjectId::to_hex so decode failure should be
                    // impossible — on the off chance it happens, the entry
                    // keeps an all-zero sha rather than erroring out.
                    let mut sha1_bytes = [0u8; 20];
                    if let Ok(bytes) = hex::decode(&oid_hex) {
                        if bytes.len() == 20 {
                            sha1_bytes.copy_from_slice(&bytes);
                        }
                    }

                    // Mode: use entry.mode or default to regular file
                    let mode = entry.mode;
                    // Index size field is u32; assumes blobs < 4 GiB
                    let size = blob_content.len() as u32;

                    entries.push((path, sha1_bytes, mode, size));
                }
            }
        }

        Ok(())
    }
1179
    /// Get the underlying store
    ///
    /// Shared handle to the local blob store backing this repository, for
    /// callers that need direct blob-level access (listing, sync, tests).
    pub fn store(&self) -> &Arc<LocalStore> {
        &self.store
    }
1184
    /// Get the HashTree for direct access
    ///
    /// Escape hatch for raw tree operations (resolve/get/put); kept behind
    /// `dead_code` because production paths go through the typed methods.
    #[allow(dead_code)]
    pub fn hashtree(&self) -> &HashTree<LocalStore> {
        &self.tree
    }
1190
1191    /// Push all blobs to file servers
1192    #[allow(dead_code)]
1193    pub fn push_to_file_servers(
1194        &self,
1195        blossom: &hashtree_blossom::BlossomClient,
1196    ) -> Result<(usize, usize)> {
1197        let hashes = self
1198            .store
1199            .list()
1200            .map_err(|e| Error::StorageError(format!("list hashes: {}", e)))?;
1201
1202        info!("Pushing {} blobs to file servers", hashes.len());
1203
1204        let mut uploaded = 0;
1205        let mut existed = 0;
1206
1207        self.runtime.block_on(async {
1208            for hash in &hashes {
1209                let hex_hash = hex::encode(hash);
1210                let data = match self.store.get_sync(hash) {
1211                    Ok(Some(d)) => d,
1212                    _ => continue,
1213                };
1214
1215                match blossom.upload_if_missing(&data).await {
1216                    Ok((_, true)) => {
1217                        debug!("Uploaded {}", &hex_hash[..12]);
1218                        uploaded += 1;
1219                    }
1220                    Ok((_, false)) => {
1221                        existed += 1;
1222                    }
1223                    Err(e) => {
1224                        debug!("Failed to upload {}: {}", &hex_hash[..12], e);
1225                    }
1226                }
1227            }
1228        });
1229
1230        info!(
1231            "Upload complete: {} new, {} already existed",
1232            uploaded, existed
1233        );
1234        Ok((uploaded, existed))
1235    }
1236
1237    /// Clear all state (for testing or re-initialization)
1238    #[allow(dead_code)]
1239    pub fn clear(&self) -> Result<()> {
1240        let mut objects = self
1241            .objects
1242            .write()
1243            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1244        let mut refs = self
1245            .refs
1246            .write()
1247            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1248        let mut root = self
1249            .root_cid
1250            .write()
1251            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1252
1253        objects.clear();
1254        refs.clear();
1255        *root = None;
1256        Ok(())
1257    }
1258}
1259
1260#[cfg(test)]
1261mod tests {
1262    use super::*;
1263    use hashtree_core::store::Store;
1264    use hashtree_core::{sha256, LinkType};
1265    use std::io::{Read, Write};
1266    use std::net::{TcpListener, TcpStream};
1267    use std::path::Path;
1268    use std::process::{Child, Command, Stdio};
1269    #[cfg(feature = "lmdb")]
1270    use std::sync::atomic::{AtomicUsize, Ordering};
1271    use std::time::{Duration, Instant};
1272    use tempfile::TempDir;
1273
    /// Open a fresh `GitStorage` in a temp dir; the `TempDir` guard must be
    /// kept alive for the storage's lifetime.
    fn create_test_storage() -> (GitStorage, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let storage = GitStorage::open(temp_dir.path()).unwrap();
        (storage, temp_dir)
    }
1279
    /// Open a `GitStorage` on the FS backend with a local-store byte cap,
    /// used by the eviction tests.
    fn create_test_storage_with_limit(max_size_bytes: u64) -> (GitStorage, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let storage = GitStorage::open_with_backend_and_max_bytes(
            temp_dir.path(),
            StorageBackend::Fs,
            max_size_bytes,
        )
        .unwrap();
        (storage, temp_dir)
    }
1290
    /// Total bytes currently held in the local blob store, regardless of
    /// which backend variant is in use.
    fn local_total_bytes(storage: &GitStorage) -> u64 {
        match storage.store().as_ref() {
            LocalStore::Fs(store) => store.stats().unwrap().total_bytes,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.stats().unwrap().total_bytes,
        }
    }
1298
    /// Write a minimal blob -> tree -> commit chain and return the commit oid.
    fn write_test_commit(storage: &GitStorage) -> ObjectId {
        let blob_oid = storage
            .write_raw_object(ObjectType::Blob, b"hello from hashtree\n")
            .unwrap();

        // Raw git tree entry format: "<mode> <name>\0" followed by the
        // 20 raw SHA-1 bytes (not hex) of the referenced object.
        let mut tree_content = Vec::new();
        tree_content.extend_from_slice(b"100644 README.md\0");
        tree_content.extend_from_slice(&hex::decode(blob_oid.to_hex()).unwrap());
        let tree_oid = storage
            .write_raw_object(ObjectType::Tree, &tree_content)
            .unwrap();

        // Fixed author/committer timestamps keep the commit oid deterministic.
        let commit_content = format!(
            "tree {}\nauthor Test User <test@example.com> 0 +0000\ncommitter Test User <test@example.com> 0 +0000\n\nInitial commit\n",
            tree_oid.to_hex()
        );
        storage
            .write_raw_object(ObjectType::Commit, commit_content.as_bytes())
            .unwrap()
    }
1319
1320    fn export_tree_to_fs<S: Store>(
1321        runtime: &RuntimeExecutor,
1322        tree: &HashTree<S>,
1323        cid: &Cid,
1324        dst: &Path,
1325    ) {
1326        std::fs::create_dir_all(dst).unwrap();
1327        let entries = runtime.block_on(tree.list_directory(cid)).unwrap();
1328        for entry in entries {
1329            let entry_cid = Cid {
1330                hash: entry.hash,
1331                key: entry.key,
1332            };
1333            let path = dst.join(&entry.name);
1334            match entry.link_type {
1335                LinkType::Dir => export_tree_to_fs(runtime, tree, &entry_cid, &path),
1336                LinkType::Blob | LinkType::File => {
1337                    let data = runtime
1338                        .block_on(tree.get(&entry_cid, None))
1339                        .unwrap()
1340                        .unwrap();
1341                    if let Some(parent) = path.parent() {
1342                        std::fs::create_dir_all(parent).unwrap();
1343                    }
1344                    std::fs::write(path, data).unwrap();
1345                }
1346            }
1347        }
1348    }
1349
1350    fn spawn_http_server(root: &Path, port: u16) -> Child {
1351        Command::new("python3")
1352            .args([
1353                "-m",
1354                "http.server",
1355                &port.to_string(),
1356                "--bind",
1357                "127.0.0.1",
1358            ])
1359            .current_dir(root)
1360            .stdout(Stdio::null())
1361            .stderr(Stdio::null())
1362            .spawn()
1363            .expect("spawn python http server")
1364    }
1365
    /// Block until the spawned HTTP server answers `GET path` with 200, the
    /// child exits early, or a 5s deadline expires (both failure cases
    /// panic). Polls every 50ms with short per-request socket timeouts.
    fn wait_for_http_server(server: &mut Child, port: u16, path: &str) {
        let deadline = Instant::now() + Duration::from_secs(5);

        loop {
            // Fail fast if the server process died instead of binding.
            if let Some(status) = server.try_wait().expect("check http server status") {
                panic!("python http server exited before becoming ready: {status}");
            }

            if let Ok(mut stream) = TcpStream::connect(("127.0.0.1", port)) {
                stream
                    .set_read_timeout(Some(Duration::from_millis(200)))
                    .expect("set read timeout");
                stream
                    .set_write_timeout(Some(Duration::from_millis(200)))
                    .expect("set write timeout");
                let request =
                    format!("GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n");
                if stream.write_all(request.as_bytes()).is_ok() {
                    let mut response = String::new();
                    // python3's http.server replies with protocol HTTP/1.0
                    // by default, hence the HTTP/1.0 status-line check.
                    if stream.read_to_string(&mut response).is_ok()
                        && response.starts_with("HTTP/1.0 200")
                    {
                        return;
                    }
                }
            }

            if Instant::now() >= deadline {
                panic!("python http server did not become ready on port {port}");
            }
            std::thread::sleep(Duration::from_millis(50));
        }
    }
1399
    // import_ref should make the ref visible via both has_ref and list_refs.
    #[test]
    fn test_import_ref() {
        let (storage, _temp) = create_test_storage();

        // Import a ref
        storage
            .import_ref("refs/heads/main", "abc123def456")
            .unwrap();

        // Check it exists
        assert!(storage.has_ref("refs/heads/main").unwrap());

        // Check value via list_refs
        let refs = storage.list_refs().unwrap();
        assert_eq!(
            refs.get("refs/heads/main"),
            Some(&"abc123def456".to_string())
        );
    }
1419
    // ENOSYS from the LMDB opener (kernel lacking required syscalls) should
    // trigger exactly one fallback attempt to the FS backend.
    #[cfg(feature = "lmdb")]
    #[test]
    fn test_local_store_falls_back_to_fs_when_lmdb_open_returns_enosys() {
        let temp_dir = TempDir::new().unwrap();
        // Counters observe how many times each injected opener was invoked.
        let fs_calls = AtomicUsize::new(0);
        let lmdb_calls = AtomicUsize::new(0);

        let store = LocalStore::new_for_backend_with_openers(
            temp_dir.path(),
            StorageBackend::Lmdb,
            0,
            |path, max_bytes| {
                fs_calls.fetch_add(1, Ordering::SeqCst);
                LocalStore::open_fs_store(path, max_bytes)
            },
            |_path, _max_bytes| {
                lmdb_calls.fetch_add(1, Ordering::SeqCst);
                // Simulate LMDB being unsupported on this system.
                Err(StoreError::Io(std::io::Error::from_raw_os_error(
                    libc::ENOSYS,
                )))
            },
        )
        .unwrap();

        // LMDB was tried once, then the FS opener was used instead.
        assert!(matches!(store, LocalStore::Fs(_)));
        assert_eq!(lmdb_calls.load(Ordering::SeqCst), 1);
        assert_eq!(fs_calls.load(Ordering::SeqCst), 1);
    }
1448
    // Non-ENOSYS errors (e.g. permission denied) must propagate unchanged —
    // falling back to FS would mask a real configuration problem.
    #[cfg(feature = "lmdb")]
    #[test]
    fn test_local_store_does_not_fallback_on_unrelated_lmdb_errors() {
        let temp_dir = TempDir::new().unwrap();
        let fs_calls = AtomicUsize::new(0);

        let result = LocalStore::new_for_backend_with_openers(
            temp_dir.path(),
            StorageBackend::Lmdb,
            0,
            |path, max_bytes| {
                fs_calls.fetch_add(1, Ordering::SeqCst);
                LocalStore::open_fs_store(path, max_bytes)
            },
            |_path, _max_bytes| {
                Err(StoreError::Io(std::io::Error::from_raw_os_error(
                    libc::EACCES,
                )))
            },
        );

        // The original EACCES comes back and the FS opener was never tried.
        assert!(
            matches!(result, Err(StoreError::Io(io_error)) if io_error.raw_os_error() == Some(libc::EACCES))
        );
        assert_eq!(fs_calls.load(Ordering::SeqCst), 0);
    }
1475
    // Writing a new ref must not clobber refs previously imported from a
    // remote — all four should coexist afterwards.
    #[test]
    fn test_import_multiple_refs_preserves_all() {
        let (storage, _temp) = create_test_storage();

        // Import multiple refs (simulating loading from remote)
        storage.import_ref("refs/heads/main", "sha_main").unwrap();
        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
        storage
            .import_ref("refs/heads/feature", "sha_feature")
            .unwrap();

        // All should exist
        assert!(storage.has_ref("refs/heads/main").unwrap());
        assert!(storage.has_ref("refs/heads/dev").unwrap());
        assert!(storage.has_ref("refs/heads/feature").unwrap());

        // Now write a new ref (simulating push)
        storage
            .write_ref(
                "refs/heads/new-branch",
                &Ref::Direct(
                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
                ),
            )
            .unwrap();

        // Original refs should still exist
        let refs = storage.list_refs().unwrap();
        assert_eq!(refs.len(), 4);
        assert!(refs.contains_key("refs/heads/main"));
        assert!(refs.contains_key("refs/heads/dev"));
        assert!(refs.contains_key("refs/heads/feature"));
        assert!(refs.contains_key("refs/heads/new-branch"));
    }
1510
    // import_compressed_object stores the bytes as-is (no zlib validation),
    // so even fake compressed data should count as one stored object.
    #[test]
    fn test_import_compressed_object() {
        let (storage, _temp) = create_test_storage();

        // Create a fake compressed object
        let fake_compressed = vec![0x78, 0x9c, 0x01, 0x02, 0x03]; // fake zlib data

        storage
            .import_compressed_object("abc123def456", fake_compressed.clone())
            .unwrap();

        // Check object count
        assert_eq!(storage.object_count().unwrap(), 1);
    }
1525
    // write_ref on an existing (imported) name replaces its value rather
    // than erroring or duplicating the entry.
    #[test]
    fn test_write_ref_overwrites_imported() {
        let (storage, _temp) = create_test_storage();

        // Import a ref
        storage.import_ref("refs/heads/main", "old_sha").unwrap();

        // Write same ref with new value
        storage
            .write_ref(
                "refs/heads/main",
                &Ref::Direct(
                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
                ),
            )
            .unwrap();

        // Should have new value
        let refs = storage.list_refs().unwrap();
        assert_eq!(
            refs.get("refs/heads/main"),
            Some(&"0123456789abcdef0123456789abcdef01234567".to_string())
        );
    }
1550
    // delete_ref removes only the named ref; siblings are untouched.
    #[test]
    fn test_delete_ref_preserves_others() {
        let (storage, _temp) = create_test_storage();

        // Import multiple refs
        storage.import_ref("refs/heads/main", "sha_main").unwrap();
        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();

        // Delete one
        storage.delete_ref("refs/heads/dev").unwrap();

        // Other should still exist
        assert!(storage.has_ref("refs/heads/main").unwrap());
        assert!(!storage.has_ref("refs/heads/dev").unwrap());
    }
1566
    // clear() wipes both refs and objects in one call.
    #[test]
    fn test_clear_removes_all() {
        let (storage, _temp) = create_test_storage();

        // Import refs and objects
        storage.import_ref("refs/heads/main", "sha_main").unwrap();
        storage
            .import_compressed_object("obj1", vec![1, 2, 3])
            .unwrap();

        // Clear
        storage.clear().unwrap();

        // All gone
        assert!(!storage.has_ref("refs/heads/main").unwrap());
        assert_eq!(storage.object_count().unwrap(), 0);
    }
1584
    // With a 1 KiB cap, writing ~1.8 KiB of blobs must push the store over
    // the limit, and evict_if_needed must bring it back under.
    #[test]
    fn test_evict_if_needed_respects_configured_limit() {
        let (storage, _temp) = create_test_storage_with_limit(1_024);

        // Two 900-byte blobs comfortably exceed the 1 KiB cap.
        storage
            .write_raw_object(ObjectType::Blob, &vec![b'a'; 900])
            .unwrap();
        storage
            .write_raw_object(ObjectType::Blob, &vec![b'b'; 900])
            .unwrap();
        storage
            .write_ref(
                "refs/heads/main",
                &Ref::Direct(
                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
                ),
            )
            .unwrap();

        // build_tree persists everything into the local blob store.
        storage.build_tree().unwrap();

        let before = local_total_bytes(&storage);
        assert!(before > 1_024);

        let freed = storage.evict_if_needed().unwrap();
        assert!(freed > 0);

        let after = local_total_bytes(&storage);
        assert!(after <= 1_024);
    }
1615
    // build_tree should run eviction as a preflight step: blobs injected
    // directly into the store (not referenced by the new tree) are fair
    // game for removal when the store is already over its cap.
    #[test]
    fn test_build_tree_evicts_stale_blobs_before_writing_new_tree() {
        let max_size_bytes = 16 * 1024;
        let (storage, _temp) = create_test_storage_with_limit(max_size_bytes);

        // 3 x 7 KiB = 21 KiB of unreferenced blobs, over the 16 KiB cap.
        let stale_blobs = vec![
            vec![b'x'; 7 * 1024],
            vec![b'y'; 7 * 1024],
            vec![b'z'; 7 * 1024],
        ];
        let stale_hashes: Vec<Hash> = stale_blobs.iter().map(|blob| sha256(blob)).collect();

        // Bypass GitStorage and write straight into the blob store.
        for (hash, blob) in stale_hashes.iter().zip(stale_blobs) {
            storage
                .runtime
                .block_on(storage.store().put(*hash, blob))
                .unwrap();
        }

        let before = local_total_bytes(&storage);
        assert!(before > max_size_bytes);

        let commit_oid = write_test_commit(&storage);
        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        storage.build_tree().unwrap();

        // Count how many of the injected blobs were removed by build_tree.
        let evicted_stale = stale_hashes
            .iter()
            .filter(|hash| !storage.runtime.block_on(storage.store().has(hash)).unwrap())
            .count();

        assert!(
            evicted_stale > 0,
            "expected build_tree preflight eviction to remove stale blobs before writing"
        );
    }
1658
    // The built tree must contain the dumb-HTTP metadata files:
    // .git/info/refs (ref advertisement, including the peeled "^{}" line
    // for annotated tags) and an empty .git/objects/info/packs.
    #[test]
    fn test_build_tree_adds_dumb_http_metadata() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        // Annotated tag pointing at the commit, to exercise tag peeling.
        let tag_content = format!(
            "object {}\ntype commit\ntag v1.0.0\ntagger Test User <test@example.com> 0 +0000\n\nrelease\n",
            commit_oid.to_hex()
        );
        let tag_oid = storage
            .write_raw_object(ObjectType::Tag, tag_content.as_bytes())
            .unwrap();

        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(tag_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        // Read back .git/info/refs out of the built tree.
        let info_refs_cid = storage
            .runtime
            .block_on(storage.tree.resolve_path(&root_cid, ".git/info/refs"))
            .unwrap()
            .expect("info/refs exists");
        let info_refs = storage
            .runtime
            .block_on(storage.tree.get(&info_refs_cid, None))
            .unwrap()
            .unwrap();
        let info_refs = String::from_utf8(info_refs).unwrap();

        // One line per ref plus the peeled tag target line.
        assert_eq!(
            info_refs,
            format!(
                "{commit}\trefs/heads/main\n{tag}\trefs/tags/v1.0.0\n{commit}\trefs/tags/v1.0.0^{{}}\n",
                commit = commit_oid.to_hex(),
                tag = tag_oid.to_hex()
            )
        );

        let packs_cid = storage
            .runtime
            .block_on(
                storage
                    .tree
                    .resolve_path(&root_cid, ".git/objects/info/packs"),
            )
            .unwrap()
            .expect("objects/info/packs exists");
        let packs = storage
            .runtime
            .block_on(storage.tree.get(&packs_cid, None))
            .unwrap()
            .unwrap();
        assert!(packs.is_empty(), "objects/info/packs should be empty");
    }
1720
1721    #[test]
1722    fn test_build_tree_materializes_loose_refs_at_git_paths() {
1723        let (storage, _temp) = create_test_storage();
1724        let commit_oid = write_test_commit(&storage);
1725
1726        storage
1727            .write_ref("refs/heads/master", &Ref::Direct(commit_oid))
1728            .unwrap();
1729        storage
1730            .write_ref("refs/heads/codex/meshrouter-prod", &Ref::Direct(commit_oid))
1731            .unwrap();
1732        storage
1733            .write_ref("refs/tags/v1.0.0", &Ref::Direct(commit_oid))
1734            .unwrap();
1735        storage
1736            .write_ref("HEAD", &Ref::Symbolic("refs/heads/master".to_string()))
1737            .unwrap();
1738
1739        let root_cid = storage.build_tree().unwrap();
1740
1741        for path in [
1742            ".git/refs/heads/master",
1743            ".git/refs/heads/codex/meshrouter-prod",
1744            ".git/refs/tags/v1.0.0",
1745        ] {
1746            let ref_cid = storage
1747                .runtime
1748                .block_on(storage.tree.resolve_path(&root_cid, path))
1749                .unwrap()
1750                .unwrap_or_else(|| panic!("{path} should exist"));
1751            let ref_value = storage
1752                .runtime
1753                .block_on(storage.tree.get(&ref_cid, None))
1754                .unwrap()
1755                .unwrap();
1756            assert_eq!(
1757                String::from_utf8(ref_value).unwrap(),
1758                commit_oid.to_hex(),
1759                "{path} should contain the ref target",
1760            );
1761        }
1762    }
1763
1764    #[test]
1765    fn test_materialized_tree_supports_static_http_clone_from_git_dir() {
1766        let (storage, _temp) = create_test_storage();
1767        let commit_oid = write_test_commit(&storage);
1768        storage
1769            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
1770            .unwrap();
1771        storage
1772            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
1773            .unwrap();
1774
1775        let root_cid = storage.build_tree().unwrap();
1776        let export_dir = TempDir::new().unwrap();
1777        let repo_dir = export_dir.path().join("repo");
1778        export_tree_to_fs(&storage.runtime, &storage.tree, &root_cid, &repo_dir);
1779
1780        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
1781        let port = listener.local_addr().unwrap().port();
1782        drop(listener);
1783
1784        let mut server = spawn_http_server(export_dir.path(), port);
1785        wait_for_http_server(&mut server, port, "/repo/.git/HEAD");
1786
1787        let clone_dir = TempDir::new().unwrap();
1788        let clone_path = clone_dir.path().join("clone");
1789        let output = Command::new("git")
1790            .args([
1791                "clone",
1792                &format!("http://127.0.0.1:{port}/repo/.git", port = port),
1793                clone_path.to_str().unwrap(),
1794            ])
1795            .output()
1796            .unwrap();
1797
1798        let _ = server.kill();
1799        let _ = server.wait();
1800
1801        assert!(
1802            output.status.success(),
1803            "git clone failed: {}",
1804            String::from_utf8_lossy(&output.stderr)
1805        );
1806        assert_eq!(
1807            std::fs::read_to_string(clone_path.join("README.md")).unwrap(),
1808            "hello from hashtree\n"
1809        );
1810    }
1811}