//! Hashtree-backed git object and ref storage with configurable persistence
//!
//! Stores git objects and refs in a hashtree merkle tree:
//!   root/
//!     .git/
//!       HEAD -> "ref: refs/heads/main"
//!       refs/heads/main -> <commit-sha1>
//!       info/refs -> dumb-HTTP ref advertisement
//!       objects/XX/YYYY... -> zlib-compressed loose object (standard git layout)
//!       objects/info/packs -> dumb-HTTP pack advertisement
//!
//! The root hash (SHA-256) is the content-addressed identifier for the entire repo state.

use flate2::read::ZlibDecoder;
use flate2::write::ZlibEncoder;
use flate2::Compression;
use hashtree_config::{Config, StorageBackend};
use hashtree_core::store::{Store, StoreError, StoreStats};
use hashtree_core::types::Hash;
use hashtree_core::{Cid, DirEntry, HashTree, HashTreeConfig, LinkType};
use hashtree_fs::FsBlobStore;
#[cfg(feature = "lmdb")]
use hashtree_lmdb::LmdbBlobStore;
use sha1::{Digest, Sha1};
use std::collections::{BTreeMap, HashMap};
use std::io::{Read, Write};
use std::path::Path;
use std::sync::Arc;
use tokio::runtime::{Handle, Runtime};
use tracing::{debug, info, warn};

use super::object::{parse_tree, GitObject, ObjectId, ObjectType};
use super::refs::{validate_ref_name, Ref};
use super::{Error, Result};
35
/// Box type for async recursion: a pinned, boxed, `Send` future.
/// Used by `build_working_tree_entries_boxed`, since an `async fn` cannot
/// call itself directly (its future type would be infinitely sized).
type BoxFuture<'a, T> = std::pin::Pin<Box<dyn std::future::Future<Output = T> + Send + 'a>>;
38
/// Trie of ref names: `files` hold terminal ref values, `dirs` hold
/// nested path components (e.g. "refs/heads/main" nests two levels deep).
#[derive(Default)]
struct RefDirectory {
    files: BTreeMap<String, String>,
    dirs: BTreeMap<String, RefDirectory>,
}

impl RefDirectory {
    /// Insert `value` at the path named by `parts`, creating intermediate
    /// directories as needed. An empty path is a no-op.
    fn insert(&mut self, parts: &[&str], value: String) {
        let Some((&leaf, dir_parts)) = parts.split_last() else {
            return;
        };

        // Walk (and create) the directory chain, then drop the value
        // into the final node's file map.
        let mut node = self;
        for &segment in dir_parts {
            node = node.dirs.entry(segment.to_string()).or_default();
        }
        node.files.insert(leaf.to_string(), value);
    }
}
61
/// Runtime executor - either owns a runtime or reuses an existing one
enum RuntimeExecutor {
    /// Dedicated runtime, created when no tokio runtime was active at open time.
    Owned(Runtime),
    /// Handle to an already-running runtime (e.g. when opened from async code).
    Handle(Handle),
}

impl RuntimeExecutor {
    /// Drive the future `f` to completion on whichever runtime this wraps.
    fn block_on<F: std::future::Future>(&self, f: F) -> F::Output {
        match self {
            RuntimeExecutor::Owned(rt) => rt.block_on(f),
            // block_in_place tells tokio this worker thread is about to block,
            // so blocking on the handle does not stall the runtime's workers.
            RuntimeExecutor::Handle(handle) => tokio::task::block_in_place(|| handle.block_on(f)),
        }
    }
}
76
/// Local blob store - wraps either FsBlobStore or LmdbBlobStore
pub enum LocalStore {
    /// Filesystem-backed blob storage.
    Fs(FsBlobStore),
    /// LMDB-backed blob storage; only compiled in with the `lmdb` feature.
    #[cfg(feature = "lmdb")]
    Lmdb(LmdbBlobStore),
}
83
84impl LocalStore {
85    fn new_for_backend<P: AsRef<Path>>(
86        path: P,
87        backend: StorageBackend,
88        max_bytes: u64,
89    ) -> std::result::Result<Self, StoreError> {
90        match backend {
91            StorageBackend::Fs => {
92                if max_bytes > 0 {
93                    Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
94                        path, max_bytes,
95                    )?))
96                } else {
97                    Ok(LocalStore::Fs(FsBlobStore::new(path)?))
98                }
99            }
100            #[cfg(feature = "lmdb")]
101            StorageBackend::Lmdb => {
102                if max_bytes > 0 {
103                    Ok(LocalStore::Lmdb(LmdbBlobStore::with_max_bytes(
104                        path, max_bytes,
105                    )?))
106                } else {
107                    Ok(LocalStore::Lmdb(LmdbBlobStore::new(path)?))
108                }
109            }
110            #[cfg(not(feature = "lmdb"))]
111            StorageBackend::Lmdb => {
112                warn!(
113                    "LMDB backend requested but lmdb feature not enabled, using filesystem storage"
114                );
115                if max_bytes > 0 {
116                    Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
117                        path, max_bytes,
118                    )?))
119                } else {
120                    Ok(LocalStore::Fs(FsBlobStore::new(path)?))
121                }
122            }
123        }
124    }
125
126    /// Create a new local store based on config
127    pub fn new<P: AsRef<Path>>(path: P) -> std::result::Result<Self, StoreError> {
128        Self::new_with_max_bytes(path, 0)
129    }
130
131    /// Create a new local store based on config with an optional byte limit.
132    pub fn new_with_max_bytes<P: AsRef<Path>>(
133        path: P,
134        max_bytes: u64,
135    ) -> std::result::Result<Self, StoreError> {
136        let config = Config::load_or_default();
137        Self::new_for_backend(path, config.storage.backend, max_bytes)
138    }
139
140    /// List all hashes in the store
141    pub fn list(&self) -> std::result::Result<Vec<Hash>, StoreError> {
142        match self {
143            LocalStore::Fs(store) => store.list(),
144            #[cfg(feature = "lmdb")]
145            LocalStore::Lmdb(store) => store.list(),
146        }
147    }
148
149    /// Sync get operation
150    pub fn get_sync(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
151        match self {
152            LocalStore::Fs(store) => store.get_sync(hash),
153            #[cfg(feature = "lmdb")]
154            LocalStore::Lmdb(store) => store.get_sync(hash),
155        }
156    }
157}
158
#[async_trait::async_trait]
impl Store for LocalStore {
    // All methods delegate to whichever backend variant is active; the
    // `#[cfg(feature = "lmdb")]` arms only exist when LMDB is compiled in.
    async fn put(&self, hash: Hash, data: Vec<u8>) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.put(hash, data).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.put(hash, data).await,
        }
    }

    async fn get(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get(hash).await,
        }
    }

    async fn has(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.has(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.has(hash).await,
        }
    }

    async fn delete(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.delete(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.delete(hash).await,
        }
    }

    fn set_max_bytes(&self, max: u64) {
        match self {
            LocalStore::Fs(store) => store.set_max_bytes(max),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.set_max_bytes(max),
        }
    }

    fn max_bytes(&self) -> Option<u64> {
        match self {
            LocalStore::Fs(store) => store.max_bytes(),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.max_bytes(),
        }
    }

    // Best-effort stats: backend errors degrade to zeroed StoreStats rather
    // than failing the caller.
    async fn stats(&self) -> StoreStats {
        match self {
            LocalStore::Fs(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    pinned_count: stats.pinned_count as u64,
                    pinned_bytes: stats.pinned_bytes,
                },
                Err(_) => StoreStats::default(),
            },
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    // LMDB stats expose no pin information here, so pinned
                    // figures are reported as zero.
                    pinned_count: 0,
                    pinned_bytes: 0,
                },
                Err(_) => StoreStats::default(),
            },
        }
    }

    async fn evict_if_needed(&self) -> std::result::Result<u64, StoreError> {
        match self {
            LocalStore::Fs(store) => store.evict_if_needed().await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.evict_if_needed().await,
        }
    }
}
241
/// Git storage backed by HashTree with configurable persistence
pub struct GitStorage {
    /// Content-addressed blob store, shared with `tree`.
    store: Arc<LocalStore>,
    /// Merkle-tree builder layered over `store`.
    tree: HashTree<LocalStore>,
    /// Executor for running async tree/store operations from sync callers.
    runtime: RuntimeExecutor,
    /// In-memory state for the current session
    // Loose objects keyed by hex SHA-1; values are zlib-compressed loose format.
    objects: std::sync::RwLock<HashMap<String, Vec<u8>>>,
    // Ref name -> value, where a value is either a hex oid or "ref: <target>".
    refs: std::sync::RwLock<HashMap<String, String>>,
    /// Cached root CID (hash + encryption key)
    // Reset to None by every mutation of `objects` or `refs`.
    root_cid: std::sync::RwLock<Option<Cid>>,
}
253
254impl GitStorage {
255    /// Open or create a git storage at the given path
256    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
257        let config = Config::load_or_default();
258        let max_size_bytes = config
259            .storage
260            .max_size_gb
261            .saturating_mul(1024 * 1024 * 1024);
262        Self::open_with_max_bytes(path, max_size_bytes)
263    }
264
265    /// Open or create a git storage at the given path with an explicit byte limit.
266    pub fn open_with_max_bytes(path: impl AsRef<Path>, max_size_bytes: u64) -> Result<Self> {
267        let config = Config::load_or_default();
268        Self::open_with_backend_and_max_bytes(path, config.storage.backend, max_size_bytes)
269    }
270
271    pub fn open_with_backend_and_max_bytes(
272        path: impl AsRef<Path>,
273        backend: StorageBackend,
274        max_size_bytes: u64,
275    ) -> Result<Self> {
276        let runtime = match Handle::try_current() {
277            Ok(handle) => RuntimeExecutor::Handle(handle),
278            Err(_) => {
279                let rt = Runtime::new()
280                    .map_err(|e| Error::StorageError(format!("tokio runtime: {}", e)))?;
281                RuntimeExecutor::Owned(rt)
282            }
283        };
284
285        let store_path = path.as_ref().join("blobs");
286        let store = Arc::new(
287            LocalStore::new_for_backend(&store_path, backend, max_size_bytes)
288                .map_err(|e| Error::StorageError(format!("local store: {}", e)))?,
289        );
290
291        // Use encrypted mode (default) - blossom servers require encrypted data
292        let tree = HashTree::new(HashTreeConfig::new(store.clone()));
293
294        Ok(Self {
295            store,
296            tree,
297            runtime,
298            objects: std::sync::RwLock::new(HashMap::new()),
299            refs: std::sync::RwLock::new(HashMap::new()),
300            root_cid: std::sync::RwLock::new(None),
301        })
302    }
303
304    /// Evict old local blobs if storage is over the configured limit.
305    pub fn evict_if_needed(&self) -> Result<u64> {
306        self.runtime
307            .block_on(self.store.evict_if_needed())
308            .map_err(|e| Error::StorageError(format!("evict: {}", e)))
309    }
310
311    /// Write an object, returning its ID
312    fn write_object(&self, obj: &GitObject) -> Result<ObjectId> {
313        let oid = obj.id();
314        let key = oid.to_hex();
315
316        let loose = obj.to_loose_format();
317        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
318        encoder.write_all(&loose)?;
319        let compressed = encoder.finish()?;
320
321        let mut objects = self
322            .objects
323            .write()
324            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
325        objects.insert(key, compressed);
326
327        // Invalidate cached root
328        if let Ok(mut root) = self.root_cid.write() {
329            *root = None;
330        }
331
332        Ok(oid)
333    }
334
335    /// Write raw object data (type + content already parsed)
336    pub fn write_raw_object(&self, obj_type: ObjectType, content: &[u8]) -> Result<ObjectId> {
337        let obj = GitObject::new(obj_type, content.to_vec());
338        self.write_object(&obj)
339    }
340
341    /// Read an object by ID from in-memory cache
342    #[allow(dead_code)]
343    fn read_object(&self, oid: &ObjectId) -> Result<GitObject> {
344        let key = oid.to_hex();
345        let objects = self
346            .objects
347            .read()
348            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
349        let compressed = objects
350            .get(&key)
351            .ok_or_else(|| Error::ObjectNotFound(key.clone()))?;
352
353        let mut decoder = ZlibDecoder::new(compressed.as_slice());
354        let mut data = Vec::new();
355        decoder.read_to_end(&mut data)?;
356
357        GitObject::from_loose_format(&data)
358    }
359
360    /// Write a ref
361    pub fn write_ref(&self, name: &str, target: &Ref) -> Result<()> {
362        validate_ref_name(name)?;
363
364        let value = match target {
365            Ref::Direct(oid) => oid.to_hex(),
366            Ref::Symbolic(target) => format!("ref: {}", target),
367        };
368
369        let mut refs = self
370            .refs
371            .write()
372            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
373        refs.insert(name.to_string(), value);
374
375        // Invalidate cached root
376        if let Ok(mut root) = self.root_cid.write() {
377            *root = None;
378        }
379
380        Ok(())
381    }
382
383    /// Read a ref
384    #[allow(dead_code)]
385    pub fn read_ref(&self, name: &str) -> Result<Option<Ref>> {
386        let refs = self
387            .refs
388            .read()
389            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
390
391        match refs.get(name) {
392            Some(value) => {
393                if let Some(target) = value.strip_prefix("ref: ") {
394                    Ok(Some(Ref::Symbolic(target.to_string())))
395                } else {
396                    let oid = ObjectId::from_hex(value)
397                        .ok_or_else(|| Error::StorageError(format!("invalid ref: {}", value)))?;
398                    Ok(Some(Ref::Direct(oid)))
399                }
400            }
401            None => Ok(None),
402        }
403    }
404
405    /// List all refs
406    #[allow(dead_code)]
407    pub fn list_refs(&self) -> Result<HashMap<String, String>> {
408        let refs = self
409            .refs
410            .read()
411            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
412        Ok(refs.clone())
413    }
414
415    /// Delete a ref
416    pub fn delete_ref(&self, name: &str) -> Result<bool> {
417        let mut refs = self
418            .refs
419            .write()
420            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
421        let existed = refs.remove(name).is_some();
422
423        // Invalidate cached root
424        if let Ok(mut root) = self.root_cid.write() {
425            *root = None;
426        }
427
428        Ok(existed)
429    }
430
431    /// Import a raw git object (already in loose format, zlib compressed)
432    /// Used when fetching existing objects from remote before push
433    pub fn import_compressed_object(&self, oid: &str, compressed_data: Vec<u8>) -> Result<()> {
434        let mut objects = self
435            .objects
436            .write()
437            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
438        objects.insert(oid.to_string(), compressed_data);
439
440        // Invalidate cached root
441        if let Ok(mut root) = self.root_cid.write() {
442            *root = None;
443        }
444
445        Ok(())
446    }
447
448    /// Import a ref directly (used when loading existing refs from remote)
449    pub fn import_ref(&self, name: &str, value: &str) -> Result<()> {
450        let mut refs = self
451            .refs
452            .write()
453            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
454        refs.insert(name.to_string(), value.to_string());
455
456        // Invalidate cached root
457        if let Ok(mut root) = self.root_cid.write() {
458            *root = None;
459        }
460
461        Ok(())
462    }
463
464    /// Check if a ref exists
465    #[cfg(test)]
466    pub fn has_ref(&self, name: &str) -> Result<bool> {
467        let refs = self
468            .refs
469            .read()
470            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
471        Ok(refs.contains_key(name))
472    }
473
474    /// Get count of objects in storage
475    #[cfg(test)]
476    pub fn object_count(&self) -> Result<usize> {
477        let objects = self
478            .objects
479            .read()
480            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
481        Ok(objects.len())
482    }
483
484    /// Get the cached root CID (returns None if tree hasn't been built)
485    #[allow(dead_code)]
486    pub fn get_root_cid(&self) -> Result<Option<Cid>> {
487        let root = self
488            .root_cid
489            .read()
490            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
491        Ok(root.clone())
492    }
493
494    /// Get the default branch name
495    #[allow(dead_code)]
496    pub fn default_branch(&self) -> Result<Option<String>> {
497        let refs = self
498            .refs
499            .read()
500            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
501
502        if let Some(head) = refs.get("HEAD") {
503            if let Some(target) = head.strip_prefix("ref: ") {
504                return Ok(Some(target.to_string()));
505            }
506        }
507        Ok(None)
508    }
509
510    /// Get the tree SHA from a commit object
511    fn get_commit_tree(
512        &self,
513        commit_oid: &str,
514        objects: &HashMap<String, Vec<u8>>,
515    ) -> Option<String> {
516        let compressed = objects.get(commit_oid)?;
517
518        // Decompress the object
519        let mut decoder = ZlibDecoder::new(&compressed[..]);
520        let mut decompressed = Vec::new();
521        decoder.read_to_end(&mut decompressed).ok()?;
522
523        // Parse git object format: "type size\0content"
524        let null_pos = decompressed.iter().position(|&b| b == 0)?;
525        let content = &decompressed[null_pos + 1..];
526
527        // Parse commit content - first line is "tree <sha>"
528        let content_str = std::str::from_utf8(content).ok()?;
529        let first_line = content_str.lines().next()?;
530        first_line
531            .strip_prefix("tree ")
532            .map(|tree_hash| tree_hash.to_string())
533    }
534
535    /// Get git object content (decompressed, without header)
536    fn get_object_content(
537        &self,
538        oid: &str,
539        objects: &HashMap<String, Vec<u8>>,
540    ) -> Option<(ObjectType, Vec<u8>)> {
541        let compressed = objects.get(oid)?;
542
543        // Decompress the object
544        let mut decoder = ZlibDecoder::new(&compressed[..]);
545        let mut decompressed = Vec::new();
546        decoder.read_to_end(&mut decompressed).ok()?;
547
548        // Parse git object format: "type size\0content"
549        let null_pos = decompressed.iter().position(|&b| b == 0)?;
550        let header = std::str::from_utf8(&decompressed[..null_pos]).ok()?;
551        let obj_type = if header.starts_with("blob") {
552            ObjectType::Blob
553        } else if header.starts_with("tree") {
554            ObjectType::Tree
555        } else if header.starts_with("commit") {
556            ObjectType::Commit
557        } else if header.starts_with("tag") {
558            ObjectType::Tag
559        } else {
560            return None;
561        };
562        let content = decompressed[null_pos + 1..].to_vec();
563        Some((obj_type, content))
564    }
565
    /// Resolve an annotated tag to the oid of the underlying non-tag object.
    ///
    /// Returns `oid` itself when the object is not a tag, follows nested tags
    /// recursively, and returns `None` when any object in the chain is missing
    /// or malformed.
    fn peel_tag_target(&self, oid: &str, objects: &HashMap<String, Vec<u8>>) -> Option<String> {
        let (obj_type, content) = self.get_object_content(oid, objects)?;
        if obj_type != ObjectType::Tag {
            return Some(oid.to_string());
        }

        // A tag object carries an "object <sha>" header line naming its target.
        let target = std::str::from_utf8(&content)
            .ok()?
            .lines()
            .find_map(|line| line.strip_prefix("object "))
            .map(str::trim)?
            .to_string();

        // Tags may point at other tags; keep peeling until a non-tag is found.
        // (Content addressing makes cycles effectively impossible.)
        match self.get_object_content(&target, objects)?.0 {
            ObjectType::Tag => self.peel_tag_target(&target, objects),
            _ => Some(target),
        }
    }
584
585    fn build_info_refs_content(
586        &self,
587        refs: &HashMap<String, String>,
588        objects: &HashMap<String, Vec<u8>>,
589    ) -> String {
590        let mut lines = Vec::new();
591
592        for (name, value) in refs {
593            if name == "HEAD" {
594                continue;
595            }
596
597            let oid = value.trim().to_string();
598            lines.push((name.clone(), oid.clone()));
599
600            if name.starts_with("refs/tags/") {
601                if let Some(peeled) = self.peel_tag_target(&oid, objects) {
602                    if peeled != oid {
603                        lines.push((format!("{}^{{}}", name), peeled));
604                    }
605                }
606            }
607        }
608
609        lines.sort_by(|a, b| a.0.cmp(&b.0));
610
611        let mut content = String::new();
612        for (name, oid) in lines {
613            content.push_str(&oid);
614            content.push('\t');
615            content.push_str(&name);
616            content.push('\n');
617        }
618        content
619    }
620
621    async fn build_info_dir(
622        &self,
623        refs: &HashMap<String, String>,
624        objects: &HashMap<String, Vec<u8>>,
625    ) -> Result<Cid> {
626        let info_refs = self.build_info_refs_content(refs, objects);
627        let (info_refs_cid, info_refs_size) = self
628            .tree
629            .put(info_refs.as_bytes())
630            .await
631            .map_err(|e| Error::StorageError(format!("put info/refs: {}", e)))?;
632
633        self.tree
634            .put_directory(vec![
635                DirEntry::from_cid("refs", &info_refs_cid).with_size(info_refs_size)
636            ])
637            .await
638            .map_err(|e| Error::StorageError(format!("put info dir: {}", e)))
639    }
640
    /// Build the hashtree and return the root CID (hash + encryption key)
    ///
    /// Materializes the full repo layout described in the module docs
    /// (.git/HEAD, refs, objects, info, optional config/index, plus a working
    /// tree) and caches the resulting root CID until the next mutation.
    pub fn build_tree(&self) -> Result<Cid> {
        // Check if we have a cached root
        // NOTE(review): check-then-rebuild is not atomic; two threads may both
        // rebuild and the last writer wins. Harmless since the build is
        // deterministic, but worth confirming.
        if let Ok(root) = self.root_cid.read() {
            if let Some(ref cid) = *root {
                return Ok(cid.clone());
            }
        }

        // Best-effort eviction before the build; failure is logged, not fatal.
        if let Err(err) = self.evict_if_needed() {
            debug!("pre-build eviction skipped: {}", err);
        }

        let objects = self
            .objects
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;

        // Get default branch from HEAD or find first branch ref
        let (default_branch, commit_sha) = if let Some(head) = refs.get("HEAD") {
            let branch = head.strip_prefix("ref: ").map(String::from);
            let sha = branch.as_ref().and_then(|b| refs.get(b)).cloned();
            (branch, sha)
        } else {
            // No HEAD ref - find first refs/heads/* ref directly
            // (HashMap iteration order: "first" is arbitrary but stable per map)
            let mut branch_info: Option<(String, String)> = None;
            for (ref_name, sha) in refs.iter() {
                if ref_name.starts_with("refs/heads/") {
                    branch_info = Some((ref_name.clone(), sha.clone()));
                    break;
                }
            }
            match branch_info {
                Some((branch, sha)) => (Some(branch), Some(sha)),
                None => (None, None),
            }
        };

        // Get tree SHA from commit
        let tree_sha = commit_sha
            .as_ref()
            .and_then(|sha| self.get_commit_tree(sha, &objects));

        // Clone objects for async block
        // NOTE(review): this deep-copies every compressed object even though
        // the read guard stays alive for the whole block_on below — borrowing
        // `&objects` everywhere looks sufficient; confirm and drop the clone.
        let objects_clone = objects.clone();

        let root_cid = self.runtime.block_on(async {
            // Build objects directory
            let objects_cid = self.build_objects_dir(&objects).await?;

            // Build refs directory
            let refs_cid = self.build_refs_dir(&refs).await?;

            // Build dumb-HTTP info directory
            let info_cid = self.build_info_dir(&refs, &objects_clone).await?;

            // Build HEAD file - use default_branch if no explicit HEAD
            // Git expects HEAD to end with newline, so add it if missing
            let head_content = refs.get("HEAD")
                .map(|h| if h.ends_with('\n') { h.clone() } else { format!("{}\n", h) })
                .or_else(|| default_branch.as_ref().map(|b| format!("ref: {}\n", b)))
                .unwrap_or_else(|| "ref: refs/heads/main\n".to_string());
            debug!("HEAD content: {:?}", head_content);
            let (head_cid, head_size) = self.tree.put(head_content.as_bytes()).await
                .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
            debug!("HEAD hash: {}", hex::encode(head_cid.hash));

            // Build .git directory - use from_cid to preserve encryption keys
            let mut git_entries = vec![
                DirEntry::from_cid("HEAD", &head_cid).with_size(head_size),
                DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("objects", &objects_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("refs", &refs_cid).with_link_type(LinkType::Dir),
            ];

            // Add config if we have a default branch
            if let Some(ref branch) = default_branch {
                let config = format!(
                    "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = true\n[init]\n\tdefaultBranch = {}\n",
                    branch.trim_start_matches("refs/heads/")
                );
                let (config_cid, config_size) = self.tree.put(config.as_bytes()).await
                    .map_err(|e| Error::StorageError(format!("put config: {}", e)))?;
                git_entries.push(DirEntry::from_cid("config", &config_cid).with_size(config_size));
            }

            // Build and add index file if we have a tree SHA
            // (index build failures are tolerated: the repo is usable without one)
            if let Some(ref tree_oid) = tree_sha {
                match self.build_index_file(tree_oid, &objects_clone) {
                    Ok(index_data) => {
                        let (index_cid, index_size) = self.tree.put(&index_data).await
                            .map_err(|e| Error::StorageError(format!("put index: {}", e)))?;
                        git_entries.push(DirEntry::from_cid("index", &index_cid).with_size(index_size));
                        info!("Added git index file ({} bytes)", index_data.len());
                    }
                    Err(e) => {
                        debug!("Failed to build git index file: {} - continuing without index", e);
                    }
                }
            }

            let git_cid = self.tree.put_directory(git_entries).await
                .map_err(|e| Error::StorageError(format!("put .git: {}", e)))?;

            // Build root entries starting with .git
            // Use from_cid to preserve the encryption key
            let mut root_entries = vec![DirEntry::from_cid(".git", &git_cid).with_link_type(LinkType::Dir)];

            // Add working tree files if we have a tree SHA
            if let Some(ref tree_oid) = tree_sha {
                let working_tree_entries = self.build_working_tree_entries(tree_oid, &objects_clone).await?;
                root_entries.extend(working_tree_entries);
                info!("Added {} working tree entries to root", root_entries.len() - 1);
            }

            // Sort entries for deterministic ordering
            root_entries.sort_by(|a, b| a.name.cmp(&b.name));

            let root_cid = self.tree.put_directory(root_entries).await
                .map_err(|e| Error::StorageError(format!("put root: {}", e)))?;

            info!("Built hashtree root: {} (encrypted: {}) (.git dir: {})",
                hex::encode(root_cid.hash),
                root_cid.key.is_some(),
                hex::encode(git_cid.hash));

            Ok::<Cid, Error>(root_cid)
        })?;

        // Cache the root CID
        if let Ok(mut root) = self.root_cid.write() {
            *root = Some(root_cid.clone());
        }

        Ok(root_cid)
    }
781
    /// Build working tree entries from a git tree object
    ///
    /// Walks the git tree named by `tree_oid` and mirrors it into hashtree
    /// `DirEntry` values: subtrees become hashtree directories (recursively),
    /// blobs become chunked hashtree files. Entries that are neither a
    /// resolvable blob nor a subtree are skipped silently.
    async fn build_working_tree_entries(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<DirEntry>> {
        let mut entries = Vec::new();

        // Get tree content
        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        // Parse tree entries
        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                // Recursively build subdirectory
                // (goes through the boxed wrapper: async fns cannot self-recurse)
                let sub_entries = self
                    .build_working_tree_entries_boxed(&oid_hex, objects)
                    .await?;

                // Create subdirectory in hashtree
                let dir_cid =
                    self.tree.put_directory(sub_entries).await.map_err(|e| {
                        Error::StorageError(format!("put dir {}: {}", entry.name, e))
                    })?;

                // Use from_cid to preserve encryption key
                entries
                    .push(DirEntry::from_cid(&entry.name, &dir_cid).with_link_type(LinkType::Dir));
            } else {
                // Get blob content
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    // Use put() instead of put_blob() to chunk large files
                    let (cid, size) = self.tree.put(&blob_content).await.map_err(|e| {
                        Error::StorageError(format!("put blob {}: {}", entry.name, e))
                    })?;

                    // Use from_cid to preserve encryption key
                    entries.push(DirEntry::from_cid(&entry.name, &cid).with_size(size));
                }
            }
        }

        // Sort for deterministic ordering
        entries.sort_by(|a, b| a.name.cmp(&b.name));

        Ok(entries)
    }
844
    /// Boxed version for async recursion
    ///
    /// An `async fn` cannot call itself directly (the future type would be
    /// infinitely sized), so the recursive call is routed through this
    /// pinned, boxed wrapper instead.
    fn build_working_tree_entries_boxed<'a>(
        &'a self,
        tree_oid: &'a str,
        objects: &'a HashMap<String, Vec<u8>>,
    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
        Box::pin(self.build_working_tree_entries(tree_oid, objects))
    }
853
854    /// Build the objects directory using HashTree
855    async fn build_objects_dir(&self, objects: &HashMap<String, Vec<u8>>) -> Result<Cid> {
856        let mut top_entries = Vec::new();
857
858        if !objects.is_empty() {
859            // Group objects by first 2 characters of SHA (git loose object structure)
860            // Git expects objects/XX/YYYYYY... where XX is first 2 hex chars
861            let mut buckets: HashMap<String, Vec<(String, Vec<u8>)>> = HashMap::new();
862            for (oid, data) in objects {
863                let prefix = &oid[..2];
864                let suffix = &oid[2..];
865                buckets
866                    .entry(prefix.to_string())
867                    .or_default()
868                    .push((suffix.to_string(), data.clone()));
869            }
870
871            // Build subdirectories for each prefix
872            for (prefix, objs) in buckets {
873                let mut sub_entries = Vec::new();
874                for (suffix, data) in objs {
875                    // Use put() instead of put_blob() to chunk large objects
876                    // Git blobs can be >5MB which exceeds blossom server limits
877                    let (cid, size) = self.tree.put(&data).await.map_err(|e| {
878                        Error::StorageError(format!("put object {}{}: {}", prefix, suffix, e))
879                    })?;
880                    // Use from_cid to preserve encryption key
881                    sub_entries.push(DirEntry::from_cid(suffix, &cid).with_size(size));
882                }
883                // Sort for deterministic ordering
884                sub_entries.sort_by(|a, b| a.name.cmp(&b.name));
885
886                let sub_cid =
887                    self.tree.put_directory(sub_entries).await.map_err(|e| {
888                        Error::StorageError(format!("put objects/{}: {}", prefix, e))
889                    })?;
890                top_entries
891                    .push(DirEntry::from_cid(prefix, &sub_cid).with_link_type(LinkType::Dir));
892            }
893        }
894
895        let (packs_cid, packs_size) = self
896            .tree
897            .put(b"")
898            .await
899            .map_err(|e| Error::StorageError(format!("put objects/info/packs: {}", e)))?;
900        let info_cid = self
901            .tree
902            .put_directory(vec![
903                DirEntry::from_cid("packs", &packs_cid).with_size(packs_size)
904            ])
905            .await
906            .map_err(|e| Error::StorageError(format!("put objects/info: {}", e)))?;
907        top_entries.push(DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir));
908
909        // Sort for deterministic ordering
910        top_entries.sort_by(|a, b| a.name.cmp(&b.name));
911
912        let entry_count = top_entries.len();
913        let cid = self
914            .tree
915            .put_directory(top_entries)
916            .await
917            .map_err(|e| Error::StorageError(format!("put objects dir: {}", e)))?;
918
919        debug!(
920            "Built objects dir with {} entries: {}",
921            entry_count,
922            hex::encode(cid.hash)
923        );
924        Ok(cid)
925    }
926
927    /// Build the refs directory using HashTree
928    async fn build_refs_dir(&self, refs: &HashMap<String, String>) -> Result<Cid> {
929        let mut root = RefDirectory::default();
930
931        for (ref_name, value) in refs {
932            let parts: Vec<&str> = ref_name.split('/').collect();
933            if parts.len() >= 3 && parts[0] == "refs" {
934                root.insert(&parts[1..], value.clone());
935            }
936        }
937
938        let mut ref_entries = self.build_ref_entries_recursive(&root, "refs").await?;
939
940        if ref_entries.is_empty() {
941            // Return empty directory Cid
942            let empty_cid = self
943                .tree
944                .put_directory(vec![])
945                .await
946                .map_err(|e| Error::StorageError(format!("put empty refs: {}", e)))?;
947            return Ok(empty_cid);
948        }
949
950        ref_entries.sort_by(|a, b| a.name.cmp(&b.name));
951
952        let refs_cid = self
953            .tree
954            .put_directory(ref_entries)
955            .await
956            .map_err(|e| Error::StorageError(format!("put refs dir: {}", e)))?;
957        debug!("refs dir -> {}", hex::encode(refs_cid.hash));
958        Ok(refs_cid)
959    }
960
961    fn build_ref_entries_recursive<'a>(
962        &'a self,
963        dir: &'a RefDirectory,
964        prefix: &'a str,
965    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
966        Box::pin(async move {
967            let mut entries = Vec::new();
968
969            for (name, value) in &dir.files {
970                let (cid, size) = self
971                    .tree
972                    .put(value.as_bytes())
973                    .await
974                    .map_err(|e| Error::StorageError(format!("put ref: {}", e)))?;
975                debug!("{}/{} -> blob {}", prefix, name, hex::encode(cid.hash));
976                entries.push(DirEntry::from_cid(name, &cid).with_size(size));
977            }
978
979            for (name, child) in &dir.dirs {
980                let child_prefix = format!("{prefix}/{name}");
981                let child_entries = self
982                    .build_ref_entries_recursive(child, &child_prefix)
983                    .await?;
984                let child_cid =
985                    self.tree.put_directory(child_entries).await.map_err(|e| {
986                        Error::StorageError(format!("put {child_prefix} dir: {}", e))
987                    })?;
988                debug!("{} dir -> {}", child_prefix, hex::encode(child_cid.hash));
989                entries.push(DirEntry::from_cid(name, &child_cid).with_link_type(LinkType::Dir));
990            }
991
992            entries.sort_by(|a, b| a.name.cmp(&b.name));
993            Ok(entries)
994        })
995    }
996
997    /// Build git index file from tree entries
998    /// Returns the raw binary content of the index file
999    fn build_index_file(
1000        &self,
1001        tree_oid: &str,
1002        objects: &HashMap<String, Vec<u8>>,
1003    ) -> Result<Vec<u8>> {
1004        // Collect all file entries from the tree (recursively)
1005        let mut entries: Vec<(String, [u8; 20], u32, u32)> = Vec::new(); // (path, sha1, mode, size)
1006        self.collect_tree_entries_for_index(tree_oid, objects, "", &mut entries)?;
1007
1008        // Sort entries by path (git index requirement)
1009        entries.sort_by(|a, b| a.0.cmp(&b.0));
1010
1011        let entry_count = entries.len() as u32;
1012        debug!("Building git index with {} entries", entry_count);
1013
1014        // Build index content
1015        let mut index_data = Vec::new();
1016
1017        // Header: DIRC + version 2 + entry count
1018        index_data.extend_from_slice(b"DIRC");
1019        index_data.extend_from_slice(&2u32.to_be_bytes()); // version 2
1020        index_data.extend_from_slice(&entry_count.to_be_bytes());
1021
1022        // Current time for ctime/mtime (doesn't matter much for our use case)
1023        let now_sec = std::time::SystemTime::now()
1024            .duration_since(std::time::UNIX_EPOCH)
1025            .unwrap_or_default()
1026            .as_secs() as u32;
1027
1028        for (path, sha1, mode, size) in &entries {
1029            let entry_start = index_data.len();
1030
1031            // ctime sec, nsec
1032            index_data.extend_from_slice(&now_sec.to_be_bytes());
1033            index_data.extend_from_slice(&0u32.to_be_bytes());
1034            // mtime sec, nsec
1035            index_data.extend_from_slice(&now_sec.to_be_bytes());
1036            index_data.extend_from_slice(&0u32.to_be_bytes());
1037            // dev, ino (use 0)
1038            index_data.extend_from_slice(&0u32.to_be_bytes());
1039            index_data.extend_from_slice(&0u32.to_be_bytes());
1040            // mode
1041            index_data.extend_from_slice(&mode.to_be_bytes());
1042            // uid, gid (use 0)
1043            index_data.extend_from_slice(&0u32.to_be_bytes());
1044            index_data.extend_from_slice(&0u32.to_be_bytes());
1045            // file size
1046            index_data.extend_from_slice(&size.to_be_bytes());
1047            // SHA-1
1048            index_data.extend_from_slice(sha1);
1049            // flags: path length (max 0xFFF) in low 12 bits
1050            let path_len = std::cmp::min(path.len(), 0xFFF) as u16;
1051            index_data.extend_from_slice(&path_len.to_be_bytes());
1052            // path (NUL-terminated)
1053            index_data.extend_from_slice(path.as_bytes());
1054            index_data.push(0); // NUL terminator
1055
1056            // Pad to 8-byte boundary relative to entry start
1057            let entry_len = index_data.len() - entry_start;
1058            let padding = (8 - (entry_len % 8)) % 8;
1059            index_data.extend(std::iter::repeat_n(0, padding));
1060        }
1061
1062        // Calculate SHA-1 checksum of everything and append
1063        let mut hasher = Sha1::new();
1064        hasher.update(&index_data);
1065        let checksum = hasher.finalize();
1066        index_data.extend_from_slice(&checksum);
1067
1068        debug!(
1069            "Built git index: {} bytes, {} entries",
1070            index_data.len(),
1071            entry_count
1072        );
1073        Ok(index_data)
1074    }
1075
1076    /// Collect file entries from a git tree for building the index
1077    fn collect_tree_entries_for_index(
1078        &self,
1079        tree_oid: &str,
1080        objects: &HashMap<String, Vec<u8>>,
1081        prefix: &str,
1082        entries: &mut Vec<(String, [u8; 20], u32, u32)>,
1083    ) -> Result<()> {
1084        let (obj_type, content) = self
1085            .get_object_content(tree_oid, objects)
1086            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;
1087
1088        if obj_type != ObjectType::Tree {
1089            return Err(Error::InvalidObjectType(format!(
1090                "expected tree, got {:?}",
1091                obj_type
1092            )));
1093        }
1094
1095        let tree_entries = parse_tree(&content)?;
1096
1097        for entry in tree_entries {
1098            let path = if prefix.is_empty() {
1099                entry.name.clone()
1100            } else {
1101                format!("{}/{}", prefix, entry.name)
1102            };
1103
1104            let oid_hex = entry.oid.to_hex();
1105
1106            if entry.is_tree() {
1107                // Recursively process subdirectory
1108                self.collect_tree_entries_for_index(&oid_hex, objects, &path, entries)?;
1109            } else {
1110                // Get blob content for size and SHA-1
1111                if let Some((ObjectType::Blob, blob_content)) =
1112                    self.get_object_content(&oid_hex, objects)
1113                {
1114                    // Convert hex SHA to bytes
1115                    let mut sha1_bytes = [0u8; 20];
1116                    if let Ok(bytes) = hex::decode(&oid_hex) {
1117                        if bytes.len() == 20 {
1118                            sha1_bytes.copy_from_slice(&bytes);
1119                        }
1120                    }
1121
1122                    // Mode: use entry.mode or default to regular file
1123                    let mode = entry.mode;
1124                    let size = blob_content.len() as u32;
1125
1126                    entries.push((path, sha1_bytes, mode, size));
1127                }
1128            }
1129        }
1130
1131        Ok(())
1132    }
1133
    /// Get the underlying store.
    ///
    /// Returns the shared handle so callers can work with raw blobs directly
    /// (e.g. `push_to_file_servers` lists and reads blobs through it).
    pub fn store(&self) -> &Arc<LocalStore> {
        &self.store
    }
1138
    /// Get the HashTree for direct access.
    // `dead_code` allowed: no in-crate callers at the moment; kept as an
    // escape hatch for direct tree operations.
    #[allow(dead_code)]
    pub fn hashtree(&self) -> &HashTree<LocalStore> {
        &self.tree
    }
1144
1145    /// Push all blobs to file servers
1146    #[allow(dead_code)]
1147    pub fn push_to_file_servers(
1148        &self,
1149        blossom: &hashtree_blossom::BlossomClient,
1150    ) -> Result<(usize, usize)> {
1151        let hashes = self
1152            .store
1153            .list()
1154            .map_err(|e| Error::StorageError(format!("list hashes: {}", e)))?;
1155
1156        info!("Pushing {} blobs to file servers", hashes.len());
1157
1158        let mut uploaded = 0;
1159        let mut existed = 0;
1160
1161        self.runtime.block_on(async {
1162            for hash in &hashes {
1163                let hex_hash = hex::encode(hash);
1164                let data = match self.store.get_sync(hash) {
1165                    Ok(Some(d)) => d,
1166                    _ => continue,
1167                };
1168
1169                match blossom.upload_if_missing(&data).await {
1170                    Ok((_, true)) => {
1171                        debug!("Uploaded {}", &hex_hash[..12]);
1172                        uploaded += 1;
1173                    }
1174                    Ok((_, false)) => {
1175                        existed += 1;
1176                    }
1177                    Err(e) => {
1178                        debug!("Failed to upload {}: {}", &hex_hash[..12], e);
1179                    }
1180                }
1181            }
1182        });
1183
1184        info!(
1185            "Upload complete: {} new, {} already existed",
1186            uploaded, existed
1187        );
1188        Ok((uploaded, existed))
1189    }
1190
1191    /// Clear all state (for testing or re-initialization)
1192    #[allow(dead_code)]
1193    pub fn clear(&self) -> Result<()> {
1194        let mut objects = self
1195            .objects
1196            .write()
1197            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1198        let mut refs = self
1199            .refs
1200            .write()
1201            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1202        let mut root = self
1203            .root_cid
1204            .write()
1205            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1206
1207        objects.clear();
1208        refs.clear();
1209        *root = None;
1210        Ok(())
1211    }
1212}
1213
1214#[cfg(test)]
1215mod tests {
1216    use super::*;
1217    use hashtree_core::store::Store;
1218    use hashtree_core::{sha256, LinkType};
1219    use std::io::{Read, Write};
1220    use std::net::{TcpListener, TcpStream};
1221    use std::path::Path;
1222    use std::process::{Child, Command, Stdio};
1223    use std::time::{Duration, Instant};
1224    use tempfile::TempDir;
1225
1226    fn create_test_storage() -> (GitStorage, TempDir) {
1227        let temp_dir = TempDir::new().unwrap();
1228        let storage = GitStorage::open(temp_dir.path()).unwrap();
1229        (storage, temp_dir)
1230    }
1231
1232    fn create_test_storage_with_limit(max_size_bytes: u64) -> (GitStorage, TempDir) {
1233        let temp_dir = TempDir::new().unwrap();
1234        let storage = GitStorage::open_with_backend_and_max_bytes(
1235            temp_dir.path(),
1236            StorageBackend::Fs,
1237            max_size_bytes,
1238        )
1239        .unwrap();
1240        (storage, temp_dir)
1241    }
1242
    /// Total bytes currently held by the local blob store, whichever backend
    /// variant is active.
    fn local_total_bytes(storage: &GitStorage) -> u64 {
        match storage.store().as_ref() {
            LocalStore::Fs(store) => store.stats().unwrap().total_bytes,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.stats().unwrap().total_bytes,
        }
    }
1250
1251    fn write_test_commit(storage: &GitStorage) -> ObjectId {
1252        let blob_oid = storage
1253            .write_raw_object(ObjectType::Blob, b"hello from hashtree\n")
1254            .unwrap();
1255
1256        let mut tree_content = Vec::new();
1257        tree_content.extend_from_slice(b"100644 README.md\0");
1258        tree_content.extend_from_slice(&hex::decode(blob_oid.to_hex()).unwrap());
1259        let tree_oid = storage
1260            .write_raw_object(ObjectType::Tree, &tree_content)
1261            .unwrap();
1262
1263        let commit_content = format!(
1264            "tree {}\nauthor Test User <test@example.com> 0 +0000\ncommitter Test User <test@example.com> 0 +0000\n\nInitial commit\n",
1265            tree_oid.to_hex()
1266        );
1267        storage
1268            .write_raw_object(ObjectType::Commit, commit_content.as_bytes())
1269            .unwrap()
1270    }
1271
1272    fn export_tree_to_fs<S: Store>(
1273        runtime: &RuntimeExecutor,
1274        tree: &HashTree<S>,
1275        cid: &Cid,
1276        dst: &Path,
1277    ) {
1278        std::fs::create_dir_all(dst).unwrap();
1279        let entries = runtime.block_on(tree.list_directory(cid)).unwrap();
1280        for entry in entries {
1281            let entry_cid = Cid {
1282                hash: entry.hash,
1283                key: entry.key,
1284            };
1285            let path = dst.join(&entry.name);
1286            match entry.link_type {
1287                LinkType::Dir => export_tree_to_fs(runtime, tree, &entry_cid, &path),
1288                LinkType::Blob | LinkType::File => {
1289                    let data = runtime
1290                        .block_on(tree.get(&entry_cid, None))
1291                        .unwrap()
1292                        .unwrap();
1293                    if let Some(parent) = path.parent() {
1294                        std::fs::create_dir_all(parent).unwrap();
1295                    }
1296                    std::fs::write(path, data).unwrap();
1297                }
1298            }
1299        }
1300    }
1301
1302    fn spawn_http_server(root: &Path, port: u16) -> Child {
1303        Command::new("python3")
1304            .args([
1305                "-m",
1306                "http.server",
1307                &port.to_string(),
1308                "--bind",
1309                "127.0.0.1",
1310            ])
1311            .current_dir(root)
1312            .stdout(Stdio::null())
1313            .stderr(Stdio::null())
1314            .spawn()
1315            .expect("spawn python http server")
1316    }
1317
    /// Poll the spawned HTTP server until `GET path` returns 200, panicking
    /// if the child process exits first or a 5-second deadline elapses.
    fn wait_for_http_server(server: &mut Child, port: u16, path: &str) {
        let deadline = Instant::now() + Duration::from_secs(5);

        loop {
            // Fail fast if the child died (e.g. python3 missing, port in use).
            if let Some(status) = server.try_wait().expect("check http server status") {
                panic!("python http server exited before becoming ready: {status}");
            }

            if let Ok(mut stream) = TcpStream::connect(("127.0.0.1", port)) {
                // Short timeouts keep each probe quick inside the retry loop.
                stream
                    .set_read_timeout(Some(Duration::from_millis(200)))
                    .expect("set read timeout");
                stream
                    .set_write_timeout(Some(Duration::from_millis(200)))
                    .expect("set write timeout");
                let request =
                    format!("GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n");
                if stream.write_all(request.as_bytes()).is_ok() {
                    let mut response = String::new();
                    // Expects an HTTP/1.0 status line (what python's
                    // http.server emits by default).
                    if stream.read_to_string(&mut response).is_ok()
                        && response.starts_with("HTTP/1.0 200")
                    {
                        return;
                    }
                }
            }

            if Instant::now() >= deadline {
                panic!("python http server did not become ready on port {port}");
            }
            std::thread::sleep(Duration::from_millis(50));
        }
    }
1351
1352    #[test]
1353    fn test_import_ref() {
1354        let (storage, _temp) = create_test_storage();
1355
1356        // Import a ref
1357        storage
1358            .import_ref("refs/heads/main", "abc123def456")
1359            .unwrap();
1360
1361        // Check it exists
1362        assert!(storage.has_ref("refs/heads/main").unwrap());
1363
1364        // Check value via list_refs
1365        let refs = storage.list_refs().unwrap();
1366        assert_eq!(
1367            refs.get("refs/heads/main"),
1368            Some(&"abc123def456".to_string())
1369        );
1370    }
1371
1372    #[test]
1373    fn test_import_multiple_refs_preserves_all() {
1374        let (storage, _temp) = create_test_storage();
1375
1376        // Import multiple refs (simulating loading from remote)
1377        storage.import_ref("refs/heads/main", "sha_main").unwrap();
1378        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
1379        storage
1380            .import_ref("refs/heads/feature", "sha_feature")
1381            .unwrap();
1382
1383        // All should exist
1384        assert!(storage.has_ref("refs/heads/main").unwrap());
1385        assert!(storage.has_ref("refs/heads/dev").unwrap());
1386        assert!(storage.has_ref("refs/heads/feature").unwrap());
1387
1388        // Now write a new ref (simulating push)
1389        storage
1390            .write_ref(
1391                "refs/heads/new-branch",
1392                &Ref::Direct(
1393                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1394                ),
1395            )
1396            .unwrap();
1397
1398        // Original refs should still exist
1399        let refs = storage.list_refs().unwrap();
1400        assert_eq!(refs.len(), 4);
1401        assert!(refs.contains_key("refs/heads/main"));
1402        assert!(refs.contains_key("refs/heads/dev"));
1403        assert!(refs.contains_key("refs/heads/feature"));
1404        assert!(refs.contains_key("refs/heads/new-branch"));
1405    }
1406
1407    #[test]
1408    fn test_import_compressed_object() {
1409        let (storage, _temp) = create_test_storage();
1410
1411        // Create a fake compressed object
1412        let fake_compressed = vec![0x78, 0x9c, 0x01, 0x02, 0x03]; // fake zlib data
1413
1414        storage
1415            .import_compressed_object("abc123def456", fake_compressed.clone())
1416            .unwrap();
1417
1418        // Check object count
1419        assert_eq!(storage.object_count().unwrap(), 1);
1420    }
1421
1422    #[test]
1423    fn test_write_ref_overwrites_imported() {
1424        let (storage, _temp) = create_test_storage();
1425
1426        // Import a ref
1427        storage.import_ref("refs/heads/main", "old_sha").unwrap();
1428
1429        // Write same ref with new value
1430        storage
1431            .write_ref(
1432                "refs/heads/main",
1433                &Ref::Direct(
1434                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1435                ),
1436            )
1437            .unwrap();
1438
1439        // Should have new value
1440        let refs = storage.list_refs().unwrap();
1441        assert_eq!(
1442            refs.get("refs/heads/main"),
1443            Some(&"0123456789abcdef0123456789abcdef01234567".to_string())
1444        );
1445    }
1446
1447    #[test]
1448    fn test_delete_ref_preserves_others() {
1449        let (storage, _temp) = create_test_storage();
1450
1451        // Import multiple refs
1452        storage.import_ref("refs/heads/main", "sha_main").unwrap();
1453        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
1454
1455        // Delete one
1456        storage.delete_ref("refs/heads/dev").unwrap();
1457
1458        // Other should still exist
1459        assert!(storage.has_ref("refs/heads/main").unwrap());
1460        assert!(!storage.has_ref("refs/heads/dev").unwrap());
1461    }
1462
1463    #[test]
1464    fn test_clear_removes_all() {
1465        let (storage, _temp) = create_test_storage();
1466
1467        // Import refs and objects
1468        storage.import_ref("refs/heads/main", "sha_main").unwrap();
1469        storage
1470            .import_compressed_object("obj1", vec![1, 2, 3])
1471            .unwrap();
1472
1473        // Clear
1474        storage.clear().unwrap();
1475
1476        // All gone
1477        assert!(!storage.has_ref("refs/heads/main").unwrap());
1478        assert_eq!(storage.object_count().unwrap(), 0);
1479    }
1480
1481    #[test]
1482    fn test_evict_if_needed_respects_configured_limit() {
1483        let (storage, _temp) = create_test_storage_with_limit(1_024);
1484
1485        storage
1486            .write_raw_object(ObjectType::Blob, &vec![b'a'; 900])
1487            .unwrap();
1488        storage
1489            .write_raw_object(ObjectType::Blob, &vec![b'b'; 900])
1490            .unwrap();
1491        storage
1492            .write_ref(
1493                "refs/heads/main",
1494                &Ref::Direct(
1495                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1496                ),
1497            )
1498            .unwrap();
1499
1500        storage.build_tree().unwrap();
1501
1502        let before = local_total_bytes(&storage);
1503        assert!(before > 1_024);
1504
1505        let freed = storage.evict_if_needed().unwrap();
1506        assert!(freed > 0);
1507
1508        let after = local_total_bytes(&storage);
1509        assert!(after <= 1_024);
1510    }
1511
    /// build_tree should pre-evict stale (unreferenced) blobs when the store
    /// is already over its configured size limit, before writing new data.
    #[test]
    fn test_build_tree_evicts_stale_blobs_before_writing_new_tree() {
        let max_size_bytes = 16 * 1024;
        let (storage, _temp) = create_test_storage_with_limit(max_size_bytes);

        // Three 7 KiB blobs (21 KiB total) pushed straight into the blob
        // store, bypassing git bookkeeping — nothing references them.
        let stale_blobs = vec![
            vec![b'x'; 7 * 1024],
            vec![b'y'; 7 * 1024],
            vec![b'z'; 7 * 1024],
        ];
        let stale_hashes: Vec<Hash> = stale_blobs.iter().map(|blob| sha256(blob)).collect();

        for (hash, blob) in stale_hashes.iter().zip(stale_blobs) {
            storage
                .runtime
                .block_on(storage.store().put(*hash, blob))
                .unwrap();
        }

        // Sanity: the store is now over the configured limit.
        let before = local_total_bytes(&storage);
        assert!(before > max_size_bytes);

        let commit_oid = write_test_commit(&storage);
        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        storage.build_tree().unwrap();

        // Count how many of the stale blobs the preflight eviction removed.
        let evicted_stale = stale_hashes
            .iter()
            .filter(|hash| !storage.runtime.block_on(storage.store().has(hash)).unwrap())
            .count();

        assert!(
            evicted_stale > 0,
            "expected build_tree preflight eviction to remove stale blobs before writing"
        );
    }
1554
    /// build_tree must emit the dumb-HTTP metadata: `.git/info/refs`
    /// (including a peeled `^{}` line for the annotated tag) and an empty
    /// `.git/objects/info/packs`.
    #[test]
    fn test_build_tree_adds_dumb_http_metadata() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        // Annotated tag pointing at the commit, so info/refs gets a peeled line.
        let tag_content = format!(
            "object {}\ntype commit\ntag v1.0.0\ntagger Test User <test@example.com> 0 +0000\n\nrelease\n",
            commit_oid.to_hex()
        );
        let tag_oid = storage
            .write_raw_object(ObjectType::Tag, tag_content.as_bytes())
            .unwrap();

        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(tag_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        // Fetch .git/info/refs out of the built tree.
        let info_refs_cid = storage
            .runtime
            .block_on(storage.tree.resolve_path(&root_cid, ".git/info/refs"))
            .unwrap()
            .expect("info/refs exists");
        let info_refs = storage
            .runtime
            .block_on(storage.tree.get(&info_refs_cid, None))
            .unwrap()
            .unwrap();
        let info_refs = String::from_utf8(info_refs).unwrap();

        // Branch line, tag line, then the tag's peeled (^{}) commit line.
        assert_eq!(
            info_refs,
            format!(
                "{commit}\trefs/heads/main\n{tag}\trefs/tags/v1.0.0\n{commit}\trefs/tags/v1.0.0^{{}}\n",
                commit = commit_oid.to_hex(),
                tag = tag_oid.to_hex()
            )
        );

        // No packs are served, but the advertisement file must exist (empty).
        let packs_cid = storage
            .runtime
            .block_on(
                storage
                    .tree
                    .resolve_path(&root_cid, ".git/objects/info/packs"),
            )
            .unwrap()
            .expect("objects/info/packs exists");
        let packs = storage
            .runtime
            .block_on(storage.tree.get(&packs_cid, None))
            .unwrap()
            .unwrap();
        assert!(packs.is_empty(), "objects/info/packs should be empty");
    }
1616
    /// Every direct ref must be materialized as a loose file at its standard
    /// `.git/refs/...` path (including branch names containing slashes),
    /// containing the target oid as text.
    #[test]
    fn test_build_tree_materializes_loose_refs_at_git_paths() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);

        storage
            .write_ref("refs/heads/master", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/heads/codex/meshrouter-prod", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/master".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        for path in [
            ".git/refs/heads/master",
            ".git/refs/heads/codex/meshrouter-prod",
            ".git/refs/tags/v1.0.0",
        ] {
            // Each ref resolves to a blob whose content is the commit sha.
            let ref_cid = storage
                .runtime
                .block_on(storage.tree.resolve_path(&root_cid, path))
                .unwrap()
                .unwrap_or_else(|| panic!("{path} should exist"));
            let ref_value = storage
                .runtime
                .block_on(storage.tree.get(&ref_cid, None))
                .unwrap()
                .unwrap();
            assert_eq!(
                String::from_utf8(ref_value).unwrap(),
                commit_oid.to_hex(),
                "{path} should contain the ref target",
            );
        }
    }
1659
    /// End-to-end check: the materialized tree, exported to plain files and
    /// served by a static HTTP server, must be cloneable by stock `git` via
    /// the dumb-HTTP protocol. Requires `git` and `python3` on PATH.
    #[test]
    fn test_materialized_tree_supports_static_http_clone_from_git_dir() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        // Export the built tree to disk so python can serve it statically.
        let root_cid = storage.build_tree().unwrap();
        let export_dir = TempDir::new().unwrap();
        let repo_dir = export_dir.path().join("repo");
        export_tree_to_fs(&storage.runtime, &storage.tree, &root_cid, &repo_dir);

        // Grab a free port by binding then releasing it.
        // NOTE(review): small race window between drop and server bind.
        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
        let port = listener.local_addr().unwrap().port();
        drop(listener);

        let mut server = spawn_http_server(export_dir.path(), port);
        wait_for_http_server(&mut server, port, "/repo/.git/HEAD");

        // Clone over dumb HTTP with stock git.
        let clone_dir = TempDir::new().unwrap();
        let clone_path = clone_dir.path().join("clone");
        let output = Command::new("git")
            .args([
                "clone",
                &format!("http://127.0.0.1:{port}/repo/.git", port = port),
                clone_path.to_str().unwrap(),
            ])
            .output()
            .unwrap();

        // Always tear the server down before asserting.
        let _ = server.kill();
        let _ = server.wait();

        assert!(
            output.status.success(),
            "git clone failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        // The cloned working tree must contain the blob written by
        // write_test_commit.
        assert_eq!(
            std::fs::read_to_string(clone_path.join("README.md")).unwrap(),
            "hello from hashtree\n"
        );
    }
1707}