Skip to main content

git_remote_htree/git/
storage.rs

1//! Hashtree-backed git object and ref storage with configurable persistence
2//!
3//! Stores git objects and refs in a hashtree merkle tree:
4//!   root/
5//!     .git/
6//!       HEAD -> "ref: refs/heads/main"
7//!       refs/heads/main -> <commit-sha1>
8//!       info/refs -> dumb-HTTP ref advertisement
9//!       objects/XX/YYYY... -> zlib-compressed loose object (standard git layout)
10//!       objects/info/packs -> dumb-HTTP pack advertisement
11//!
12//! The root hash (SHA-256) is the content-addressed identifier for the entire repo state.
13
14use flate2::read::ZlibDecoder;
15use flate2::write::ZlibEncoder;
16use flate2::Compression;
17use hashtree_config::{Config, StorageBackend};
18use hashtree_core::store::{Store, StoreError, StoreStats};
19use hashtree_core::types::Hash;
20use hashtree_core::{Cid, DirEntry, HashTree, HashTreeConfig, LinkType};
21use hashtree_fs::FsBlobStore;
22#[cfg(feature = "lmdb")]
23use hashtree_lmdb::LmdbBlobStore;
24use sha1::{Digest, Sha1};
25use std::collections::HashMap;
26use std::io::{Read, Write};
27use std::path::Path;
28use std::sync::Arc;
29use tokio::runtime::{Handle, Runtime};
30use tracing::{debug, info};
31
32use super::object::{parse_tree, GitObject, ObjectId, ObjectType};
33use super::refs::{validate_ref_name, Ref};
34use super::{Error, Result};
35
/// Box type for async recursion
///
/// An `async fn` cannot call itself directly; recursive calls are routed
/// through this heap-pinned, type-erased future instead (see
/// `build_working_tree_entries_boxed`).
type BoxFuture<'a, T> = std::pin::Pin<Box<dyn std::future::Future<Output = T> + Send + 'a>>;
38
/// Runtime executor - either owns a runtime or reuses an existing one
///
/// `Owned` is used when `GitStorage` is constructed outside any tokio
/// runtime; `Handle` borrows the ambient runtime when one is detected
/// (see `open_with_backend_and_max_bytes`).
enum RuntimeExecutor {
    Owned(Runtime),
    Handle(Handle),
}
44
impl RuntimeExecutor {
    /// Drive a future to completion on the wrapped runtime, blocking the
    /// calling thread.
    ///
    /// For the borrowed-`Handle` case, `block_in_place` tells the runtime
    /// this worker thread is about to block so `block_on` does not deadlock.
    /// NOTE(review): `tokio::task::block_in_place` panics on a
    /// current-thread runtime — confirm callers only reach this path on
    /// multi-thread runtimes.
    fn block_on<F: std::future::Future>(&self, f: F) -> F::Output {
        match self {
            RuntimeExecutor::Owned(rt) => rt.block_on(f),
            RuntimeExecutor::Handle(handle) => tokio::task::block_in_place(|| handle.block_on(f)),
        }
    }
}
53
/// Local blob store - wraps either FsBlobStore or LmdbBlobStore
///
/// The `Lmdb` variant exists only when the crate is built with the `lmdb`
/// feature; otherwise the filesystem backend is the sole option.
pub enum LocalStore {
    Fs(FsBlobStore),
    #[cfg(feature = "lmdb")]
    Lmdb(LmdbBlobStore),
}
60
61impl LocalStore {
62    fn new_for_backend<P: AsRef<Path>>(
63        path: P,
64        backend: StorageBackend,
65        max_bytes: u64,
66    ) -> std::result::Result<Self, StoreError> {
67        match backend {
68            StorageBackend::Fs => {
69                if max_bytes > 0 {
70                    Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
71                        path, max_bytes,
72                    )?))
73                } else {
74                    Ok(LocalStore::Fs(FsBlobStore::new(path)?))
75                }
76            }
77            #[cfg(feature = "lmdb")]
78            StorageBackend::Lmdb => {
79                if max_bytes > 0 {
80                    Ok(LocalStore::Lmdb(LmdbBlobStore::with_max_bytes(
81                        path, max_bytes,
82                    )?))
83                } else {
84                    Ok(LocalStore::Lmdb(LmdbBlobStore::new(path)?))
85                }
86            }
87            #[cfg(not(feature = "lmdb"))]
88            StorageBackend::Lmdb => {
89                warn!(
90                    "LMDB backend requested but lmdb feature not enabled, using filesystem storage"
91                );
92                if max_bytes > 0 {
93                    Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
94                        path, max_bytes,
95                    )?))
96                } else {
97                    Ok(LocalStore::Fs(FsBlobStore::new(path)?))
98                }
99            }
100        }
101    }
102
103    /// Create a new local store based on config
104    pub fn new<P: AsRef<Path>>(path: P) -> std::result::Result<Self, StoreError> {
105        Self::new_with_max_bytes(path, 0)
106    }
107
108    /// Create a new local store based on config with an optional byte limit.
109    pub fn new_with_max_bytes<P: AsRef<Path>>(
110        path: P,
111        max_bytes: u64,
112    ) -> std::result::Result<Self, StoreError> {
113        let config = Config::load_or_default();
114        Self::new_for_backend(path, config.storage.backend, max_bytes)
115    }
116
117    /// List all hashes in the store
118    pub fn list(&self) -> std::result::Result<Vec<Hash>, StoreError> {
119        match self {
120            LocalStore::Fs(store) => store.list(),
121            #[cfg(feature = "lmdb")]
122            LocalStore::Lmdb(store) => store.list(),
123        }
124    }
125
126    /// Sync get operation
127    pub fn get_sync(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
128        match self {
129            LocalStore::Fs(store) => store.get_sync(hash),
130            #[cfg(feature = "lmdb")]
131            LocalStore::Lmdb(store) => store.get_sync(hash),
132        }
133    }
134}
135
#[async_trait::async_trait]
impl Store for LocalStore {
    /// Store `data` under `hash`; delegates to the active backend.
    async fn put(&self, hash: Hash, data: Vec<u8>) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.put(hash, data).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.put(hash, data).await,
        }
    }

    /// Fetch the blob stored under `hash`, if present.
    async fn get(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get(hash).await,
        }
    }

    /// Check whether a blob exists under `hash`.
    async fn has(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.has(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.has(hash).await,
        }
    }

    /// Delete the blob under `hash`; delegates to the active backend.
    async fn delete(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.delete(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.delete(hash).await,
        }
    }

    /// Update the backend's byte limit at runtime.
    fn set_max_bytes(&self, max: u64) {
        match self {
            LocalStore::Fs(store) => store.set_max_bytes(max),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.set_max_bytes(max),
        }
    }

    /// Current byte limit, if one is configured.
    fn max_bytes(&self) -> Option<u64> {
        match self {
            LocalStore::Fs(store) => store.max_bytes(),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.max_bytes(),
        }
    }

    /// Aggregate store statistics.
    ///
    /// A backend error is swallowed and reported as zeroed
    /// `StoreStats::default()` rather than propagated.
    async fn stats(&self) -> StoreStats {
        match self {
            LocalStore::Fs(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    pinned_count: stats.pinned_count as u64,
                    pinned_bytes: stats.pinned_bytes,
                },
                Err(_) => StoreStats::default(),
            },
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    // The LMDB backend does not report pinned blobs here;
                    // both pinned fields are hard-coded to zero.
                    pinned_count: 0,
                    pinned_bytes: 0,
                },
                Err(_) => StoreStats::default(),
            },
        }
    }

    /// Run the backend's eviction pass; returns its reported count.
    async fn evict_if_needed(&self) -> std::result::Result<u64, StoreError> {
        match self {
            LocalStore::Fs(store) => store.evict_if_needed().await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.evict_if_needed().await,
        }
    }
}
218
/// Git storage backed by HashTree with configurable persistence
///
/// Mutations (objects, refs) land in the in-memory maps below; `build_tree`
/// materializes them into the hashtree and caches the resulting root CID
/// until the next mutation invalidates it.
pub struct GitStorage {
    // Shared blob store; also referenced by `tree`.
    store: Arc<LocalStore>,
    // Merkle tree used to persist the repo layout.
    tree: HashTree<LocalStore>,
    // Bridges the sync public API onto async store/tree operations.
    runtime: RuntimeExecutor,
    /// In-memory state for the current session
    // `objects`: hex oid -> zlib-compressed loose object bytes.
    objects: std::sync::RwLock<HashMap<String, Vec<u8>>>,
    // `refs`: ref name -> sha hex, or "ref: <target>" for symbolic refs.
    refs: std::sync::RwLock<HashMap<String, String>>,
    /// Cached root CID (hash + encryption key)
    root_cid: std::sync::RwLock<Option<Cid>>,
}
230
231impl GitStorage {
232    /// Open or create a git storage at the given path
233    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
234        let config = Config::load_or_default();
235        let max_size_bytes = config
236            .storage
237            .max_size_gb
238            .saturating_mul(1024 * 1024 * 1024);
239        Self::open_with_max_bytes(path, max_size_bytes)
240    }
241
242    /// Open or create a git storage at the given path with an explicit byte limit.
243    pub fn open_with_max_bytes(path: impl AsRef<Path>, max_size_bytes: u64) -> Result<Self> {
244        let config = Config::load_or_default();
245        Self::open_with_backend_and_max_bytes(path, config.storage.backend, max_size_bytes)
246    }
247
248    pub fn open_with_backend_and_max_bytes(
249        path: impl AsRef<Path>,
250        backend: StorageBackend,
251        max_size_bytes: u64,
252    ) -> Result<Self> {
253        let runtime = match Handle::try_current() {
254            Ok(handle) => RuntimeExecutor::Handle(handle),
255            Err(_) => {
256                let rt = Runtime::new()
257                    .map_err(|e| Error::StorageError(format!("tokio runtime: {}", e)))?;
258                RuntimeExecutor::Owned(rt)
259            }
260        };
261
262        let store_path = path.as_ref().join("blobs");
263        let store = Arc::new(
264            LocalStore::new_for_backend(&store_path, backend, max_size_bytes)
265                .map_err(|e| Error::StorageError(format!("local store: {}", e)))?,
266        );
267
268        // Use encrypted mode (default) - blossom servers require encrypted data
269        let tree = HashTree::new(HashTreeConfig::new(store.clone()));
270
271        Ok(Self {
272            store,
273            tree,
274            runtime,
275            objects: std::sync::RwLock::new(HashMap::new()),
276            refs: std::sync::RwLock::new(HashMap::new()),
277            root_cid: std::sync::RwLock::new(None),
278        })
279    }
280
281    /// Evict old local blobs if storage is over the configured limit.
282    pub fn evict_if_needed(&self) -> Result<u64> {
283        self.runtime
284            .block_on(self.store.evict_if_needed())
285            .map_err(|e| Error::StorageError(format!("evict: {}", e)))
286    }
287
288    /// Write an object, returning its ID
289    fn write_object(&self, obj: &GitObject) -> Result<ObjectId> {
290        let oid = obj.id();
291        let key = oid.to_hex();
292
293        let loose = obj.to_loose_format();
294        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
295        encoder.write_all(&loose)?;
296        let compressed = encoder.finish()?;
297
298        let mut objects = self
299            .objects
300            .write()
301            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
302        objects.insert(key, compressed);
303
304        // Invalidate cached root
305        if let Ok(mut root) = self.root_cid.write() {
306            *root = None;
307        }
308
309        Ok(oid)
310    }
311
312    /// Write raw object data (type + content already parsed)
313    pub fn write_raw_object(&self, obj_type: ObjectType, content: &[u8]) -> Result<ObjectId> {
314        let obj = GitObject::new(obj_type, content.to_vec());
315        self.write_object(&obj)
316    }
317
318    /// Read an object by ID from in-memory cache
319    #[allow(dead_code)]
320    fn read_object(&self, oid: &ObjectId) -> Result<GitObject> {
321        let key = oid.to_hex();
322        let objects = self
323            .objects
324            .read()
325            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
326        let compressed = objects
327            .get(&key)
328            .ok_or_else(|| Error::ObjectNotFound(key.clone()))?;
329
330        let mut decoder = ZlibDecoder::new(compressed.as_slice());
331        let mut data = Vec::new();
332        decoder.read_to_end(&mut data)?;
333
334        GitObject::from_loose_format(&data)
335    }
336
337    /// Write a ref
338    pub fn write_ref(&self, name: &str, target: &Ref) -> Result<()> {
339        validate_ref_name(name)?;
340
341        let value = match target {
342            Ref::Direct(oid) => oid.to_hex(),
343            Ref::Symbolic(target) => format!("ref: {}", target),
344        };
345
346        let mut refs = self
347            .refs
348            .write()
349            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
350        refs.insert(name.to_string(), value);
351
352        // Invalidate cached root
353        if let Ok(mut root) = self.root_cid.write() {
354            *root = None;
355        }
356
357        Ok(())
358    }
359
360    /// Read a ref
361    #[allow(dead_code)]
362    pub fn read_ref(&self, name: &str) -> Result<Option<Ref>> {
363        let refs = self
364            .refs
365            .read()
366            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
367
368        match refs.get(name) {
369            Some(value) => {
370                if let Some(target) = value.strip_prefix("ref: ") {
371                    Ok(Some(Ref::Symbolic(target.to_string())))
372                } else {
373                    let oid = ObjectId::from_hex(value)
374                        .ok_or_else(|| Error::StorageError(format!("invalid ref: {}", value)))?;
375                    Ok(Some(Ref::Direct(oid)))
376                }
377            }
378            None => Ok(None),
379        }
380    }
381
382    /// List all refs
383    #[allow(dead_code)]
384    pub fn list_refs(&self) -> Result<HashMap<String, String>> {
385        let refs = self
386            .refs
387            .read()
388            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
389        Ok(refs.clone())
390    }
391
392    /// Delete a ref
393    pub fn delete_ref(&self, name: &str) -> Result<bool> {
394        let mut refs = self
395            .refs
396            .write()
397            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
398        let existed = refs.remove(name).is_some();
399
400        // Invalidate cached root
401        if let Ok(mut root) = self.root_cid.write() {
402            *root = None;
403        }
404
405        Ok(existed)
406    }
407
408    /// Import a raw git object (already in loose format, zlib compressed)
409    /// Used when fetching existing objects from remote before push
410    pub fn import_compressed_object(&self, oid: &str, compressed_data: Vec<u8>) -> Result<()> {
411        let mut objects = self
412            .objects
413            .write()
414            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
415        objects.insert(oid.to_string(), compressed_data);
416
417        // Invalidate cached root
418        if let Ok(mut root) = self.root_cid.write() {
419            *root = None;
420        }
421
422        Ok(())
423    }
424
425    /// Import a ref directly (used when loading existing refs from remote)
426    pub fn import_ref(&self, name: &str, value: &str) -> Result<()> {
427        let mut refs = self
428            .refs
429            .write()
430            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
431        refs.insert(name.to_string(), value.to_string());
432
433        // Invalidate cached root
434        if let Ok(mut root) = self.root_cid.write() {
435            *root = None;
436        }
437
438        Ok(())
439    }
440
441    /// Check if a ref exists
442    #[cfg(test)]
443    pub fn has_ref(&self, name: &str) -> Result<bool> {
444        let refs = self
445            .refs
446            .read()
447            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
448        Ok(refs.contains_key(name))
449    }
450
451    /// Get count of objects in storage
452    #[cfg(test)]
453    pub fn object_count(&self) -> Result<usize> {
454        let objects = self
455            .objects
456            .read()
457            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
458        Ok(objects.len())
459    }
460
461    /// Get the cached root CID (returns None if tree hasn't been built)
462    #[allow(dead_code)]
463    pub fn get_root_cid(&self) -> Result<Option<Cid>> {
464        let root = self
465            .root_cid
466            .read()
467            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
468        Ok(root.clone())
469    }
470
471    /// Get the default branch name
472    #[allow(dead_code)]
473    pub fn default_branch(&self) -> Result<Option<String>> {
474        let refs = self
475            .refs
476            .read()
477            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
478
479        if let Some(head) = refs.get("HEAD") {
480            if let Some(target) = head.strip_prefix("ref: ") {
481                return Ok(Some(target.to_string()));
482            }
483        }
484        Ok(None)
485    }
486
487    /// Get the tree SHA from a commit object
488    fn get_commit_tree(
489        &self,
490        commit_oid: &str,
491        objects: &HashMap<String, Vec<u8>>,
492    ) -> Option<String> {
493        let compressed = objects.get(commit_oid)?;
494
495        // Decompress the object
496        let mut decoder = ZlibDecoder::new(&compressed[..]);
497        let mut decompressed = Vec::new();
498        decoder.read_to_end(&mut decompressed).ok()?;
499
500        // Parse git object format: "type size\0content"
501        let null_pos = decompressed.iter().position(|&b| b == 0)?;
502        let content = &decompressed[null_pos + 1..];
503
504        // Parse commit content - first line is "tree <sha>"
505        let content_str = std::str::from_utf8(content).ok()?;
506        let first_line = content_str.lines().next()?;
507        first_line
508            .strip_prefix("tree ")
509            .map(|tree_hash| tree_hash.to_string())
510    }
511
512    /// Get git object content (decompressed, without header)
513    fn get_object_content(
514        &self,
515        oid: &str,
516        objects: &HashMap<String, Vec<u8>>,
517    ) -> Option<(ObjectType, Vec<u8>)> {
518        let compressed = objects.get(oid)?;
519
520        // Decompress the object
521        let mut decoder = ZlibDecoder::new(&compressed[..]);
522        let mut decompressed = Vec::new();
523        decoder.read_to_end(&mut decompressed).ok()?;
524
525        // Parse git object format: "type size\0content"
526        let null_pos = decompressed.iter().position(|&b| b == 0)?;
527        let header = std::str::from_utf8(&decompressed[..null_pos]).ok()?;
528        let obj_type = if header.starts_with("blob") {
529            ObjectType::Blob
530        } else if header.starts_with("tree") {
531            ObjectType::Tree
532        } else if header.starts_with("commit") {
533            ObjectType::Commit
534        } else if header.starts_with("tag") {
535            ObjectType::Tag
536        } else {
537            return None;
538        };
539        let content = decompressed[null_pos + 1..].to_vec();
540        Some((obj_type, content))
541    }
542
543    fn peel_tag_target(&self, oid: &str, objects: &HashMap<String, Vec<u8>>) -> Option<String> {
544        let (obj_type, content) = self.get_object_content(oid, objects)?;
545        if obj_type != ObjectType::Tag {
546            return Some(oid.to_string());
547        }
548
549        let target = std::str::from_utf8(&content)
550            .ok()?
551            .lines()
552            .find_map(|line| line.strip_prefix("object "))
553            .map(str::trim)?
554            .to_string();
555
556        match self.get_object_content(&target, objects)?.0 {
557            ObjectType::Tag => self.peel_tag_target(&target, objects),
558            _ => Some(target),
559        }
560    }
561
562    fn build_info_refs_content(
563        &self,
564        refs: &HashMap<String, String>,
565        objects: &HashMap<String, Vec<u8>>,
566    ) -> String {
567        let mut lines = Vec::new();
568
569        for (name, value) in refs {
570            if name == "HEAD" {
571                continue;
572            }
573
574            let oid = value.trim().to_string();
575            lines.push((name.clone(), oid.clone()));
576
577            if name.starts_with("refs/tags/") {
578                if let Some(peeled) = self.peel_tag_target(&oid, objects) {
579                    if peeled != oid {
580                        lines.push((format!("{}^{{}}", name), peeled));
581                    }
582                }
583            }
584        }
585
586        lines.sort_by(|a, b| a.0.cmp(&b.0));
587
588        let mut content = String::new();
589        for (name, oid) in lines {
590            content.push_str(&oid);
591            content.push('\t');
592            content.push_str(&name);
593            content.push('\n');
594        }
595        content
596    }
597
598    async fn build_info_dir(
599        &self,
600        refs: &HashMap<String, String>,
601        objects: &HashMap<String, Vec<u8>>,
602    ) -> Result<Cid> {
603        let info_refs = self.build_info_refs_content(refs, objects);
604        let (info_refs_cid, info_refs_size) = self
605            .tree
606            .put(info_refs.as_bytes())
607            .await
608            .map_err(|e| Error::StorageError(format!("put info/refs: {}", e)))?;
609
610        self.tree
611            .put_directory(vec![
612                DirEntry::from_cid("refs", &info_refs_cid).with_size(info_refs_size)
613            ])
614            .await
615            .map_err(|e| Error::StorageError(format!("put info dir: {}", e)))
616    }
617
    /// Build the hashtree and return the root CID (hash + encryption key)
    ///
    /// Materializes the in-memory objects/refs into the layout described in
    /// the module docs (a `.git/` directory plus a working-tree checkout at
    /// the root when a commit tree can be resolved). The result is cached in
    /// `root_cid` until the next mutation invalidates it.
    pub fn build_tree(&self) -> Result<Cid> {
        // Check if we have a cached root
        if let Ok(root) = self.root_cid.read() {
            if let Some(ref cid) = *root {
                return Ok(cid.clone());
            }
        }

        let objects = self
            .objects
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;

        // Get default branch from HEAD or find first branch ref
        let (default_branch, commit_sha) = if let Some(head) = refs.get("HEAD") {
            let branch = head.strip_prefix("ref: ").map(String::from);
            let sha = branch.as_ref().and_then(|b| refs.get(b)).cloned();
            (branch, sha)
        } else {
            // No HEAD ref - find first refs/heads/* ref directly
            // NOTE(review): HashMap iteration order is unspecified, so the
            // branch picked here is arbitrary when several heads exist.
            let mut branch_info: Option<(String, String)> = None;
            for (ref_name, sha) in refs.iter() {
                if ref_name.starts_with("refs/heads/") {
                    branch_info = Some((ref_name.clone(), sha.clone()));
                    break;
                }
            }
            match branch_info {
                Some((branch, sha)) => (Some(branch), Some(sha)),
                None => (None, None),
            }
        };

        // Get tree SHA from commit
        let tree_sha = commit_sha
            .as_ref()
            .and_then(|sha| self.get_commit_tree(sha, &objects));

        // Clone objects for async block
        // NOTE(review): the `objects` read guard is also borrowed inside the
        // async block below, so this clone may be avoidable — confirm.
        let objects_clone = objects.clone();

        let root_cid = self.runtime.block_on(async {
            // Build objects directory
            let objects_cid = self.build_objects_dir(&objects).await?;

            // Build refs directory
            let refs_cid = self.build_refs_dir(&refs).await?;

            // Build dumb-HTTP info directory
            let info_cid = self.build_info_dir(&refs, &objects_clone).await?;

            // Build HEAD file - use default_branch if no explicit HEAD
            // Git expects HEAD to end with newline, so add it if missing
            let head_content = refs.get("HEAD")
                .map(|h| if h.ends_with('\n') { h.clone() } else { format!("{}\n", h) })
                .or_else(|| default_branch.as_ref().map(|b| format!("ref: {}\n", b)))
                .unwrap_or_else(|| "ref: refs/heads/main\n".to_string());
            debug!("HEAD content: {:?}", head_content);
            let (head_cid, head_size) = self.tree.put(head_content.as_bytes()).await
                .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
            debug!("HEAD hash: {}", hex::encode(head_cid.hash));

            // Build .git directory - use from_cid to preserve encryption keys
            let mut git_entries = vec![
                DirEntry::from_cid("HEAD", &head_cid).with_size(head_size),
                DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("objects", &objects_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("refs", &refs_cid).with_link_type(LinkType::Dir),
            ];

            // Add config if we have a default branch
            if let Some(ref branch) = default_branch {
                let config = format!(
                    "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = true\n[init]\n\tdefaultBranch = {}\n",
                    branch.trim_start_matches("refs/heads/")
                );
                let (config_cid, config_size) = self.tree.put(config.as_bytes()).await
                    .map_err(|e| Error::StorageError(format!("put config: {}", e)))?;
                git_entries.push(DirEntry::from_cid("config", &config_cid).with_size(config_size));
            }

            // Build and add index file if we have a tree SHA
            if let Some(ref tree_oid) = tree_sha {
                match self.build_index_file(tree_oid, &objects_clone) {
                    Ok(index_data) => {
                        let (index_cid, index_size) = self.tree.put(&index_data).await
                            .map_err(|e| Error::StorageError(format!("put index: {}", e)))?;
                        git_entries.push(DirEntry::from_cid("index", &index_cid).with_size(index_size));
                        info!("Added git index file ({} bytes)", index_data.len());
                    }
                    Err(e) => {
                        // Index generation is best-effort: failure is logged and skipped.
                        debug!("Failed to build git index file: {} - continuing without index", e);
                    }
                }
            }

            let git_cid = self.tree.put_directory(git_entries).await
                .map_err(|e| Error::StorageError(format!("put .git: {}", e)))?;

            // Build root entries starting with .git
            // Use from_cid to preserve the encryption key
            let mut root_entries = vec![DirEntry::from_cid(".git", &git_cid).with_link_type(LinkType::Dir)];

            // Add working tree files if we have a tree SHA
            if let Some(ref tree_oid) = tree_sha {
                let working_tree_entries = self.build_working_tree_entries(tree_oid, &objects_clone).await?;
                root_entries.extend(working_tree_entries);
                info!("Added {} working tree entries to root", root_entries.len() - 1);
            }

            // Sort entries for deterministic ordering
            root_entries.sort_by(|a, b| a.name.cmp(&b.name));

            let root_cid = self.tree.put_directory(root_entries).await
                .map_err(|e| Error::StorageError(format!("put root: {}", e)))?;

            info!("Built hashtree root: {} (encrypted: {}) (.git dir: {})",
                hex::encode(root_cid.hash),
                root_cid.key.is_some(),
                hex::encode(git_cid.hash));

            Ok::<Cid, Error>(root_cid)
        })?;

        // Cache the root CID
        if let Ok(mut root) = self.root_cid.write() {
            *root = Some(root_cid.clone());
        }

        Ok(root_cid)
    }
754
    /// Build working tree entries from a git tree object
    ///
    /// Resolves `tree_oid` from the in-memory object cache, recursing into
    /// subtrees via the boxed helper and storing blob contents into the
    /// hashtree. Entries whose blob objects are missing from the cache are
    /// silently skipped. Errors if `tree_oid` itself is missing or not a tree.
    async fn build_working_tree_entries(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<DirEntry>> {
        let mut entries = Vec::new();

        // Get tree content
        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        // Parse tree entries
        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                // Recursively build subdirectory (boxed: async fns can't recurse directly)
                let sub_entries = self
                    .build_working_tree_entries_boxed(&oid_hex, objects)
                    .await?;

                // Create subdirectory in hashtree
                let dir_cid =
                    self.tree.put_directory(sub_entries).await.map_err(|e| {
                        Error::StorageError(format!("put dir {}: {}", entry.name, e))
                    })?;

                // Use from_cid to preserve encryption key
                entries
                    .push(DirEntry::from_cid(&entry.name, &dir_cid).with_link_type(LinkType::Dir));
            } else {
                // Get blob content; non-blob, non-tree entries fall through untouched
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    // Use put() instead of put_blob() to chunk large files
                    let (cid, size) = self.tree.put(&blob_content).await.map_err(|e| {
                        Error::StorageError(format!("put blob {}: {}", entry.name, e))
                    })?;

                    // Use from_cid to preserve encryption key
                    entries.push(DirEntry::from_cid(&entry.name, &cid).with_size(size));
                }
            }
        }

        // Sort for deterministic ordering
        entries.sort_by(|a, b| a.name.cmp(&b.name));

        Ok(entries)
    }
817
818    /// Boxed version for async recursion
819    fn build_working_tree_entries_boxed<'a>(
820        &'a self,
821        tree_oid: &'a str,
822        objects: &'a HashMap<String, Vec<u8>>,
823    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
824        Box::pin(self.build_working_tree_entries(tree_oid, objects))
825    }
826
827    /// Build the objects directory using HashTree
828    async fn build_objects_dir(&self, objects: &HashMap<String, Vec<u8>>) -> Result<Cid> {
829        let mut top_entries = Vec::new();
830
831        if !objects.is_empty() {
832            // Group objects by first 2 characters of SHA (git loose object structure)
833            // Git expects objects/XX/YYYYYY... where XX is first 2 hex chars
834            let mut buckets: HashMap<String, Vec<(String, Vec<u8>)>> = HashMap::new();
835            for (oid, data) in objects {
836                let prefix = &oid[..2];
837                let suffix = &oid[2..];
838                buckets
839                    .entry(prefix.to_string())
840                    .or_default()
841                    .push((suffix.to_string(), data.clone()));
842            }
843
844            // Build subdirectories for each prefix
845            for (prefix, objs) in buckets {
846                let mut sub_entries = Vec::new();
847                for (suffix, data) in objs {
848                    // Use put() instead of put_blob() to chunk large objects
849                    // Git blobs can be >5MB which exceeds blossom server limits
850                    let (cid, size) = self.tree.put(&data).await.map_err(|e| {
851                        Error::StorageError(format!("put object {}{}: {}", prefix, suffix, e))
852                    })?;
853                    // Use from_cid to preserve encryption key
854                    sub_entries.push(DirEntry::from_cid(suffix, &cid).with_size(size));
855                }
856                // Sort for deterministic ordering
857                sub_entries.sort_by(|a, b| a.name.cmp(&b.name));
858
859                let sub_cid =
860                    self.tree.put_directory(sub_entries).await.map_err(|e| {
861                        Error::StorageError(format!("put objects/{}: {}", prefix, e))
862                    })?;
863                top_entries
864                    .push(DirEntry::from_cid(prefix, &sub_cid).with_link_type(LinkType::Dir));
865            }
866        }
867
868        let (packs_cid, packs_size) = self
869            .tree
870            .put(b"")
871            .await
872            .map_err(|e| Error::StorageError(format!("put objects/info/packs: {}", e)))?;
873        let info_cid = self
874            .tree
875            .put_directory(vec![
876                DirEntry::from_cid("packs", &packs_cid).with_size(packs_size)
877            ])
878            .await
879            .map_err(|e| Error::StorageError(format!("put objects/info: {}", e)))?;
880        top_entries.push(DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir));
881
882        // Sort for deterministic ordering
883        top_entries.sort_by(|a, b| a.name.cmp(&b.name));
884
885        let entry_count = top_entries.len();
886        let cid = self
887            .tree
888            .put_directory(top_entries)
889            .await
890            .map_err(|e| Error::StorageError(format!("put objects dir: {}", e)))?;
891
892        debug!(
893            "Built objects dir with {} entries: {}",
894            entry_count,
895            hex::encode(cid.hash)
896        );
897        Ok(cid)
898    }
899
900    /// Build the refs directory using HashTree
901    async fn build_refs_dir(&self, refs: &HashMap<String, String>) -> Result<Cid> {
902        // Group refs by category (heads, tags, etc.)
903        let mut groups: HashMap<String, Vec<(String, String)>> = HashMap::new();
904
905        for (ref_name, value) in refs {
906            let parts: Vec<&str> = ref_name.split('/').collect();
907            if parts.len() >= 3 && parts[0] == "refs" {
908                let category = parts[1].to_string();
909                let name = parts[2..].join("/");
910                groups
911                    .entry(category)
912                    .or_default()
913                    .push((name, value.clone()));
914            }
915        }
916
917        let mut ref_entries = Vec::new();
918
919        for (category, refs_in_category) in groups {
920            let mut cat_entries = Vec::new();
921            for (name, value) in refs_in_category {
922                // Use put() to get Cid with encryption key
923                let (cid, _size) = self
924                    .tree
925                    .put(value.as_bytes())
926                    .await
927                    .map_err(|e| Error::StorageError(format!("put ref: {}", e)))?;
928                debug!(
929                    "refs/{}/{} -> blob {}",
930                    category,
931                    name,
932                    hex::encode(cid.hash)
933                );
934                cat_entries.push(DirEntry::from_cid(name, &cid));
935            }
936
937            cat_entries.sort_by(|a, b| a.name.cmp(&b.name));
938
939            let cat_cid = self
940                .tree
941                .put_directory(cat_entries)
942                .await
943                .map_err(|e| Error::StorageError(format!("put {} dir: {}", category, e)))?;
944            debug!("refs/{} dir -> {}", category, hex::encode(cat_cid.hash));
945            ref_entries.push(DirEntry::from_cid(category, &cat_cid).with_link_type(LinkType::Dir));
946        }
947
948        if ref_entries.is_empty() {
949            // Return empty directory Cid
950            let empty_cid = self
951                .tree
952                .put_directory(vec![])
953                .await
954                .map_err(|e| Error::StorageError(format!("put empty refs: {}", e)))?;
955            return Ok(empty_cid);
956        }
957
958        ref_entries.sort_by(|a, b| a.name.cmp(&b.name));
959
960        let refs_cid = self
961            .tree
962            .put_directory(ref_entries)
963            .await
964            .map_err(|e| Error::StorageError(format!("put refs dir: {}", e)))?;
965        debug!("refs dir -> {}", hex::encode(refs_cid.hash));
966        Ok(refs_cid)
967    }
968
    /// Build git index file from tree entries
    /// Returns the raw binary content of the index file
    ///
    /// Emits an index (dircache) in format version 2: a 12-byte header,
    /// one fixed-layout entry per file sorted by path, and a trailing
    /// SHA-1 checksum over everything preceding it. All integers are
    /// big-endian, per the on-disk format.
    fn build_index_file(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<u8>> {
        // Collect all file entries from the tree (recursively)
        let mut entries: Vec<(String, [u8; 20], u32, u32)> = Vec::new(); // (path, sha1, mode, size)
        self.collect_tree_entries_for_index(tree_oid, objects, "", &mut entries)?;

        // Sort entries by path (git index requirement)
        entries.sort_by(|a, b| a.0.cmp(&b.0));

        let entry_count = entries.len() as u32;
        debug!("Building git index with {} entries", entry_count);

        // Build index content
        let mut index_data = Vec::new();

        // Header: DIRC + version 2 + entry count
        index_data.extend_from_slice(b"DIRC");
        index_data.extend_from_slice(&2u32.to_be_bytes()); // version 2
        index_data.extend_from_slice(&entry_count.to_be_bytes());

        // Current time for ctime/mtime (doesn't matter much for our use case)
        // Saturates to 0 on a pre-epoch clock; truncation to u32 seconds is
        // inherent to the v2 format.
        let now_sec = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs() as u32;

        for (path, sha1, mode, size) in &entries {
            // Remember where this entry begins so padding can be computed
            // relative to the entry, not the whole buffer.
            let entry_start = index_data.len();

            // ctime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mtime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // dev, ino (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mode
            index_data.extend_from_slice(&mode.to_be_bytes());
            // uid, gid (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // file size
            index_data.extend_from_slice(&size.to_be_bytes());
            // SHA-1
            index_data.extend_from_slice(sha1);
            // flags: path length (max 0xFFF) in low 12 bits
            // (stage bits and assume-valid flag deliberately left zero)
            let path_len = std::cmp::min(path.len(), 0xFFF) as u16;
            index_data.extend_from_slice(&path_len.to_be_bytes());
            // path (NUL-terminated)
            index_data.extend_from_slice(path.as_bytes());
            index_data.push(0); // NUL terminator

            // Pad to 8-byte boundary relative to entry start
            // (the NUL above guarantees at least one terminator even when
            // no extra padding is needed)
            let entry_len = index_data.len() - entry_start;
            let padding = (8 - (entry_len % 8)) % 8;
            index_data.extend(std::iter::repeat_n(0, padding));
        }

        // Calculate SHA-1 checksum of everything and append
        let mut hasher = Sha1::new();
        hasher.update(&index_data);
        let checksum = hasher.finalize();
        index_data.extend_from_slice(&checksum);

        debug!(
            "Built git index: {} bytes, {} entries",
            index_data.len(),
            entry_count
        );
        Ok(index_data)
    }
1047
1048    /// Collect file entries from a git tree for building the index
1049    fn collect_tree_entries_for_index(
1050        &self,
1051        tree_oid: &str,
1052        objects: &HashMap<String, Vec<u8>>,
1053        prefix: &str,
1054        entries: &mut Vec<(String, [u8; 20], u32, u32)>,
1055    ) -> Result<()> {
1056        let (obj_type, content) = self
1057            .get_object_content(tree_oid, objects)
1058            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;
1059
1060        if obj_type != ObjectType::Tree {
1061            return Err(Error::InvalidObjectType(format!(
1062                "expected tree, got {:?}",
1063                obj_type
1064            )));
1065        }
1066
1067        let tree_entries = parse_tree(&content)?;
1068
1069        for entry in tree_entries {
1070            let path = if prefix.is_empty() {
1071                entry.name.clone()
1072            } else {
1073                format!("{}/{}", prefix, entry.name)
1074            };
1075
1076            let oid_hex = entry.oid.to_hex();
1077
1078            if entry.is_tree() {
1079                // Recursively process subdirectory
1080                self.collect_tree_entries_for_index(&oid_hex, objects, &path, entries)?;
1081            } else {
1082                // Get blob content for size and SHA-1
1083                if let Some((ObjectType::Blob, blob_content)) =
1084                    self.get_object_content(&oid_hex, objects)
1085                {
1086                    // Convert hex SHA to bytes
1087                    let mut sha1_bytes = [0u8; 20];
1088                    if let Ok(bytes) = hex::decode(&oid_hex) {
1089                        if bytes.len() == 20 {
1090                            sha1_bytes.copy_from_slice(&bytes);
1091                        }
1092                    }
1093
1094                    // Mode: use entry.mode or default to regular file
1095                    let mode = entry.mode;
1096                    let size = blob_content.len() as u32;
1097
1098                    entries.push((path, sha1_bytes, mode, size));
1099                }
1100            }
1101        }
1102
1103        Ok(())
1104    }
1105
    /// Get the underlying store.
    ///
    /// Returns a shared handle to the local blob store (filesystem- or
    /// LMDB-backed) holding this repository's hashtree blocks.
    pub fn store(&self) -> &Arc<LocalStore> {
        &self.store
    }
1110
    /// Get the HashTree for direct access.
    ///
    /// Not called from production code paths (hence `allow(dead_code)`);
    /// kept for callers needing raw tree reads or path resolution.
    #[allow(dead_code)]
    pub fn hashtree(&self) -> &HashTree<LocalStore> {
        &self.tree
    }
1116
1117    /// Push all blobs to file servers
1118    #[allow(dead_code)]
1119    pub fn push_to_file_servers(
1120        &self,
1121        blossom: &hashtree_blossom::BlossomClient,
1122    ) -> Result<(usize, usize)> {
1123        let hashes = self
1124            .store
1125            .list()
1126            .map_err(|e| Error::StorageError(format!("list hashes: {}", e)))?;
1127
1128        info!("Pushing {} blobs to file servers", hashes.len());
1129
1130        let mut uploaded = 0;
1131        let mut existed = 0;
1132
1133        self.runtime.block_on(async {
1134            for hash in &hashes {
1135                let hex_hash = hex::encode(hash);
1136                let data = match self.store.get_sync(hash) {
1137                    Ok(Some(d)) => d,
1138                    _ => continue,
1139                };
1140
1141                match blossom.upload_if_missing(&data).await {
1142                    Ok((_, true)) => {
1143                        debug!("Uploaded {}", &hex_hash[..12]);
1144                        uploaded += 1;
1145                    }
1146                    Ok((_, false)) => {
1147                        existed += 1;
1148                    }
1149                    Err(e) => {
1150                        debug!("Failed to upload {}: {}", &hex_hash[..12], e);
1151                    }
1152                }
1153            }
1154        });
1155
1156        info!(
1157            "Upload complete: {} new, {} already existed",
1158            uploaded, existed
1159        );
1160        Ok((uploaded, existed))
1161    }
1162
1163    /// Clear all state (for testing or re-initialization)
1164    #[allow(dead_code)]
1165    pub fn clear(&self) -> Result<()> {
1166        let mut objects = self
1167            .objects
1168            .write()
1169            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1170        let mut refs = self
1171            .refs
1172            .write()
1173            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1174        let mut root = self
1175            .root_cid
1176            .write()
1177            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1178
1179        objects.clear();
1180        refs.clear();
1181        *root = None;
1182        Ok(())
1183    }
1184}
1185
1186#[cfg(test)]
1187mod tests {
1188    use super::*;
1189    use hashtree_core::store::Store;
1190    use hashtree_core::LinkType;
1191    use std::io::{Read, Write};
1192    use std::net::{TcpListener, TcpStream};
1193    use std::path::Path;
1194    use std::process::{Child, Command, Stdio};
1195    use std::time::{Duration, Instant};
1196    use tempfile::TempDir;
1197
1198    fn create_test_storage() -> (GitStorage, TempDir) {
1199        let temp_dir = TempDir::new().unwrap();
1200        let storage = GitStorage::open(temp_dir.path()).unwrap();
1201        (storage, temp_dir)
1202    }
1203
1204    fn create_test_storage_with_limit(max_size_bytes: u64) -> (GitStorage, TempDir) {
1205        let temp_dir = TempDir::new().unwrap();
1206        let storage = GitStorage::open_with_backend_and_max_bytes(
1207            temp_dir.path(),
1208            StorageBackend::Fs,
1209            max_size_bytes,
1210        )
1211        .unwrap();
1212        (storage, temp_dir)
1213    }
1214
    /// Total bytes currently held by the local blob store, whichever
    /// backend variant is active.
    fn local_total_bytes(storage: &GitStorage) -> u64 {
        match storage.store().as_ref() {
            LocalStore::Fs(store) => store.stats().unwrap().total_bytes,
            // The Lmdb variant only exists when the feature is compiled in.
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.stats().unwrap().total_bytes,
        }
    }
1222
1223    fn write_test_commit(storage: &GitStorage) -> ObjectId {
1224        let blob_oid = storage
1225            .write_raw_object(ObjectType::Blob, b"hello from hashtree\n")
1226            .unwrap();
1227
1228        let mut tree_content = Vec::new();
1229        tree_content.extend_from_slice(b"100644 README.md\0");
1230        tree_content.extend_from_slice(&hex::decode(blob_oid.to_hex()).unwrap());
1231        let tree_oid = storage
1232            .write_raw_object(ObjectType::Tree, &tree_content)
1233            .unwrap();
1234
1235        let commit_content = format!(
1236            "tree {}\nauthor Test User <test@example.com> 0 +0000\ncommitter Test User <test@example.com> 0 +0000\n\nInitial commit\n",
1237            tree_oid.to_hex()
1238        );
1239        storage
1240            .write_raw_object(ObjectType::Commit, commit_content.as_bytes())
1241            .unwrap()
1242    }
1243
1244    fn export_tree_to_fs<S: Store>(
1245        runtime: &RuntimeExecutor,
1246        tree: &HashTree<S>,
1247        cid: &Cid,
1248        dst: &Path,
1249    ) {
1250        std::fs::create_dir_all(dst).unwrap();
1251        let entries = runtime.block_on(tree.list_directory(cid)).unwrap();
1252        for entry in entries {
1253            let entry_cid = Cid {
1254                hash: entry.hash,
1255                key: entry.key,
1256            };
1257            let path = dst.join(&entry.name);
1258            match entry.link_type {
1259                LinkType::Dir => export_tree_to_fs(runtime, tree, &entry_cid, &path),
1260                LinkType::Blob | LinkType::File => {
1261                    let data = runtime
1262                        .block_on(tree.get(&entry_cid, None))
1263                        .unwrap()
1264                        .unwrap();
1265                    if let Some(parent) = path.parent() {
1266                        std::fs::create_dir_all(parent).unwrap();
1267                    }
1268                    std::fs::write(path, data).unwrap();
1269                }
1270            }
1271        }
1272    }
1273
1274    fn spawn_http_server(root: &Path, port: u16) -> Child {
1275        Command::new("python3")
1276            .args([
1277                "-m",
1278                "http.server",
1279                &port.to_string(),
1280                "--bind",
1281                "127.0.0.1",
1282            ])
1283            .current_dir(root)
1284            .stdout(Stdio::null())
1285            .stderr(Stdio::null())
1286            .spawn()
1287            .expect("spawn python http server")
1288    }
1289
    /// Poll until the spawned http server answers `path` with 200,
    /// panicking if the process dies first or 5 seconds elapse.
    fn wait_for_http_server(server: &mut Child, port: u16, path: &str) {
        let deadline = Instant::now() + Duration::from_secs(5);

        loop {
            // Fail fast if the server crashed instead of spinning to deadline.
            if let Some(status) = server.try_wait().expect("check http server status") {
                panic!("python http server exited before becoming ready: {status}");
            }

            if let Ok(mut stream) = TcpStream::connect(("127.0.0.1", port)) {
                // Short timeouts keep each probe cheap; we retry below anyway.
                stream
                    .set_read_timeout(Some(Duration::from_millis(200)))
                    .expect("set read timeout");
                stream
                    .set_write_timeout(Some(Duration::from_millis(200)))
                    .expect("set write timeout");
                let request = format!(
                    "GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n"
                );
                if stream.write_all(request.as_bytes()).is_ok() {
                    let mut response = String::new();
                    // python's http.server answers with an HTTP/1.0 status
                    // line by default, hence the prefix checked below.
                    if stream.read_to_string(&mut response).is_ok()
                        && response.starts_with("HTTP/1.0 200")
                    {
                        return;
                    }
                }
            }

            if Instant::now() >= deadline {
                panic!("python http server did not become ready on port {port}");
            }
            std::thread::sleep(Duration::from_millis(50));
        }
    }
1324
1325    #[test]
1326    fn test_import_ref() {
1327        let (storage, _temp) = create_test_storage();
1328
1329        // Import a ref
1330        storage
1331            .import_ref("refs/heads/main", "abc123def456")
1332            .unwrap();
1333
1334        // Check it exists
1335        assert!(storage.has_ref("refs/heads/main").unwrap());
1336
1337        // Check value via list_refs
1338        let refs = storage.list_refs().unwrap();
1339        assert_eq!(
1340            refs.get("refs/heads/main"),
1341            Some(&"abc123def456".to_string())
1342        );
1343    }
1344
1345    #[test]
1346    fn test_import_multiple_refs_preserves_all() {
1347        let (storage, _temp) = create_test_storage();
1348
1349        // Import multiple refs (simulating loading from remote)
1350        storage.import_ref("refs/heads/main", "sha_main").unwrap();
1351        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
1352        storage
1353            .import_ref("refs/heads/feature", "sha_feature")
1354            .unwrap();
1355
1356        // All should exist
1357        assert!(storage.has_ref("refs/heads/main").unwrap());
1358        assert!(storage.has_ref("refs/heads/dev").unwrap());
1359        assert!(storage.has_ref("refs/heads/feature").unwrap());
1360
1361        // Now write a new ref (simulating push)
1362        storage
1363            .write_ref(
1364                "refs/heads/new-branch",
1365                &Ref::Direct(
1366                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1367                ),
1368            )
1369            .unwrap();
1370
1371        // Original refs should still exist
1372        let refs = storage.list_refs().unwrap();
1373        assert_eq!(refs.len(), 4);
1374        assert!(refs.contains_key("refs/heads/main"));
1375        assert!(refs.contains_key("refs/heads/dev"));
1376        assert!(refs.contains_key("refs/heads/feature"));
1377        assert!(refs.contains_key("refs/heads/new-branch"));
1378    }
1379
1380    #[test]
1381    fn test_import_compressed_object() {
1382        let (storage, _temp) = create_test_storage();
1383
1384        // Create a fake compressed object
1385        let fake_compressed = vec![0x78, 0x9c, 0x01, 0x02, 0x03]; // fake zlib data
1386
1387        storage
1388            .import_compressed_object("abc123def456", fake_compressed.clone())
1389            .unwrap();
1390
1391        // Check object count
1392        assert_eq!(storage.object_count().unwrap(), 1);
1393    }
1394
1395    #[test]
1396    fn test_write_ref_overwrites_imported() {
1397        let (storage, _temp) = create_test_storage();
1398
1399        // Import a ref
1400        storage.import_ref("refs/heads/main", "old_sha").unwrap();
1401
1402        // Write same ref with new value
1403        storage
1404            .write_ref(
1405                "refs/heads/main",
1406                &Ref::Direct(
1407                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1408                ),
1409            )
1410            .unwrap();
1411
1412        // Should have new value
1413        let refs = storage.list_refs().unwrap();
1414        assert_eq!(
1415            refs.get("refs/heads/main"),
1416            Some(&"0123456789abcdef0123456789abcdef01234567".to_string())
1417        );
1418    }
1419
1420    #[test]
1421    fn test_delete_ref_preserves_others() {
1422        let (storage, _temp) = create_test_storage();
1423
1424        // Import multiple refs
1425        storage.import_ref("refs/heads/main", "sha_main").unwrap();
1426        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
1427
1428        // Delete one
1429        storage.delete_ref("refs/heads/dev").unwrap();
1430
1431        // Other should still exist
1432        assert!(storage.has_ref("refs/heads/main").unwrap());
1433        assert!(!storage.has_ref("refs/heads/dev").unwrap());
1434    }
1435
1436    #[test]
1437    fn test_clear_removes_all() {
1438        let (storage, _temp) = create_test_storage();
1439
1440        // Import refs and objects
1441        storage.import_ref("refs/heads/main", "sha_main").unwrap();
1442        storage
1443            .import_compressed_object("obj1", vec![1, 2, 3])
1444            .unwrap();
1445
1446        // Clear
1447        storage.clear().unwrap();
1448
1449        // All gone
1450        assert!(!storage.has_ref("refs/heads/main").unwrap());
1451        assert_eq!(storage.object_count().unwrap(), 0);
1452    }
1453
1454    #[test]
1455    fn test_evict_if_needed_respects_configured_limit() {
1456        let (storage, _temp) = create_test_storage_with_limit(1_024);
1457
1458        storage
1459            .write_raw_object(ObjectType::Blob, &vec![b'a'; 900])
1460            .unwrap();
1461        storage
1462            .write_raw_object(ObjectType::Blob, &vec![b'b'; 900])
1463            .unwrap();
1464        storage
1465            .write_ref(
1466                "refs/heads/main",
1467                &Ref::Direct(
1468                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1469                ),
1470            )
1471            .unwrap();
1472
1473        storage.build_tree().unwrap();
1474
1475        let before = local_total_bytes(&storage);
1476        assert!(before > 1_024);
1477
1478        let freed = storage.evict_if_needed().unwrap();
1479        assert!(freed > 0);
1480
1481        let after = local_total_bytes(&storage);
1482        assert!(after <= 1_024);
1483    }
1484
    #[test]
    fn test_build_tree_adds_dumb_http_metadata() {
        // Set up a commit plus an annotated tag, with branch/tag refs and a
        // symbolic HEAD, then verify build_tree() emits the dumb-HTTP files.
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        let tag_content = format!(
            "object {}\ntype commit\ntag v1.0.0\ntagger Test User <test@example.com> 0 +0000\n\nrelease\n",
            commit_oid.to_hex()
        );
        let tag_oid = storage
            .write_raw_object(ObjectType::Tag, tag_content.as_bytes())
            .unwrap();

        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(tag_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        // .git/info/refs must advertise both refs, and list the annotated
        // tag's peeled commit as "<ref>^{}" (dumb-HTTP convention).
        let info_refs_cid = storage
            .runtime
            .block_on(storage.tree.resolve_path(&root_cid, ".git/info/refs"))
            .unwrap()
            .expect("info/refs exists");
        let info_refs = storage
            .runtime
            .block_on(storage.tree.get(&info_refs_cid, None))
            .unwrap()
            .unwrap();
        let info_refs = String::from_utf8(info_refs).unwrap();

        assert_eq!(
            info_refs,
            format!(
                "{commit}\trefs/heads/main\n{tag}\trefs/tags/v1.0.0\n{commit}\trefs/tags/v1.0.0^{{}}\n",
                commit = commit_oid.to_hex(),
                tag = tag_oid.to_hex()
            )
        );

        // No packs are built, so the pack advertisement must exist but be empty.
        let packs_cid = storage
            .runtime
            .block_on(
                storage
                    .tree
                    .resolve_path(&root_cid, ".git/objects/info/packs"),
            )
            .unwrap()
            .expect("objects/info/packs exists");
        let packs = storage
            .runtime
            .block_on(storage.tree.get(&packs_cid, None))
            .unwrap()
            .unwrap();
        assert!(packs.is_empty(), "objects/info/packs should be empty");
    }
1546
    #[test]
    fn test_materialized_tree_supports_static_http_clone_from_git_dir() {
        // End-to-end: build a repo, export the hashtree to disk, serve it
        // as static files, and clone it with stock git over the dumb HTTP
        // protocol. Requires `python3` and `git` on PATH.
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();
        let export_dir = TempDir::new().unwrap();
        let repo_dir = export_dir.path().join("repo");
        export_tree_to_fs(&storage.runtime, &storage.tree, &root_cid, &repo_dir);

        // Grab a free port, then release it for the python server to bind.
        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
        let port = listener.local_addr().unwrap().port();
        drop(listener);

        let mut server = spawn_http_server(export_dir.path(), port);
        wait_for_http_server(&mut server, port, "/repo/.git/HEAD");

        let clone_dir = TempDir::new().unwrap();
        let clone_path = clone_dir.path().join("clone");
        let output = Command::new("git")
            .args([
                "clone",
                &format!("http://127.0.0.1:{port}/repo/.git", port = port),
                clone_path.to_str().unwrap(),
            ])
            .output()
            .unwrap();

        // Tear the server down before asserting so a failed assert
        // doesn't leak the child process.
        let _ = server.kill();
        let _ = server.wait();

        assert!(
            output.status.success(),
            "git clone failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        assert_eq!(
            std::fs::read_to_string(clone_path.join("README.md")).unwrap(),
            "hello from hashtree\n"
        );
    }
1594}