git_remote_htree/git/storage.rs

1//! Hashtree-backed git object and ref storage with configurable persistence
2//!
3//! Stores git objects and refs in a hashtree merkle tree:
4//!   root/
5//!     .git/
6//!       HEAD -> "ref: refs/heads/main"
7//!       refs/heads/main -> <commit-sha1>
8//!       objects/XX/YYYY... -> zlib-compressed loose object (standard git layout)
9//!
10//! The root hash (SHA-256) is the content-addressed identifier for the entire repo state.
11
12use flate2::read::ZlibDecoder;
13use flate2::write::ZlibEncoder;
14use flate2::Compression;
15use hashtree_config::{Config, StorageBackend};
16use hashtree_core::store::{Store, StoreError};
17use hashtree_core::types::Hash;
18use hashtree_core::{Cid, DirEntry, HashTree, HashTreeConfig, LinkType};
19use hashtree_fs::FsBlobStore;
20#[cfg(feature = "lmdb")]
21use hashtree_lmdb::LmdbBlobStore;
22use sha1::{Sha1, Digest};
23use std::collections::HashMap;
24use std::io::{Read, Write};
25use std::path::Path;
26use std::sync::Arc;
27use tokio::runtime::{Handle, Runtime};
28use tracing::{debug, info, warn};
29
30use super::object::{parse_tree, GitObject, ObjectId, ObjectType};
31use super::refs::{validate_ref_name, Ref};
32use super::{Error, Result};
33
/// Box type for async recursion: a pinned, heap-allocated, `Send` future,
/// needed because an `async fn` cannot directly await itself.
type BoxFuture<'a, T> = std::pin::Pin<Box<dyn std::future::Future<Output = T> + Send + 'a>>;
36
/// Runtime executor - either owns a runtime or reuses an existing one
enum RuntimeExecutor {
    /// Dedicated runtime created because no tokio runtime was ambient at open time
    Owned(Runtime),
    /// Handle to an already-running runtime this storage was opened inside
    Handle(Handle),
}
42
impl RuntimeExecutor {
    /// Run a future to completion, blocking the calling thread.
    ///
    /// In the `Handle` case, `block_in_place` tells tokio this worker thread
    /// is about to block so it can shift other tasks away.
    /// NOTE(review): `tokio::task::block_in_place` panics on a current-thread
    /// runtime — this assumes callers run inside a multi-thread runtime; confirm.
    fn block_on<F: std::future::Future>(&self, f: F) -> F::Output {
        match self {
            RuntimeExecutor::Owned(rt) => rt.block_on(f),
            RuntimeExecutor::Handle(handle) => tokio::task::block_in_place(|| handle.block_on(f)),
        }
    }
}
51
/// Local blob store - wraps either FsBlobStore or LmdbBlobStore
///
/// The variant is chosen at construction from the loaded config
/// (see `LocalStore::new`); the LMDB variant only exists when the
/// `lmdb` cargo feature is compiled in.
pub enum LocalStore {
    /// Filesystem-backed blob storage
    Fs(FsBlobStore),
    /// LMDB-backed blob storage (feature-gated)
    #[cfg(feature = "lmdb")]
    Lmdb(LmdbBlobStore),
}
58
59impl LocalStore {
60    /// Create a new local store based on config
61    pub fn new<P: AsRef<Path>>(path: P) -> std::result::Result<Self, StoreError> {
62        let config = Config::load_or_default();
63        match config.storage.backend {
64            StorageBackend::Fs => {
65                Ok(LocalStore::Fs(FsBlobStore::new(path)?))
66            }
67            #[cfg(feature = "lmdb")]
68            StorageBackend::Lmdb => {
69                Ok(LocalStore::Lmdb(LmdbBlobStore::new(path)?))
70            }
71            #[cfg(not(feature = "lmdb"))]
72            StorageBackend::Lmdb => {
73                warn!("LMDB backend requested but lmdb feature not enabled, using filesystem storage");
74                Ok(LocalStore::Fs(FsBlobStore::new(path)?))
75            }
76        }
77    }
78
79    /// List all hashes in the store
80    pub fn list(&self) -> std::result::Result<Vec<Hash>, StoreError> {
81        match self {
82            LocalStore::Fs(store) => store.list(),
83            #[cfg(feature = "lmdb")]
84            LocalStore::Lmdb(store) => store.list(),
85        }
86    }
87
88    /// Sync get operation
89    pub fn get_sync(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
90        match self {
91            LocalStore::Fs(store) => store.get_sync(hash),
92            #[cfg(feature = "lmdb")]
93            LocalStore::Lmdb(store) => store.get_sync(hash),
94        }
95    }
96}
97
#[async_trait::async_trait]
impl Store for LocalStore {
    /// Store a blob under its hash, delegating to the active backend.
    async fn put(&self, hash: Hash, data: Vec<u8>) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.put(hash, data).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.put(hash, data).await,
        }
    }

    /// Fetch a blob by hash; `None` if not present.
    async fn get(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get(hash).await,
        }
    }

    /// Check whether a blob exists without reading it.
    async fn has(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.has(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.has(hash).await,
        }
    }

    /// Remove a blob; the returned bool presumably indicates whether it existed
    /// (mirrors `put`'s bool) — backend-defined.
    async fn delete(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.delete(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.delete(hash).await,
        }
    }
}
132
/// Git storage backed by HashTree with configurable persistence
///
/// Objects and refs accumulate in in-memory maps; `build_tree` materializes
/// them into the hashtree and caches the resulting root CID.
pub struct GitStorage {
    /// Blob store shared with the hashtree
    store: Arc<LocalStore>,
    /// Content-addressed merkle tree built over the repo state
    tree: HashTree<LocalStore>,
    /// Drives async tree operations from synchronous callers
    runtime: RuntimeExecutor,
    /// In-memory state for the current session
    /// (oid hex -> zlib-compressed loose object bytes)
    objects: std::sync::RwLock<HashMap<String, Vec<u8>>>,
    /// In-memory refs (full ref name -> sha hex, or "ref: <target>" for symbolic)
    refs: std::sync::RwLock<HashMap<String, String>>,
    /// Cached root CID (hash + encryption key);
    /// reset to None by every object/ref mutation
    root_cid: std::sync::RwLock<Option<Cid>>,
}
144
145impl GitStorage {
146    /// Open or create a git storage at the given path
147    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
148        let runtime = match Handle::try_current() {
149            Ok(handle) => RuntimeExecutor::Handle(handle),
150            Err(_) => {
151                let rt = Runtime::new()
152                    .map_err(|e| Error::StorageError(format!("tokio runtime: {}", e)))?;
153                RuntimeExecutor::Owned(rt)
154            }
155        };
156
157        let store_path = path.as_ref().join("blobs");
158        let store = Arc::new(
159            LocalStore::new(&store_path)
160                .map_err(|e| Error::StorageError(format!("local store: {}", e)))?,
161        );
162
163        // Use encrypted mode (default) - blossom servers require encrypted data
164        let tree = HashTree::new(HashTreeConfig::new(store.clone()));
165
166        Ok(Self {
167            store,
168            tree,
169            runtime,
170            objects: std::sync::RwLock::new(HashMap::new()),
171            refs: std::sync::RwLock::new(HashMap::new()),
172            root_cid: std::sync::RwLock::new(None),
173        })
174    }
175
176    /// Write an object, returning its ID
177    fn write_object(&self, obj: &GitObject) -> Result<ObjectId> {
178        let oid = obj.id();
179        let key = oid.to_hex();
180
181        let loose = obj.to_loose_format();
182        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
183        encoder.write_all(&loose)?;
184        let compressed = encoder.finish()?;
185
186        let mut objects = self.objects.write()
187            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
188        objects.insert(key, compressed);
189
190        // Invalidate cached root
191        if let Ok(mut root) = self.root_cid.write() {
192            *root = None;
193        }
194
195        Ok(oid)
196    }
197
198    /// Write raw object data (type + content already parsed)
199    pub fn write_raw_object(&self, obj_type: ObjectType, content: &[u8]) -> Result<ObjectId> {
200        let obj = GitObject::new(obj_type, content.to_vec());
201        self.write_object(&obj)
202    }
203
204    /// Read an object by ID from in-memory cache
205    #[allow(dead_code)]
206    fn read_object(&self, oid: &ObjectId) -> Result<GitObject> {
207        let key = oid.to_hex();
208        let objects = self.objects.read()
209            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
210        let compressed = objects
211            .get(&key)
212            .ok_or_else(|| Error::ObjectNotFound(key.clone()))?;
213
214        let mut decoder = ZlibDecoder::new(compressed.as_slice());
215        let mut data = Vec::new();
216        decoder.read_to_end(&mut data)?;
217
218        GitObject::from_loose_format(&data)
219    }
220
221    /// Write a ref
222    pub fn write_ref(&self, name: &str, target: &Ref) -> Result<()> {
223        validate_ref_name(name)?;
224
225        let value = match target {
226            Ref::Direct(oid) => oid.to_hex(),
227            Ref::Symbolic(target) => format!("ref: {}", target),
228        };
229
230        let mut refs = self.refs.write()
231            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
232        refs.insert(name.to_string(), value);
233
234        // Invalidate cached root
235        if let Ok(mut root) = self.root_cid.write() {
236            *root = None;
237        }
238
239        Ok(())
240    }
241
242    /// Read a ref
243    #[allow(dead_code)]
244    pub fn read_ref(&self, name: &str) -> Result<Option<Ref>> {
245        let refs = self.refs.read()
246            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
247
248        match refs.get(name) {
249            Some(value) => {
250                if let Some(target) = value.strip_prefix("ref: ") {
251                    Ok(Some(Ref::Symbolic(target.to_string())))
252                } else {
253                    let oid = ObjectId::from_hex(value)
254                        .ok_or_else(|| Error::StorageError(format!("invalid ref: {}", value)))?;
255                    Ok(Some(Ref::Direct(oid)))
256                }
257            }
258            None => Ok(None),
259        }
260    }
261
262    /// List all refs
263    #[allow(dead_code)]
264    pub fn list_refs(&self) -> Result<HashMap<String, String>> {
265        let refs = self.refs.read()
266            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
267        Ok(refs.clone())
268    }
269
270    /// Delete a ref
271    pub fn delete_ref(&self, name: &str) -> Result<bool> {
272        let mut refs = self.refs.write()
273            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
274        let existed = refs.remove(name).is_some();
275
276        // Invalidate cached root
277        if let Ok(mut root) = self.root_cid.write() {
278            *root = None;
279        }
280
281        Ok(existed)
282    }
283
284    /// Import a raw git object (already in loose format, zlib compressed)
285    /// Used when fetching existing objects from remote before push
286    pub fn import_compressed_object(&self, oid: &str, compressed_data: Vec<u8>) -> Result<()> {
287        let mut objects = self.objects.write()
288            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
289        objects.insert(oid.to_string(), compressed_data);
290
291        // Invalidate cached root
292        if let Ok(mut root) = self.root_cid.write() {
293            *root = None;
294        }
295
296        Ok(())
297    }
298
299    /// Import a ref directly (used when loading existing refs from remote)
300    pub fn import_ref(&self, name: &str, value: &str) -> Result<()> {
301        let mut refs = self.refs.write()
302            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
303        refs.insert(name.to_string(), value.to_string());
304
305        // Invalidate cached root
306        if let Ok(mut root) = self.root_cid.write() {
307            *root = None;
308        }
309
310        Ok(())
311    }
312
313    /// Check if a ref exists
314    #[cfg(test)]
315    pub fn has_ref(&self, name: &str) -> Result<bool> {
316        let refs = self.refs.read()
317            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
318        Ok(refs.contains_key(name))
319    }
320
321    /// Get count of objects in storage
322    #[cfg(test)]
323    pub fn object_count(&self) -> Result<usize> {
324        let objects = self.objects.read()
325            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
326        Ok(objects.len())
327    }
328
329    /// Get the cached root CID (returns None if tree hasn't been built)
330    #[allow(dead_code)]
331    pub fn get_root_cid(&self) -> Result<Option<Cid>> {
332        let root = self.root_cid.read()
333            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
334        Ok(root.clone())
335    }
336
337    /// Get the default branch name
338    #[allow(dead_code)]
339    pub fn default_branch(&self) -> Result<Option<String>> {
340        let refs = self.refs.read()
341            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
342
343        if let Some(head) = refs.get("HEAD") {
344            if let Some(target) = head.strip_prefix("ref: ") {
345                return Ok(Some(target.to_string()));
346            }
347        }
348        Ok(None)
349    }
350
351    /// Get the tree SHA from a commit object
352    fn get_commit_tree(&self, commit_oid: &str, objects: &HashMap<String, Vec<u8>>) -> Option<String> {
353        let compressed = objects.get(commit_oid)?;
354
355        // Decompress the object
356        let mut decoder = ZlibDecoder::new(&compressed[..]);
357        let mut decompressed = Vec::new();
358        decoder.read_to_end(&mut decompressed).ok()?;
359
360        // Parse git object format: "type size\0content"
361        let null_pos = decompressed.iter().position(|&b| b == 0)?;
362        let content = &decompressed[null_pos + 1..];
363
364        // Parse commit content - first line is "tree <sha>"
365        let content_str = std::str::from_utf8(content).ok()?;
366        let first_line = content_str.lines().next()?;
367        if first_line.starts_with("tree ") {
368            Some(first_line[5..].to_string())
369        } else {
370            None
371        }
372    }
373
374    /// Get git object content (decompressed, without header)
375    fn get_object_content(&self, oid: &str, objects: &HashMap<String, Vec<u8>>) -> Option<(ObjectType, Vec<u8>)> {
376        let compressed = objects.get(oid)?;
377
378        // Decompress the object
379        let mut decoder = ZlibDecoder::new(&compressed[..]);
380        let mut decompressed = Vec::new();
381        decoder.read_to_end(&mut decompressed).ok()?;
382
383        // Parse git object format: "type size\0content"
384        let null_pos = decompressed.iter().position(|&b| b == 0)?;
385        let header = std::str::from_utf8(&decompressed[..null_pos]).ok()?;
386        let obj_type = if header.starts_with("blob") {
387            ObjectType::Blob
388        } else if header.starts_with("tree") {
389            ObjectType::Tree
390        } else if header.starts_with("commit") {
391            ObjectType::Commit
392        } else {
393            return None;
394        };
395        let content = decompressed[null_pos + 1..].to_vec();
396        Some((obj_type, content))
397    }
398
399    /// Build the hashtree and return the root CID (hash + encryption key)
400    pub fn build_tree(&self) -> Result<Cid> {
401        // Check if we have a cached root
402        if let Ok(root) = self.root_cid.read() {
403            if let Some(ref cid) = *root {
404                return Ok(cid.clone());
405            }
406        }
407
408        let objects = self.objects.read()
409            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
410        let refs = self.refs.read()
411            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
412
413        // Get default branch from HEAD or find first branch ref
414        let (default_branch, commit_sha) = if let Some(head) = refs.get("HEAD") {
415            let branch = head.strip_prefix("ref: ").map(String::from);
416            let sha = branch.as_ref().and_then(|b| refs.get(b)).cloned();
417            (branch, sha)
418        } else {
419            // No HEAD ref - find first refs/heads/* ref directly
420            let mut branch_info: Option<(String, String)> = None;
421            for (ref_name, sha) in refs.iter() {
422                if ref_name.starts_with("refs/heads/") {
423                    branch_info = Some((ref_name.clone(), sha.clone()));
424                    break;
425                }
426            }
427            match branch_info {
428                Some((branch, sha)) => (Some(branch), Some(sha)),
429                None => (None, None),
430            }
431        };
432
433        // Get tree SHA from commit
434        let tree_sha = commit_sha.as_ref()
435            .and_then(|sha| self.get_commit_tree(sha, &objects));
436
437        // Clone objects for async block
438        let objects_clone = objects.clone();
439
440        let root_cid = self.runtime.block_on(async {
441            // Build objects directory
442            let objects_cid = self.build_objects_dir(&objects).await?;
443
444            // Build refs directory
445            let refs_cid = self.build_refs_dir(&refs).await?;
446
447            // Build HEAD file - use default_branch if no explicit HEAD
448            // Git expects HEAD to end with newline, so add it if missing
449            let head_content = refs.get("HEAD")
450                .map(|h| if h.ends_with('\n') { h.clone() } else { format!("{}\n", h) })
451                .or_else(|| default_branch.as_ref().map(|b| format!("ref: {}\n", b)))
452                .unwrap_or_else(|| "ref: refs/heads/main\n".to_string());
453            debug!("HEAD content: {:?}", head_content);
454            let (head_cid, head_size) = self.tree.put(head_content.as_bytes()).await
455                .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
456            debug!("HEAD hash: {}", hex::encode(head_cid.hash));
457
458            // Build .git directory - use from_cid to preserve encryption keys
459            let mut git_entries = vec![
460                DirEntry::from_cid("HEAD", &head_cid).with_size(head_size),
461                DirEntry::from_cid("objects", &objects_cid).with_link_type(LinkType::Dir),
462                DirEntry::from_cid("refs", &refs_cid).with_link_type(LinkType::Dir),
463            ];
464
465            // Add config if we have a default branch
466            if let Some(ref branch) = default_branch {
467                let config = format!(
468                    "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = true\n[init]\n\tdefaultBranch = {}\n",
469                    branch.trim_start_matches("refs/heads/")
470                );
471                let (config_cid, config_size) = self.tree.put(config.as_bytes()).await
472                    .map_err(|e| Error::StorageError(format!("put config: {}", e)))?;
473                git_entries.push(DirEntry::from_cid("config", &config_cid).with_size(config_size));
474            }
475
476            // Build and add index file if we have a tree SHA
477            if let Some(ref tree_oid) = tree_sha {
478                match self.build_index_file(tree_oid, &objects_clone) {
479                    Ok(index_data) => {
480                        let (index_cid, index_size) = self.tree.put(&index_data).await
481                            .map_err(|e| Error::StorageError(format!("put index: {}", e)))?;
482                        git_entries.push(DirEntry::from_cid("index", &index_cid).with_size(index_size));
483                        info!("Added git index file ({} bytes)", index_data.len());
484                    }
485                    Err(e) => {
486                        debug!("Failed to build git index file: {} - continuing without index", e);
487                    }
488                }
489            }
490
491            let git_cid = self.tree.put_directory(git_entries).await
492                .map_err(|e| Error::StorageError(format!("put .git: {}", e)))?;
493
494            // Build root entries starting with .git
495            // Use from_cid to preserve the encryption key
496            let mut root_entries = vec![DirEntry::from_cid(".git", &git_cid).with_link_type(LinkType::Dir)];
497
498            // Add working tree files if we have a tree SHA
499            if let Some(ref tree_oid) = tree_sha {
500                let working_tree_entries = self.build_working_tree_entries(tree_oid, &objects_clone).await?;
501                root_entries.extend(working_tree_entries);
502                info!("Added {} working tree entries to root", root_entries.len() - 1);
503            }
504
505            // Sort entries for deterministic ordering
506            root_entries.sort_by(|a, b| a.name.cmp(&b.name));
507
508            let root_cid = self.tree.put_directory(root_entries).await
509                .map_err(|e| Error::StorageError(format!("put root: {}", e)))?;
510
511            info!("Built hashtree root: {} (encrypted: {}) (.git dir: {})",
512                hex::encode(root_cid.hash),
513                root_cid.key.is_some(),
514                hex::encode(git_cid.hash));
515
516            Ok::<Cid, Error>(root_cid)
517        })?;
518
519        // Cache the root CID
520        if let Ok(mut root) = self.root_cid.write() {
521            *root = Some(root_cid.clone());
522        }
523
524        Ok(root_cid)
525    }
526
527    /// Build working tree entries from a git tree object
528    async fn build_working_tree_entries(
529        &self,
530        tree_oid: &str,
531        objects: &HashMap<String, Vec<u8>>,
532    ) -> Result<Vec<DirEntry>> {
533        let mut entries = Vec::new();
534
535        // Get tree content
536        let (obj_type, content) = self.get_object_content(tree_oid, objects)
537            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;
538
539        if obj_type != ObjectType::Tree {
540            return Err(Error::InvalidObjectType(format!("expected tree, got {:?}", obj_type)));
541        }
542
543        // Parse tree entries
544        let tree_entries = parse_tree(&content)?;
545
546        for entry in tree_entries {
547            let oid_hex = entry.oid.to_hex();
548
549            if entry.is_tree() {
550                // Recursively build subdirectory
551                let sub_entries = self.build_working_tree_entries_boxed(&oid_hex, objects).await?;
552
553                // Create subdirectory in hashtree
554                let dir_cid = self.tree.put_directory(sub_entries).await
555                    .map_err(|e| Error::StorageError(format!("put dir {}: {}", entry.name, e)))?;
556
557                // Use from_cid to preserve encryption key
558                entries.push(
559                    DirEntry::from_cid(&entry.name, &dir_cid)
560                        .with_link_type(LinkType::Dir)
561                );
562            } else {
563                // Get blob content
564                if let Some((ObjectType::Blob, blob_content)) = self.get_object_content(&oid_hex, objects) {
565                    // Use put() instead of put_blob() to chunk large files
566                    let (cid, size) = self.tree.put(&blob_content).await
567                        .map_err(|e| Error::StorageError(format!("put blob {}: {}", entry.name, e)))?;
568
569                    // Use from_cid to preserve encryption key
570                    entries.push(
571                        DirEntry::from_cid(&entry.name, &cid)
572                            .with_size(size)
573                    );
574                }
575            }
576        }
577
578        // Sort for deterministic ordering
579        entries.sort_by(|a, b| a.name.cmp(&b.name));
580
581        Ok(entries)
582    }
583
584    /// Boxed version for async recursion
585    fn build_working_tree_entries_boxed<'a>(
586        &'a self,
587        tree_oid: &'a str,
588        objects: &'a HashMap<String, Vec<u8>>,
589    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
590        Box::pin(self.build_working_tree_entries(tree_oid, objects))
591    }
592
593    /// Build the objects directory using HashTree
594    async fn build_objects_dir(&self, objects: &HashMap<String, Vec<u8>>) -> Result<Cid> {
595        if objects.is_empty() {
596            // Return empty directory Cid
597            let empty_cid = self.tree.put_directory(vec![]).await
598                .map_err(|e| Error::StorageError(format!("put empty objects: {}", e)))?;
599            return Ok(empty_cid);
600        }
601
602        // Group objects by first 2 characters of SHA (git loose object structure)
603        // Git expects objects/XX/YYYYYY... where XX is first 2 hex chars
604        let mut buckets: HashMap<String, Vec<(String, Vec<u8>)>> = HashMap::new();
605        for (oid, data) in objects {
606            let prefix = &oid[..2];
607            let suffix = &oid[2..];
608            buckets.entry(prefix.to_string())
609                .or_default()
610                .push((suffix.to_string(), data.clone()));
611        }
612
613        // Build subdirectories for each prefix
614        let mut top_entries = Vec::new();
615        for (prefix, objs) in buckets {
616            let mut sub_entries = Vec::new();
617            for (suffix, data) in objs {
618                // Use put() instead of put_blob() to chunk large objects
619                // Git blobs can be >5MB which exceeds blossom server limits
620                let (cid, size) = self.tree.put(&data).await
621                    .map_err(|e| Error::StorageError(format!("put object {}{}: {}", prefix, suffix, e)))?;
622                // Use from_cid to preserve encryption key
623                sub_entries.push(DirEntry::from_cid(suffix, &cid).with_size(size));
624            }
625            // Sort for deterministic ordering
626            sub_entries.sort_by(|a, b| a.name.cmp(&b.name));
627
628            let sub_cid = self.tree.put_directory(sub_entries).await
629                .map_err(|e| Error::StorageError(format!("put objects/{}: {}", prefix, e)))?;
630            top_entries.push(DirEntry::from_cid(prefix, &sub_cid).with_link_type(LinkType::Dir));
631        }
632
633        // Sort for deterministic ordering
634        top_entries.sort_by(|a, b| a.name.cmp(&b.name));
635
636        let bucket_count = top_entries.len();
637        let cid = self.tree.put_directory(top_entries).await
638            .map_err(|e| Error::StorageError(format!("put objects dir: {}", e)))?;
639
640        debug!("Built objects dir with {} buckets: {}", bucket_count, hex::encode(cid.hash));
641        Ok(cid)
642    }
643
644    /// Build the refs directory using HashTree
645    async fn build_refs_dir(&self, refs: &HashMap<String, String>) -> Result<Cid> {
646        // Group refs by category (heads, tags, etc.)
647        let mut groups: HashMap<String, Vec<(String, String)>> = HashMap::new();
648
649        for (ref_name, value) in refs {
650            let parts: Vec<&str> = ref_name.split('/').collect();
651            if parts.len() >= 3 && parts[0] == "refs" {
652                let category = parts[1].to_string();
653                let name = parts[2..].join("/");
654                groups.entry(category).or_default().push((name, value.clone()));
655            }
656        }
657
658        let mut ref_entries = Vec::new();
659
660        for (category, refs_in_category) in groups {
661            let mut cat_entries = Vec::new();
662            for (name, value) in refs_in_category {
663                // Use put() to get Cid with encryption key
664                let (cid, _size) = self.tree.put(value.as_bytes()).await
665                    .map_err(|e| Error::StorageError(format!("put ref: {}", e)))?;
666                debug!("refs/{}/{} -> blob {}", category, name, hex::encode(cid.hash));
667                cat_entries.push(DirEntry::from_cid(name, &cid));
668            }
669
670            cat_entries.sort_by(|a, b| a.name.cmp(&b.name));
671
672            let cat_cid = self.tree.put_directory(cat_entries).await
673                .map_err(|e| Error::StorageError(format!("put {} dir: {}", category, e)))?;
674            debug!("refs/{} dir -> {}", category, hex::encode(cat_cid.hash));
675            ref_entries.push(DirEntry::from_cid(category, &cat_cid).with_link_type(LinkType::Dir));
676        }
677
678        if ref_entries.is_empty() {
679            // Return empty directory Cid
680            let empty_cid = self.tree.put_directory(vec![]).await
681                .map_err(|e| Error::StorageError(format!("put empty refs: {}", e)))?;
682            return Ok(empty_cid);
683        }
684
685        ref_entries.sort_by(|a, b| a.name.cmp(&b.name));
686
687        let refs_cid = self.tree.put_directory(ref_entries).await
688            .map_err(|e| Error::StorageError(format!("put refs dir: {}", e)))?;
689        debug!("refs dir -> {}", hex::encode(refs_cid.hash));
690        Ok(refs_cid)
691    }
692
    /// Build git index file from tree entries
    /// Returns the raw binary content of the index file
    ///
    /// Emits on-disk index format version 2: a 12-byte "DIRC" header, one
    /// entry per file sorted by path, then a trailing SHA-1 checksum over
    /// everything before it. The stat fields (ctime/mtime/dev/ino/uid/gid)
    /// are synthetic, so git will re-stat files on first use rather than
    /// trusting the cached metadata.
    fn build_index_file(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<u8>> {
        // Collect all file entries from the tree (recursively)
        let mut entries: Vec<(String, [u8; 20], u32, u32)> = Vec::new(); // (path, sha1, mode, size)
        self.collect_tree_entries_for_index(tree_oid, objects, "", &mut entries)?;

        // Sort entries by path (git index requirement)
        entries.sort_by(|a, b| a.0.cmp(&b.0));

        let entry_count = entries.len() as u32;
        debug!("Building git index with {} entries", entry_count);

        // Build index content
        let mut index_data = Vec::new();

        // Header: DIRC + version 2 + entry count (all integers big-endian)
        index_data.extend_from_slice(b"DIRC");
        index_data.extend_from_slice(&2u32.to_be_bytes()); // version 2
        index_data.extend_from_slice(&entry_count.to_be_bytes());

        // Current time for ctime/mtime (doesn't matter much for our use case)
        let now_sec = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs() as u32;

        for (path, sha1, mode, size) in &entries {
            // Track where this entry starts so padding is computed per-entry
            let entry_start = index_data.len();

            // ctime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mtime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // dev, ino (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mode
            index_data.extend_from_slice(&mode.to_be_bytes());
            // uid, gid (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // file size
            index_data.extend_from_slice(&size.to_be_bytes());
            // SHA-1
            index_data.extend_from_slice(sha1);
            // flags: path length (max 0xFFF) in low 12 bits
            // (assume-valid / extended / stage bits in the high 4 bits stay 0)
            let path_len = std::cmp::min(path.len(), 0xFFF) as u16;
            index_data.extend_from_slice(&path_len.to_be_bytes());
            // path (NUL-terminated)
            index_data.extend_from_slice(path.as_bytes());
            index_data.push(0); // NUL terminator

            // Pad to 8-byte boundary relative to entry start
            // (matches git: entry length a multiple of 8, name NUL-terminated)
            let entry_len = index_data.len() - entry_start;
            let padding = (8 - (entry_len % 8)) % 8;
            for _ in 0..padding {
                index_data.push(0);
            }
        }

        // Calculate SHA-1 checksum of everything and append
        let mut hasher = Sha1::new();
        hasher.update(&index_data);
        let checksum = hasher.finalize();
        index_data.extend_from_slice(&checksum);

        debug!("Built git index: {} bytes, {} entries", index_data.len(), entry_count);
        Ok(index_data)
    }
769
770    /// Collect file entries from a git tree for building the index
771    fn collect_tree_entries_for_index(
772        &self,
773        tree_oid: &str,
774        objects: &HashMap<String, Vec<u8>>,
775        prefix: &str,
776        entries: &mut Vec<(String, [u8; 20], u32, u32)>,
777    ) -> Result<()> {
778        let (obj_type, content) = self.get_object_content(tree_oid, objects)
779            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;
780
781        if obj_type != ObjectType::Tree {
782            return Err(Error::InvalidObjectType(format!("expected tree, got {:?}", obj_type)));
783        }
784
785        let tree_entries = parse_tree(&content)?;
786
787        for entry in tree_entries {
788            let path = if prefix.is_empty() {
789                entry.name.clone()
790            } else {
791                format!("{}/{}", prefix, entry.name)
792            };
793
794            let oid_hex = entry.oid.to_hex();
795
796            if entry.is_tree() {
797                // Recursively process subdirectory
798                self.collect_tree_entries_for_index(&oid_hex, objects, &path, entries)?;
799            } else {
800                // Get blob content for size and SHA-1
801                if let Some((ObjectType::Blob, blob_content)) = self.get_object_content(&oid_hex, objects) {
802                    // Convert hex SHA to bytes
803                    let mut sha1_bytes = [0u8; 20];
804                    if let Ok(bytes) = hex::decode(&oid_hex) {
805                        if bytes.len() == 20 {
806                            sha1_bytes.copy_from_slice(&bytes);
807                        }
808                    }
809
810                    // Mode: use entry.mode or default to regular file
811                    let mode = entry.mode;
812                    let size = blob_content.len() as u32;
813
814                    entries.push((path, sha1_bytes, mode, size));
815                }
816            }
817        }
818
819        Ok(())
820    }
821
    /// Get the underlying store
    ///
    /// Returns a reference to the shared [`LocalStore`] handle; callers may
    /// `Arc::clone` it to keep their own reference to the blob store.
    pub fn store(&self) -> &Arc<LocalStore> {
        &self.store
    }
826
    /// Get the HashTree for direct access
    ///
    /// Exposes the merkle-tree wrapper over the blob store for callers that
    /// need lower-level tree operations than this storage API provides.
    #[allow(dead_code)]
    pub fn hashtree(&self) -> &HashTree<LocalStore> {
        &self.tree
    }
832
833    /// Push all blobs to file servers
834    #[allow(dead_code)]
835    pub fn push_to_file_servers(
836        &self,
837        blossom: &hashtree_blossom::BlossomClient,
838    ) -> Result<(usize, usize)> {
839        let hashes = self.store.list()
840            .map_err(|e| Error::StorageError(format!("list hashes: {}", e)))?;
841
842        info!("Pushing {} blobs to file servers", hashes.len());
843
844        let mut uploaded = 0;
845        let mut existed = 0;
846
847        self.runtime.block_on(async {
848            for hash in &hashes {
849                let hex_hash = hex::encode(hash);
850                let data = match self.store.get_sync(hash) {
851                    Ok(Some(d)) => d,
852                    _ => continue,
853                };
854
855                match blossom.upload_if_missing(&data).await {
856                    Ok((_, true)) => {
857                        debug!("Uploaded {}", &hex_hash[..12]);
858                        uploaded += 1;
859                    }
860                    Ok((_, false)) => {
861                        existed += 1;
862                    }
863                    Err(e) => {
864                        debug!("Failed to upload {}: {}", &hex_hash[..12], e);
865                    }
866                }
867            }
868        });
869
870        info!("Upload complete: {} new, {} already existed", uploaded, existed);
871        Ok((uploaded, existed))
872    }
873
874    /// Clear all state (for testing or re-initialization)
875    #[allow(dead_code)]
876    pub fn clear(&self) -> Result<()> {
877        let mut objects = self.objects.write()
878            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
879        let mut refs = self.refs.write()
880            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
881        let mut root = self.root_cid.write()
882            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
883
884        objects.clear();
885        refs.clear();
886        *root = None;
887        Ok(())
888    }
889}
890
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a `GitStorage` rooted in a throwaway directory. The returned
    /// `TempDir` must stay alive for as long as the storage is used.
    fn new_storage() -> (GitStorage, TempDir) {
        let dir = TempDir::new().unwrap();
        let store = GitStorage::open(dir.path()).unwrap();
        (store, dir)
    }

    #[test]
    fn test_import_ref() {
        let (store, _dir) = new_storage();

        store.import_ref("refs/heads/main", "abc123def456").unwrap();

        // The imported ref is visible both directly and via the full listing.
        assert!(store.has_ref("refs/heads/main").unwrap());
        let listed = store.list_refs().unwrap();
        assert_eq!(listed.get("refs/heads/main"), Some(&"abc123def456".to_string()));
    }

    #[test]
    fn test_import_multiple_refs_preserves_all() {
        let (store, _dir) = new_storage();

        // Simulate loading several branches from a remote.
        for (name, sha) in [
            ("refs/heads/main", "sha_main"),
            ("refs/heads/dev", "sha_dev"),
            ("refs/heads/feature", "sha_feature"),
        ] {
            store.import_ref(name, sha).unwrap();
        }

        assert!(store.has_ref("refs/heads/main").unwrap());
        assert!(store.has_ref("refs/heads/dev").unwrap());
        assert!(store.has_ref("refs/heads/feature").unwrap());

        // A subsequent push-style write must not drop the imported refs.
        let oid = ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap();
        store.write_ref("refs/heads/new-branch", &Ref::Direct(oid)).unwrap();

        let listed = store.list_refs().unwrap();
        assert_eq!(listed.len(), 4);
        for name in [
            "refs/heads/main",
            "refs/heads/dev",
            "refs/heads/feature",
            "refs/heads/new-branch",
        ] {
            assert!(listed.contains_key(name));
        }
    }

    #[test]
    fn test_import_compressed_object() {
        let (store, _dir) = new_storage();

        // Fake zlib bytes (0x78 0x9c header followed by junk).
        let payload = vec![0x78, 0x9c, 0x01, 0x02, 0x03];
        store.import_compressed_object("abc123def456", payload.clone()).unwrap();

        assert_eq!(store.object_count().unwrap(), 1);
    }

    #[test]
    fn test_write_ref_overwrites_imported() {
        let (store, _dir) = new_storage();

        store.import_ref("refs/heads/main", "old_sha").unwrap();

        // Writing the same ref replaces the previously imported value.
        let oid = ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap();
        store.write_ref("refs/heads/main", &Ref::Direct(oid)).unwrap();

        let listed = store.list_refs().unwrap();
        assert_eq!(
            listed.get("refs/heads/main"),
            Some(&"0123456789abcdef0123456789abcdef01234567".to_string())
        );
    }

    #[test]
    fn test_delete_ref_preserves_others() {
        let (store, _dir) = new_storage();

        store.import_ref("refs/heads/main", "sha_main").unwrap();
        store.import_ref("refs/heads/dev", "sha_dev").unwrap();

        store.delete_ref("refs/heads/dev").unwrap();

        // Only the deleted ref disappears.
        assert!(store.has_ref("refs/heads/main").unwrap());
        assert!(!store.has_ref("refs/heads/dev").unwrap());
    }

    #[test]
    fn test_clear_removes_all() {
        let (store, _dir) = new_storage();

        store.import_ref("refs/heads/main", "sha_main").unwrap();
        store.import_compressed_object("obj1", vec![1, 2, 3]).unwrap();

        store.clear().unwrap();

        // Both refs and objects are gone after a clear.
        assert!(!store.has_ref("refs/heads/main").unwrap());
        assert_eq!(store.object_count().unwrap(), 0);
    }
}