// git_remote_htree/git/storage.rs
//! Hashtree-backed git object and ref storage with configurable persistence
//!
//! Stores git objects and refs in a hashtree merkle tree:
//!   root/
//!     .git/
//!       HEAD -> "ref: refs/heads/main"
//!       refs/heads/main -> <commit-sha1>
//!       objects/XX/YYYY... -> zlib-compressed loose object (standard git layout)
//!
//! The root hash (SHA-256) is the content-addressed identifier for the entire repo state.

12use flate2::read::ZlibDecoder;
13use flate2::write::ZlibEncoder;
14use flate2::Compression;
15use hashtree_config::{Config, StorageBackend};
16use hashtree_core::store::{Store, StoreError};
17use hashtree_core::types::Hash;
18use hashtree_core::{Cid, DirEntry, HashTree, HashTreeConfig, LinkType};
19use hashtree_fs::FsBlobStore;
20#[cfg(feature = "lmdb")]
21use hashtree_lmdb::LmdbBlobStore;
22use sha1::{Digest, Sha1};
23use std::collections::HashMap;
24use std::io::{Read, Write};
25use std::path::Path;
26use std::sync::Arc;
27use tokio::runtime::{Handle, Runtime};
28use tracing::{debug, info, warn};
29
30use super::object::{parse_tree, GitObject, ObjectId, ObjectType};
31use super::refs::{validate_ref_name, Ref};
32use super::{Error, Result};
33
/// Box type for async recursion
///
/// `build_working_tree_entries` recurses into subtrees; boxing (and pinning)
/// the future gives the recursive async call a sized, nameable type.
type BoxFuture<'a, T> = std::pin::Pin<Box<dyn std::future::Future<Output = T> + Send + 'a>>;

/// Runtime executor - either owns a runtime or reuses an existing one
enum RuntimeExecutor {
    /// Dedicated runtime created by us (used when no ambient tokio runtime exists)
    Owned(Runtime),
    /// Handle to an already-running runtime we were called from
    Handle(Handle),
}

impl RuntimeExecutor {
    /// Drive a future to completion synchronously on the wrapped runtime.
    fn block_on<F: std::future::Future>(&self, f: F) -> F::Output {
        match self {
            RuntimeExecutor::Owned(rt) => rt.block_on(f),
            // block_in_place tells the runtime this worker thread is about to
            // block, so `handle.block_on` is safe from inside a runtime thread.
            RuntimeExecutor::Handle(handle) => tokio::task::block_in_place(|| handle.block_on(f)),
        }
    }
}

/// Local blob store - wraps either FsBlobStore or LmdbBlobStore
pub enum LocalStore {
    /// Filesystem-backed blob storage (always available)
    Fs(FsBlobStore),
    /// LMDB-backed blob storage (compiled in only with the `lmdb` feature)
    #[cfg(feature = "lmdb")]
    Lmdb(LmdbBlobStore),
}

impl LocalStore {
    /// Create a new local store based on config
    ///
    /// The backend is chosen from `Config::load_or_default()`. If LMDB is
    /// requested but the `lmdb` feature was compiled out, this falls back to
    /// the filesystem backend with a warning rather than failing.
    pub fn new<P: AsRef<Path>>(path: P) -> std::result::Result<Self, StoreError> {
        let config = Config::load_or_default();
        match config.storage.backend {
            StorageBackend::Fs => Ok(LocalStore::Fs(FsBlobStore::new(path)?)),
            #[cfg(feature = "lmdb")]
            StorageBackend::Lmdb => Ok(LocalStore::Lmdb(LmdbBlobStore::new(path)?)),
            #[cfg(not(feature = "lmdb"))]
            StorageBackend::Lmdb => {
                warn!(
                    "LMDB backend requested but lmdb feature not enabled, using filesystem storage"
                );
                Ok(LocalStore::Fs(FsBlobStore::new(path)?))
            }
        }
    }

    /// List all hashes in the store
    pub fn list(&self) -> std::result::Result<Vec<Hash>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.list(),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.list(),
        }
    }

    /// Sync get operation
    ///
    /// Blocking fetch of a blob by hash; `None` when the hash is not present.
    pub fn get_sync(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get_sync(hash),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get_sync(hash),
        }
    }
}

// Async `Store` implementation: every method delegates to whichever backend
// variant is active. The cfg-gated arms keep the match exhaustive whether or
// not the `lmdb` feature is enabled.
#[async_trait::async_trait]
impl Store for LocalStore {
    async fn put(&self, hash: Hash, data: Vec<u8>) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.put(hash, data).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.put(hash, data).await,
        }
    }

    async fn get(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get(hash).await,
        }
    }

    async fn has(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.has(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.has(hash).await,
        }
    }

    async fn delete(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.delete(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.delete(hash).await,
        }
    }
}

/// Git storage backed by HashTree with configurable persistence
pub struct GitStorage {
    /// Content-addressed blob store shared with `tree`
    store: Arc<LocalStore>,
    /// Hashtree used to assemble the repo's merkle structure
    tree: HashTree<LocalStore>,
    /// Executor for driving async hashtree operations from sync code
    runtime: RuntimeExecutor,
    /// In-memory state for the current session
    /// (keys are 40-char oid hex; values are zlib-compressed loose objects)
    objects: std::sync::RwLock<HashMap<String, Vec<u8>>>,
    /// Ref name -> value (either "<sha1-hex>" or "ref: <target>")
    refs: std::sync::RwLock<HashMap<String, String>>,
    /// Cached root CID (hash + encryption key)
    /// Reset to `None` on every mutation so `build_tree` rebuilds lazily.
    root_cid: std::sync::RwLock<Option<Cid>>,
}

143impl GitStorage {
144    /// Open or create a git storage at the given path
145    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
146        let runtime = match Handle::try_current() {
147            Ok(handle) => RuntimeExecutor::Handle(handle),
148            Err(_) => {
149                let rt = Runtime::new()
150                    .map_err(|e| Error::StorageError(format!("tokio runtime: {}", e)))?;
151                RuntimeExecutor::Owned(rt)
152            }
153        };
154
155        let store_path = path.as_ref().join("blobs");
156        let store = Arc::new(
157            LocalStore::new(&store_path)
158                .map_err(|e| Error::StorageError(format!("local store: {}", e)))?,
159        );
160
161        // Use encrypted mode (default) - blossom servers require encrypted data
162        let tree = HashTree::new(HashTreeConfig::new(store.clone()));
163
164        Ok(Self {
165            store,
166            tree,
167            runtime,
168            objects: std::sync::RwLock::new(HashMap::new()),
169            refs: std::sync::RwLock::new(HashMap::new()),
170            root_cid: std::sync::RwLock::new(None),
171        })
172    }
173
174    /// Write an object, returning its ID
175    fn write_object(&self, obj: &GitObject) -> Result<ObjectId> {
176        let oid = obj.id();
177        let key = oid.to_hex();
178
179        let loose = obj.to_loose_format();
180        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
181        encoder.write_all(&loose)?;
182        let compressed = encoder.finish()?;
183
184        let mut objects = self
185            .objects
186            .write()
187            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
188        objects.insert(key, compressed);
189
190        // Invalidate cached root
191        if let Ok(mut root) = self.root_cid.write() {
192            *root = None;
193        }
194
195        Ok(oid)
196    }
197
198    /// Write raw object data (type + content already parsed)
199    pub fn write_raw_object(&self, obj_type: ObjectType, content: &[u8]) -> Result<ObjectId> {
200        let obj = GitObject::new(obj_type, content.to_vec());
201        self.write_object(&obj)
202    }
203
204    /// Read an object by ID from in-memory cache
205    #[allow(dead_code)]
206    fn read_object(&self, oid: &ObjectId) -> Result<GitObject> {
207        let key = oid.to_hex();
208        let objects = self
209            .objects
210            .read()
211            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
212        let compressed = objects
213            .get(&key)
214            .ok_or_else(|| Error::ObjectNotFound(key.clone()))?;
215
216        let mut decoder = ZlibDecoder::new(compressed.as_slice());
217        let mut data = Vec::new();
218        decoder.read_to_end(&mut data)?;
219
220        GitObject::from_loose_format(&data)
221    }
222
223    /// Write a ref
224    pub fn write_ref(&self, name: &str, target: &Ref) -> Result<()> {
225        validate_ref_name(name)?;
226
227        let value = match target {
228            Ref::Direct(oid) => oid.to_hex(),
229            Ref::Symbolic(target) => format!("ref: {}", target),
230        };
231
232        let mut refs = self
233            .refs
234            .write()
235            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
236        refs.insert(name.to_string(), value);
237
238        // Invalidate cached root
239        if let Ok(mut root) = self.root_cid.write() {
240            *root = None;
241        }
242
243        Ok(())
244    }
245
246    /// Read a ref
247    #[allow(dead_code)]
248    pub fn read_ref(&self, name: &str) -> Result<Option<Ref>> {
249        let refs = self
250            .refs
251            .read()
252            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
253
254        match refs.get(name) {
255            Some(value) => {
256                if let Some(target) = value.strip_prefix("ref: ") {
257                    Ok(Some(Ref::Symbolic(target.to_string())))
258                } else {
259                    let oid = ObjectId::from_hex(value)
260                        .ok_or_else(|| Error::StorageError(format!("invalid ref: {}", value)))?;
261                    Ok(Some(Ref::Direct(oid)))
262                }
263            }
264            None => Ok(None),
265        }
266    }
267
268    /// List all refs
269    #[allow(dead_code)]
270    pub fn list_refs(&self) -> Result<HashMap<String, String>> {
271        let refs = self
272            .refs
273            .read()
274            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
275        Ok(refs.clone())
276    }
277
278    /// Delete a ref
279    pub fn delete_ref(&self, name: &str) -> Result<bool> {
280        let mut refs = self
281            .refs
282            .write()
283            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
284        let existed = refs.remove(name).is_some();
285
286        // Invalidate cached root
287        if let Ok(mut root) = self.root_cid.write() {
288            *root = None;
289        }
290
291        Ok(existed)
292    }
293
294    /// Import a raw git object (already in loose format, zlib compressed)
295    /// Used when fetching existing objects from remote before push
296    pub fn import_compressed_object(&self, oid: &str, compressed_data: Vec<u8>) -> Result<()> {
297        let mut objects = self
298            .objects
299            .write()
300            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
301        objects.insert(oid.to_string(), compressed_data);
302
303        // Invalidate cached root
304        if let Ok(mut root) = self.root_cid.write() {
305            *root = None;
306        }
307
308        Ok(())
309    }
310
311    /// Import a ref directly (used when loading existing refs from remote)
312    pub fn import_ref(&self, name: &str, value: &str) -> Result<()> {
313        let mut refs = self
314            .refs
315            .write()
316            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
317        refs.insert(name.to_string(), value.to_string());
318
319        // Invalidate cached root
320        if let Ok(mut root) = self.root_cid.write() {
321            *root = None;
322        }
323
324        Ok(())
325    }
326
327    /// Check if a ref exists
328    #[cfg(test)]
329    pub fn has_ref(&self, name: &str) -> Result<bool> {
330        let refs = self
331            .refs
332            .read()
333            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
334        Ok(refs.contains_key(name))
335    }
336
337    /// Get count of objects in storage
338    #[cfg(test)]
339    pub fn object_count(&self) -> Result<usize> {
340        let objects = self
341            .objects
342            .read()
343            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
344        Ok(objects.len())
345    }
346
347    /// Get the cached root CID (returns None if tree hasn't been built)
348    #[allow(dead_code)]
349    pub fn get_root_cid(&self) -> Result<Option<Cid>> {
350        let root = self
351            .root_cid
352            .read()
353            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
354        Ok(root.clone())
355    }
356
357    /// Get the default branch name
358    #[allow(dead_code)]
359    pub fn default_branch(&self) -> Result<Option<String>> {
360        let refs = self
361            .refs
362            .read()
363            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
364
365        if let Some(head) = refs.get("HEAD") {
366            if let Some(target) = head.strip_prefix("ref: ") {
367                return Ok(Some(target.to_string()));
368            }
369        }
370        Ok(None)
371    }
372
373    /// Get the tree SHA from a commit object
374    fn get_commit_tree(
375        &self,
376        commit_oid: &str,
377        objects: &HashMap<String, Vec<u8>>,
378    ) -> Option<String> {
379        let compressed = objects.get(commit_oid)?;
380
381        // Decompress the object
382        let mut decoder = ZlibDecoder::new(&compressed[..]);
383        let mut decompressed = Vec::new();
384        decoder.read_to_end(&mut decompressed).ok()?;
385
386        // Parse git object format: "type size\0content"
387        let null_pos = decompressed.iter().position(|&b| b == 0)?;
388        let content = &decompressed[null_pos + 1..];
389
390        // Parse commit content - first line is "tree <sha>"
391        let content_str = std::str::from_utf8(content).ok()?;
392        let first_line = content_str.lines().next()?;
393        if first_line.starts_with("tree ") {
394            Some(first_line[5..].to_string())
395        } else {
396            None
397        }
398    }
399
400    /// Get git object content (decompressed, without header)
401    fn get_object_content(
402        &self,
403        oid: &str,
404        objects: &HashMap<String, Vec<u8>>,
405    ) -> Option<(ObjectType, Vec<u8>)> {
406        let compressed = objects.get(oid)?;
407
408        // Decompress the object
409        let mut decoder = ZlibDecoder::new(&compressed[..]);
410        let mut decompressed = Vec::new();
411        decoder.read_to_end(&mut decompressed).ok()?;
412
413        // Parse git object format: "type size\0content"
414        let null_pos = decompressed.iter().position(|&b| b == 0)?;
415        let header = std::str::from_utf8(&decompressed[..null_pos]).ok()?;
416        let obj_type = if header.starts_with("blob") {
417            ObjectType::Blob
418        } else if header.starts_with("tree") {
419            ObjectType::Tree
420        } else if header.starts_with("commit") {
421            ObjectType::Commit
422        } else {
423            return None;
424        };
425        let content = decompressed[null_pos + 1..].to_vec();
426        Some((obj_type, content))
427    }
428
    /// Build the hashtree and return the root CID (hash + encryption key)
    ///
    /// Materializes the in-memory objects/refs into a hashtree shaped as:
    ///   root/.git/{HEAD, config?, index?, objects/, refs/} plus working-tree
    ///   files at the root (when a commit tree can be resolved from HEAD).
    /// The resulting CID is cached in `root_cid` and returned directly on
    /// subsequent calls until a mutation invalidates it.
    pub fn build_tree(&self) -> Result<Cid> {
        // Check if we have a cached root
        if let Ok(root) = self.root_cid.read() {
            if let Some(ref cid) = *root {
                return Ok(cid.clone());
            }
        }

        // Hold both read locks for the duration of the build so the snapshot
        // is consistent.
        let objects = self
            .objects
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;

        // Get default branch from HEAD or find first branch ref
        let (default_branch, commit_sha) = if let Some(head) = refs.get("HEAD") {
            let branch = head.strip_prefix("ref: ").map(String::from);
            let sha = branch.as_ref().and_then(|b| refs.get(b)).cloned();
            (branch, sha)
        } else {
            // No HEAD ref - find first refs/heads/* ref directly
            // (iteration order of the HashMap is arbitrary, so "first" is
            // whichever branch happens to come up - acceptable fallback)
            let mut branch_info: Option<(String, String)> = None;
            for (ref_name, sha) in refs.iter() {
                if ref_name.starts_with("refs/heads/") {
                    branch_info = Some((ref_name.clone(), sha.clone()));
                    break;
                }
            }
            match branch_info {
                Some((branch, sha)) => (Some(branch), Some(sha)),
                None => (None, None),
            }
        };

        // Get tree SHA from commit
        let tree_sha = commit_sha
            .as_ref()
            .and_then(|sha| self.get_commit_tree(sha, &objects));

        // Clone objects for async block
        let objects_clone = objects.clone();

        let root_cid = self.runtime.block_on(async {
            // Build objects directory
            let objects_cid = self.build_objects_dir(&objects).await?;

            // Build refs directory
            let refs_cid = self.build_refs_dir(&refs).await?;

            // Build HEAD file - use default_branch if no explicit HEAD
            // Git expects HEAD to end with newline, so add it if missing
            let head_content = refs.get("HEAD")
                .map(|h| if h.ends_with('\n') { h.clone() } else { format!("{}\n", h) })
                .or_else(|| default_branch.as_ref().map(|b| format!("ref: {}\n", b)))
                .unwrap_or_else(|| "ref: refs/heads/main\n".to_string());
            debug!("HEAD content: {:?}", head_content);
            let (head_cid, head_size) = self.tree.put(head_content.as_bytes()).await
                .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
            debug!("HEAD hash: {}", hex::encode(head_cid.hash));

            // Build .git directory - use from_cid to preserve encryption keys
            let mut git_entries = vec![
                DirEntry::from_cid("HEAD", &head_cid).with_size(head_size),
                DirEntry::from_cid("objects", &objects_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("refs", &refs_cid).with_link_type(LinkType::Dir),
            ];

            // Add config if we have a default branch
            if let Some(ref branch) = default_branch {
                let config = format!(
                    "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = true\n[init]\n\tdefaultBranch = {}\n",
                    branch.trim_start_matches("refs/heads/")
                );
                let (config_cid, config_size) = self.tree.put(config.as_bytes()).await
                    .map_err(|e| Error::StorageError(format!("put config: {}", e)))?;
                git_entries.push(DirEntry::from_cid("config", &config_cid).with_size(config_size));
            }

            // Build and add index file if we have a tree SHA
            // (a failing index build is non-fatal - the repo is still usable)
            if let Some(ref tree_oid) = tree_sha {
                match self.build_index_file(tree_oid, &objects_clone) {
                    Ok(index_data) => {
                        let (index_cid, index_size) = self.tree.put(&index_data).await
                            .map_err(|e| Error::StorageError(format!("put index: {}", e)))?;
                        git_entries.push(DirEntry::from_cid("index", &index_cid).with_size(index_size));
                        info!("Added git index file ({} bytes)", index_data.len());
                    }
                    Err(e) => {
                        debug!("Failed to build git index file: {} - continuing without index", e);
                    }
                }
            }

            let git_cid = self.tree.put_directory(git_entries).await
                .map_err(|e| Error::StorageError(format!("put .git: {}", e)))?;

            // Build root entries starting with .git
            // Use from_cid to preserve the encryption key
            let mut root_entries = vec![DirEntry::from_cid(".git", &git_cid).with_link_type(LinkType::Dir)];

            // Add working tree files if we have a tree SHA
            if let Some(ref tree_oid) = tree_sha {
                let working_tree_entries = self.build_working_tree_entries(tree_oid, &objects_clone).await?;
                root_entries.extend(working_tree_entries);
                info!("Added {} working tree entries to root", root_entries.len() - 1);
            }

            // Sort entries for deterministic ordering
            root_entries.sort_by(|a, b| a.name.cmp(&b.name));

            let root_cid = self.tree.put_directory(root_entries).await
                .map_err(|e| Error::StorageError(format!("put root: {}", e)))?;

            info!("Built hashtree root: {} (encrypted: {}) (.git dir: {})",
                hex::encode(root_cid.hash),
                root_cid.key.is_some(),
                hex::encode(git_cid.hash));

            Ok::<Cid, Error>(root_cid)
        })?;

        // Cache the root CID
        if let Ok(mut root) = self.root_cid.write() {
            *root = Some(root_cid.clone());
        }

        Ok(root_cid)
    }

    /// Build working tree entries from a git tree object
    ///
    /// Walks the git tree `tree_oid`: each blob becomes a hashtree file entry
    /// and each subtree becomes a nested hashtree directory (via the boxed
    /// recursive helper). Entries are sorted by name for deterministic
    /// directory hashes. Tree entries that are neither tree nor readable blob
    /// are silently skipped.
    async fn build_working_tree_entries(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<DirEntry>> {
        let mut entries = Vec::new();

        // Get tree content
        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        // Parse tree entries
        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                // Recursively build subdirectory (boxed to break the
                // infinitely-recursive future type)
                let sub_entries = self
                    .build_working_tree_entries_boxed(&oid_hex, objects)
                    .await?;

                // Create subdirectory in hashtree
                let dir_cid =
                    self.tree.put_directory(sub_entries).await.map_err(|e| {
                        Error::StorageError(format!("put dir {}: {}", entry.name, e))
                    })?;

                // Use from_cid to preserve encryption key
                entries
                    .push(DirEntry::from_cid(&entry.name, &dir_cid).with_link_type(LinkType::Dir));
            } else {
                // Get blob content
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    // Use put() instead of put_blob() to chunk large files
                    let (cid, size) = self.tree.put(&blob_content).await.map_err(|e| {
                        Error::StorageError(format!("put blob {}: {}", entry.name, e))
                    })?;

                    // Use from_cid to preserve encryption key
                    entries.push(DirEntry::from_cid(&entry.name, &cid).with_size(size));
                }
            }
        }

        // Sort for deterministic ordering
        entries.sort_by(|a, b| a.name.cmp(&b.name));

        Ok(entries)
    }

    /// Boxed version for async recursion
    ///
    /// `build_working_tree_entries` calls itself for subtrees; pinning the
    /// future in a Box gives the recursive call a sized type the compiler
    /// can name (see the `BoxFuture` alias).
    fn build_working_tree_entries_boxed<'a>(
        &'a self,
        tree_oid: &'a str,
        objects: &'a HashMap<String, Vec<u8>>,
    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
        Box::pin(self.build_working_tree_entries(tree_oid, objects))
    }

    /// Build the objects directory using HashTree
    ///
    /// Mirrors git's loose-object layout: objects/XX/YYYY... where XX is the
    /// first two hex characters of the oid and YYYY... the remaining 38.
    /// Returns the CID of the objects/ directory; an empty map produces an
    /// empty directory rather than an error.
    async fn build_objects_dir(&self, objects: &HashMap<String, Vec<u8>>) -> Result<Cid> {
        if objects.is_empty() {
            // Return empty directory Cid
            let empty_cid = self
                .tree
                .put_directory(vec![])
                .await
                .map_err(|e| Error::StorageError(format!("put empty objects: {}", e)))?;
            return Ok(empty_cid);
        }

        // Group objects by first 2 characters of SHA (git loose object structure)
        // Git expects objects/XX/YYYYYY... where XX is first 2 hex chars
        let mut buckets: HashMap<String, Vec<(String, Vec<u8>)>> = HashMap::new();
        for (oid, data) in objects {
            let prefix = &oid[..2];
            let suffix = &oid[2..];
            buckets
                .entry(prefix.to_string())
                .or_default()
                .push((suffix.to_string(), data.clone()));
        }

        // Build subdirectories for each prefix
        let mut top_entries = Vec::new();
        for (prefix, objs) in buckets {
            let mut sub_entries = Vec::new();
            for (suffix, data) in objs {
                // Use put() instead of put_blob() to chunk large objects
                // Git blobs can be >5MB which exceeds blossom server limits
                let (cid, size) = self.tree.put(&data).await.map_err(|e| {
                    Error::StorageError(format!("put object {}{}: {}", prefix, suffix, e))
                })?;
                // Use from_cid to preserve encryption key
                sub_entries.push(DirEntry::from_cid(suffix, &cid).with_size(size));
            }
            // Sort for deterministic ordering
            sub_entries.sort_by(|a, b| a.name.cmp(&b.name));

            let sub_cid = self
                .tree
                .put_directory(sub_entries)
                .await
                .map_err(|e| Error::StorageError(format!("put objects/{}: {}", prefix, e)))?;
            top_entries.push(DirEntry::from_cid(prefix, &sub_cid).with_link_type(LinkType::Dir));
        }

        // Sort for deterministic ordering
        top_entries.sort_by(|a, b| a.name.cmp(&b.name));

        let bucket_count = top_entries.len();
        let cid = self
            .tree
            .put_directory(top_entries)
            .await
            .map_err(|e| Error::StorageError(format!("put objects dir: {}", e)))?;

        debug!(
            "Built objects dir with {} buckets: {}",
            bucket_count,
            hex::encode(cid.hash)
        );
        Ok(cid)
    }

    /// Build the refs directory using HashTree
    ///
    /// Produces refs/<category>/<name> (e.g. refs/heads/main) where each leaf
    /// is a small blob holding the ref's serialized value. Only names of the
    /// form "refs/<category>/<rest>" are included - anything else in the map
    /// (e.g. "HEAD") is intentionally skipped here since it does not live
    /// under refs/. Returns the CID of the refs/ directory (empty dir when
    /// there are no qualifying refs).
    async fn build_refs_dir(&self, refs: &HashMap<String, String>) -> Result<Cid> {
        // Group refs by category (heads, tags, etc.)
        let mut groups: HashMap<String, Vec<(String, String)>> = HashMap::new();

        for (ref_name, value) in refs {
            let parts: Vec<&str> = ref_name.split('/').collect();
            if parts.len() >= 3 && parts[0] == "refs" {
                let category = parts[1].to_string();
                // Re-join the remainder so nested names like
                // "refs/heads/feature/x" keep their inner slashes
                let name = parts[2..].join("/");
                groups
                    .entry(category)
                    .or_default()
                    .push((name, value.clone()));
            }
        }

        let mut ref_entries = Vec::new();

        for (category, refs_in_category) in groups {
            let mut cat_entries = Vec::new();
            for (name, value) in refs_in_category {
                // Use put() to get Cid with encryption key
                let (cid, _size) = self
                    .tree
                    .put(value.as_bytes())
                    .await
                    .map_err(|e| Error::StorageError(format!("put ref: {}", e)))?;
                debug!(
                    "refs/{}/{} -> blob {}",
                    category,
                    name,
                    hex::encode(cid.hash)
                );
                cat_entries.push(DirEntry::from_cid(name, &cid));
            }

            // Sort for deterministic ordering
            cat_entries.sort_by(|a, b| a.name.cmp(&b.name));

            let cat_cid = self
                .tree
                .put_directory(cat_entries)
                .await
                .map_err(|e| Error::StorageError(format!("put {} dir: {}", category, e)))?;
            debug!("refs/{} dir -> {}", category, hex::encode(cat_cid.hash));
            ref_entries.push(DirEntry::from_cid(category, &cat_cid).with_link_type(LinkType::Dir));
        }

        if ref_entries.is_empty() {
            // Return empty directory Cid
            let empty_cid = self
                .tree
                .put_directory(vec![])
                .await
                .map_err(|e| Error::StorageError(format!("put empty refs: {}", e)))?;
            return Ok(empty_cid);
        }

        ref_entries.sort_by(|a, b| a.name.cmp(&b.name));

        let refs_cid = self
            .tree
            .put_directory(ref_entries)
            .await
            .map_err(|e| Error::StorageError(format!("put refs dir: {}", e)))?;
        debug!("refs dir -> {}", hex::encode(refs_cid.hash));
        Ok(refs_cid)
    }

    /// Build git index file from tree entries
    /// Returns the raw binary content of the index file
    ///
    /// Emits an index in format version 2: "DIRC" magic + version + entry
    /// count, then one fixed-layout stat entry per file (path-sorted), and a
    /// trailing SHA-1 checksum over everything before it. Stat fields that we
    /// cannot know (dev, ino, uid, gid) are zeroed and ctime/mtime are set to
    /// "now" - NOTE(review): this presumably makes git treat entries as
    /// stat-dirty on first status and re-hash them; confirm acceptable.
    fn build_index_file(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<u8>> {
        // Collect all file entries from the tree (recursively)
        let mut entries: Vec<(String, [u8; 20], u32, u32)> = Vec::new(); // (path, sha1, mode, size)
        self.collect_tree_entries_for_index(tree_oid, objects, "", &mut entries)?;

        // Sort entries by path (git index requirement)
        entries.sort_by(|a, b| a.0.cmp(&b.0));

        let entry_count = entries.len() as u32;
        debug!("Building git index with {} entries", entry_count);

        // Build index content
        let mut index_data = Vec::new();

        // Header: DIRC + version 2 + entry count
        index_data.extend_from_slice(b"DIRC");
        index_data.extend_from_slice(&2u32.to_be_bytes()); // version 2
        index_data.extend_from_slice(&entry_count.to_be_bytes());

        // Current time for ctime/mtime (doesn't matter much for our use case)
        let now_sec = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs() as u32;

        for (path, sha1, mode, size) in &entries {
            let entry_start = index_data.len();

            // All multi-byte fields are big-endian per the index format.
            // ctime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mtime sec, nsec
            index_data.extend_from_slice(&now_sec.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // dev, ino (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // mode
            index_data.extend_from_slice(&mode.to_be_bytes());
            // uid, gid (use 0)
            index_data.extend_from_slice(&0u32.to_be_bytes());
            index_data.extend_from_slice(&0u32.to_be_bytes());
            // file size
            index_data.extend_from_slice(&size.to_be_bytes());
            // SHA-1
            index_data.extend_from_slice(sha1);
            // flags: path length (max 0xFFF) in low 12 bits
            // (stage bits and assume-valid flag are left as zero)
            let path_len = std::cmp::min(path.len(), 0xFFF) as u16;
            index_data.extend_from_slice(&path_len.to_be_bytes());
            // path (NUL-terminated)
            index_data.extend_from_slice(path.as_bytes());
            index_data.push(0); // NUL terminator

            // Pad to 8-byte boundary relative to entry start
            // (the NUL above counts toward the padding, so zero extra bytes
            // are added when the entry already ends on a boundary)
            let entry_len = index_data.len() - entry_start;
            let padding = (8 - (entry_len % 8)) % 8;
            for _ in 0..padding {
                index_data.push(0);
            }
        }

        // Calculate SHA-1 checksum of everything and append
        let mut hasher = Sha1::new();
        hasher.update(&index_data);
        let checksum = hasher.finalize();
        index_data.extend_from_slice(&checksum);

        debug!(
            "Built git index: {} bytes, {} entries",
            index_data.len(),
            entry_count
        );
        Ok(index_data)
    }

850    /// Collect file entries from a git tree for building the index
851    fn collect_tree_entries_for_index(
852        &self,
853        tree_oid: &str,
854        objects: &HashMap<String, Vec<u8>>,
855        prefix: &str,
856        entries: &mut Vec<(String, [u8; 20], u32, u32)>,
857    ) -> Result<()> {
858        let (obj_type, content) = self
859            .get_object_content(tree_oid, objects)
860            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;
861
862        if obj_type != ObjectType::Tree {
863            return Err(Error::InvalidObjectType(format!(
864                "expected tree, got {:?}",
865                obj_type
866            )));
867        }
868
869        let tree_entries = parse_tree(&content)?;
870
871        for entry in tree_entries {
872            let path = if prefix.is_empty() {
873                entry.name.clone()
874            } else {
875                format!("{}/{}", prefix, entry.name)
876            };
877
878            let oid_hex = entry.oid.to_hex();
879
880            if entry.is_tree() {
881                // Recursively process subdirectory
882                self.collect_tree_entries_for_index(&oid_hex, objects, &path, entries)?;
883            } else {
884                // Get blob content for size and SHA-1
885                if let Some((ObjectType::Blob, blob_content)) =
886                    self.get_object_content(&oid_hex, objects)
887                {
888                    // Convert hex SHA to bytes
889                    let mut sha1_bytes = [0u8; 20];
890                    if let Ok(bytes) = hex::decode(&oid_hex) {
891                        if bytes.len() == 20 {
892                            sha1_bytes.copy_from_slice(&bytes);
893                        }
894                    }
895
896                    // Mode: use entry.mode or default to regular file
897                    let mode = entry.mode;
898                    let size = blob_content.len() as u32;
899
900                    entries.push((path, sha1_bytes, mode, size));
901                }
902            }
903        }
904
905        Ok(())
906    }
907
    /// Get the underlying store
    ///
    /// Returns a shared handle to the local blob store backing this
    /// repository; callers may clone the `Arc` to hold their own reference.
    pub fn store(&self) -> &Arc<LocalStore> {
        &self.store
    }
912
    /// Get the HashTree for direct access
    ///
    /// Exposes the underlying merkle tree for callers needing lower-level
    /// operations than the git-oriented API on this type provides.
    #[allow(dead_code)] // kept for external/low-level consumers
    pub fn hashtree(&self) -> &HashTree<LocalStore> {
        &self.tree
    }
918
919    /// Push all blobs to file servers
920    #[allow(dead_code)]
921    pub fn push_to_file_servers(
922        &self,
923        blossom: &hashtree_blossom::BlossomClient,
924    ) -> Result<(usize, usize)> {
925        let hashes = self
926            .store
927            .list()
928            .map_err(|e| Error::StorageError(format!("list hashes: {}", e)))?;
929
930        info!("Pushing {} blobs to file servers", hashes.len());
931
932        let mut uploaded = 0;
933        let mut existed = 0;
934
935        self.runtime.block_on(async {
936            for hash in &hashes {
937                let hex_hash = hex::encode(hash);
938                let data = match self.store.get_sync(hash) {
939                    Ok(Some(d)) => d,
940                    _ => continue,
941                };
942
943                match blossom.upload_if_missing(&data).await {
944                    Ok((_, true)) => {
945                        debug!("Uploaded {}", &hex_hash[..12]);
946                        uploaded += 1;
947                    }
948                    Ok((_, false)) => {
949                        existed += 1;
950                    }
951                    Err(e) => {
952                        debug!("Failed to upload {}: {}", &hex_hash[..12], e);
953                    }
954                }
955            }
956        });
957
958        info!(
959            "Upload complete: {} new, {} already existed",
960            uploaded, existed
961        );
962        Ok((uploaded, existed))
963    }
964
965    /// Clear all state (for testing or re-initialization)
966    #[allow(dead_code)]
967    pub fn clear(&self) -> Result<()> {
968        let mut objects = self
969            .objects
970            .write()
971            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
972        let mut refs = self
973            .refs
974            .write()
975            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
976        let mut root = self
977            .root_cid
978            .write()
979            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
980
981        objects.clear();
982        refs.clear();
983        *root = None;
984        Ok(())
985    }
986}
987
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Spin up a GitStorage rooted in a throwaway directory. The TempDir
    /// guard must stay alive for the storage's lifetime.
    fn fresh_storage() -> (GitStorage, TempDir) {
        let dir = TempDir::new().unwrap();
        let storage = GitStorage::open(dir.path()).unwrap();
        (storage, dir)
    }

    #[test]
    fn test_import_ref() {
        let (storage, _guard) = fresh_storage();

        storage
            .import_ref("refs/heads/main", "abc123def456")
            .unwrap();

        // The ref is visible both via has_ref and via the full listing.
        assert!(storage.has_ref("refs/heads/main").unwrap());
        let listing = storage.list_refs().unwrap();
        assert_eq!(
            listing.get("refs/heads/main").map(String::as_str),
            Some("abc123def456")
        );
    }

    #[test]
    fn test_import_multiple_refs_preserves_all() {
        let (storage, _guard) = fresh_storage();

        // Simulate loading several refs from a remote.
        for (name, sha) in [
            ("refs/heads/main", "sha_main"),
            ("refs/heads/dev", "sha_dev"),
            ("refs/heads/feature", "sha_feature"),
        ] {
            storage.import_ref(name, sha).unwrap();
        }

        for name in ["refs/heads/main", "refs/heads/dev", "refs/heads/feature"] {
            assert!(storage.has_ref(name).unwrap());
        }

        // A push-style write of a brand-new ref must not clobber the others.
        let oid = ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap();
        storage
            .write_ref("refs/heads/new-branch", &Ref::Direct(oid))
            .unwrap();

        let listing = storage.list_refs().unwrap();
        assert_eq!(listing.len(), 4);
        for name in [
            "refs/heads/main",
            "refs/heads/dev",
            "refs/heads/feature",
            "refs/heads/new-branch",
        ] {
            assert!(listing.contains_key(name));
        }
    }

    #[test]
    fn test_import_compressed_object() {
        let (storage, _guard) = fresh_storage();

        // Any bytes will do here; the import path does not validate zlib.
        let payload = vec![0x78, 0x9c, 0x01, 0x02, 0x03];
        storage
            .import_compressed_object("abc123def456", payload)
            .unwrap();

        assert_eq!(storage.object_count().unwrap(), 1);
    }

    #[test]
    fn test_write_ref_overwrites_imported() {
        let (storage, _guard) = fresh_storage();

        storage.import_ref("refs/heads/main", "old_sha").unwrap();

        // Re-writing the same ref replaces the imported value.
        let oid = ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap();
        storage
            .write_ref("refs/heads/main", &Ref::Direct(oid))
            .unwrap();

        let listing = storage.list_refs().unwrap();
        assert_eq!(
            listing.get("refs/heads/main").map(String::as_str),
            Some("0123456789abcdef0123456789abcdef01234567")
        );
    }

    #[test]
    fn test_delete_ref_preserves_others() {
        let (storage, _guard) = fresh_storage();

        storage.import_ref("refs/heads/main", "sha_main").unwrap();
        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();

        storage.delete_ref("refs/heads/dev").unwrap();

        // Only the deleted ref is gone.
        assert!(storage.has_ref("refs/heads/main").unwrap());
        assert!(!storage.has_ref("refs/heads/dev").unwrap());
    }

    #[test]
    fn test_clear_removes_all() {
        let (storage, _guard) = fresh_storage();

        storage.import_ref("refs/heads/main", "sha_main").unwrap();
        storage
            .import_compressed_object("obj1", vec![1, 2, 3])
            .unwrap();

        storage.clear().unwrap();

        // Both refs and objects are wiped.
        assert!(!storage.has_ref("refs/heads/main").unwrap());
        assert_eq!(storage.object_count().unwrap(), 0);
    }
}