1use flate2::read::ZlibDecoder;
15use flate2::write::ZlibEncoder;
16use flate2::Compression;
17use hashtree_config::{Config, StorageBackend};
18use hashtree_core::store::{Store, StoreError, StoreStats};
19use hashtree_core::types::Hash;
20use hashtree_core::{Cid, DirEntry, HashTree, HashTreeConfig, LinkType};
21use hashtree_fs::FsBlobStore;
22#[cfg(feature = "lmdb")]
23use hashtree_lmdb::LmdbBlobStore;
24use sha1::{Digest, Sha1};
25use std::collections::{BTreeMap, HashMap};
26use std::io::{Read, Write};
27use std::path::Path;
28use std::sync::Arc;
29use tokio::runtime::{Handle, Runtime};
30use tracing::{debug, info, warn};
31
32use super::object::{parse_tree, GitObject, ObjectId, ObjectType};
33use super::refs::{validate_ref_name, Ref};
34use super::{Error, Result};
35
/// Boxed, sendable future used by the recursive async builders below
/// (an `async fn` cannot call itself without boxing the future).
type BoxFuture<'a, T> = std::pin::Pin<Box<dyn std::future::Future<Output = T> + Send + 'a>>;
38
/// In-memory trie of ref names: each path segment of a ref (split on `/`)
/// becomes either a leaf in `files` or a nested `RefDirectory` in `dirs`.
#[derive(Default)]
struct RefDirectory {
    // Leaf segment -> ref value (e.g. "main" -> "<sha>").
    files: BTreeMap<String, String>,
    // Namespace segment -> subtree (e.g. "heads" -> { "main": ... }).
    dirs: BTreeMap<String, RefDirectory>,
}

impl RefDirectory {
    /// Inserts `value` at the path described by `parts`.
    ///
    /// A single remaining segment becomes a file entry; more segments
    /// descend into (creating as needed) nested directories. An empty
    /// `parts` slice is silently ignored.
    fn insert(&mut self, parts: &[&str], value: String) {
        match parts {
            [] => {}
            [leaf] => {
                self.files.insert((*leaf).to_string(), value);
            }
            [dir, rest @ ..] => {
                self.dirs
                    .entry((*dir).to_string())
                    .or_default()
                    .insert(rest, value);
            }
        }
    }
}
61
/// How this module drives async work: a Tokio runtime it created and owns,
/// or a handle to an already-running runtime provided by the caller.
enum RuntimeExecutor {
    // Runtime created by `GitStorage::open_*`; dropped with the executor.
    Owned(Runtime),
    // Handle to an ambient runtime detected via `Handle::try_current()`.
    Handle(Handle),
}
67
impl RuntimeExecutor {
    /// Runs future `f` to completion on the underlying runtime.
    ///
    /// For a borrowed handle, `block_in_place` tells Tokio this worker is
    /// about to block so the scheduler can compensate.
    /// NOTE(review): `block_in_place` panics on a current-thread runtime —
    /// this assumes callers run a multi-thread runtime; confirm.
    fn block_on<F: std::future::Future>(&self, f: F) -> F::Output {
        match self {
            RuntimeExecutor::Owned(rt) => rt.block_on(f),
            RuntimeExecutor::Handle(handle) => tokio::task::block_in_place(|| handle.block_on(f)),
        }
    }
}
76
/// Local blob storage backend: plain filesystem, or LMDB when the `lmdb`
/// feature is compiled in. All `Store` operations fan out to the variant.
pub enum LocalStore {
    Fs(FsBlobStore),
    #[cfg(feature = "lmdb")]
    Lmdb(LmdbBlobStore),
}
83
impl LocalStore {
    /// Opens a blob store at `path` for the requested `backend`.
    ///
    /// `max_bytes == 0` means unlimited. When the `lmdb` feature is not
    /// compiled in, a requested LMDB backend degrades to the filesystem
    /// store with a warning instead of failing.
    pub(crate) fn new_for_backend<P: AsRef<Path>>(
        path: P,
        backend: StorageBackend,
        max_bytes: u64,
    ) -> std::result::Result<Self, StoreError> {
        let path = path.as_ref();
        #[cfg(feature = "lmdb")]
        {
            // Route through the opener-injectable variant so the
            // LMDB -> filesystem fallback path can be exercised in tests.
            return Self::new_for_backend_with_openers(
                path,
                backend,
                max_bytes,
                Self::open_fs_store,
                Self::open_lmdb_store,
            );
        }

        #[cfg(not(feature = "lmdb"))]
        match backend {
            StorageBackend::Fs => Self::open_fs_store(path, max_bytes),
            #[cfg(not(feature = "lmdb"))]
            StorageBackend::Lmdb => {
                warn!(
                    "LMDB backend requested but lmdb feature not enabled, using filesystem storage"
                );
                Self::open_fs_store(path, max_bytes)
            }
        }
    }

    /// Opens the filesystem-backed store; `max_bytes == 0` disables the cap.
    fn open_fs_store(path: &Path, max_bytes: u64) -> std::result::Result<Self, StoreError> {
        if max_bytes > 0 {
            Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
                path, max_bytes,
            )?))
        } else {
            Ok(LocalStore::Fs(FsBlobStore::new(path)?))
        }
    }

    /// Opens the LMDB-backed store; `max_bytes == 0` disables the cap.
    #[cfg(feature = "lmdb")]
    fn open_lmdb_store(path: &Path, max_bytes: u64) -> std::result::Result<Self, StoreError> {
        if max_bytes > 0 {
            Ok(LocalStore::Lmdb(LmdbBlobStore::with_max_bytes(
                path, max_bytes,
            )?))
        } else {
            Ok(LocalStore::Lmdb(LmdbBlobStore::new(path)?))
        }
    }

    /// Like `new_for_backend`, but with injectable openers so tests can
    /// simulate LMDB failures. When the LMDB opener reports an
    /// environment-level "unsupported" error (see
    /// `should_fallback_from_lmdb_error`), falls back to the filesystem
    /// opener; all other errors are propagated unchanged.
    #[cfg(feature = "lmdb")]
    fn new_for_backend_with_openers<FS, LMDB>(
        path: &Path,
        backend: StorageBackend,
        max_bytes: u64,
        fs_open: FS,
        lmdb_open: LMDB,
    ) -> std::result::Result<Self, StoreError>
    where
        FS: Fn(&Path, u64) -> std::result::Result<Self, StoreError>,
        LMDB: Fn(&Path, u64) -> std::result::Result<Self, StoreError>,
    {
        match backend {
            StorageBackend::Fs => fs_open(path, max_bytes),
            StorageBackend::Lmdb => match lmdb_open(path, max_bytes) {
                Ok(store) => Ok(store),
                Err(err) if should_fallback_from_lmdb_error(&err) => {
                    warn!(
                        path = %path.display(),
                        "LMDB backend is unsupported in this environment, falling back to filesystem storage"
                    );
                    fs_open(path, max_bytes)
                }
                Err(err) => Err(err),
            },
        }
    }

    /// Opens a store with the configured backend and no size limit.
    pub fn new<P: AsRef<Path>>(path: P) -> std::result::Result<Self, StoreError> {
        Self::new_with_max_bytes(path, 0)
    }

    /// Opens a store with the backend from the user config and an explicit
    /// byte cap (0 = unlimited).
    pub fn new_with_max_bytes<P: AsRef<Path>>(
        path: P,
        max_bytes: u64,
    ) -> std::result::Result<Self, StoreError> {
        let config = Config::load_or_default();
        Self::new_for_backend(path, config.storage.backend, max_bytes)
    }

    /// Lists the hashes of all stored blobs.
    pub fn list(&self) -> std::result::Result<Vec<Hash>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.list(),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.list(),
        }
    }

    /// Synchronous blob read (usable without a runtime); `Ok(None)` when
    /// the blob is absent.
    pub fn get_sync(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get_sync(hash),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get_sync(hash),
        }
    }
}
196
/// Returns `true` when an LMDB open error indicates the environment lacks
/// the syscalls LMDB needs (`ENOSYS`), in which case the caller should fall
/// back to the filesystem backend rather than fail outright.
#[cfg(feature = "lmdb")]
pub(crate) fn should_fallback_from_lmdb_error(error: &StoreError) -> bool {
    match error {
        StoreError::Io(io_error) => io_error.raw_os_error() == Some(libc::ENOSYS),
        _ => false,
    }
}
204
// Async `Store` implementation: every operation fans out to whichever
// backend variant this `LocalStore` holds.
#[async_trait::async_trait]
impl Store for LocalStore {
    async fn put(&self, hash: Hash, data: Vec<u8>) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.put(hash, data).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.put(hash, data).await,
        }
    }

    async fn get(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get(hash).await,
        }
    }

    async fn has(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.has(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.has(hash).await,
        }
    }

    async fn delete(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.delete(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.delete(hash).await,
        }
    }

    fn set_max_bytes(&self, max: u64) {
        match self {
            LocalStore::Fs(store) => store.set_max_bytes(max),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.set_max_bytes(max),
        }
    }

    fn max_bytes(&self) -> Option<u64> {
        match self {
            LocalStore::Fs(store) => store.max_bytes(),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.max_bytes(),
        }
    }

    // Converts the backend's own stats struct into the trait's `StoreStats`;
    // any backend error degrades to zeroed stats rather than failing.
    async fn stats(&self) -> StoreStats {
        match self {
            LocalStore::Fs(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    pinned_count: stats.pinned_count as u64,
                    pinned_bytes: stats.pinned_bytes,
                },
                Err(_) => StoreStats::default(),
            },
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    // The LMDB stats used here expose no pinning info.
                    pinned_count: 0,
                    pinned_bytes: 0,
                },
                Err(_) => StoreStats::default(),
            },
        }
    }

    async fn evict_if_needed(&self) -> std::result::Result<u64, StoreError> {
        match self {
            LocalStore::Fs(store) => store.evict_if_needed().await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.evict_if_needed().await,
        }
    }
}
287
/// Git repository storage backed by a content-addressed hashtree: loose
/// objects and refs are kept in memory and materialized into the hashtree
/// on demand by `build_tree`.
pub struct GitStorage {
    // Blob store shared with `tree`.
    store: Arc<LocalStore>,
    // Content-addressed tree builder over `store`.
    tree: HashTree<LocalStore>,
    // Owned or borrowed Tokio runtime used to drive the async store API.
    runtime: RuntimeExecutor,
    // oid (hex) -> zlib-compressed loose-object bytes.
    objects: std::sync::RwLock<HashMap<String, Vec<u8>>>,
    // ref name -> raw value: "<sha>" or "ref: <target>".
    refs: std::sync::RwLock<HashMap<String, String>>,
    // Cached result of the last `build_tree`; cleared on any mutation.
    root_cid: std::sync::RwLock<Option<Cid>>,
}
299
300impl GitStorage {
    /// Opens storage rooted at `path`, sizing the blob cap from the user
    /// config's `storage.max_size_gb` (saturating multiply; 0 = unlimited).
    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
        let config = Config::load_or_default();
        let max_size_bytes = config
            .storage
            .max_size_gb
            .saturating_mul(1024 * 1024 * 1024);
        Self::open_with_max_bytes(path, max_size_bytes)
    }
310
    /// Opens storage with an explicit byte cap, using the backend from the
    /// user config.
    pub fn open_with_max_bytes(path: impl AsRef<Path>, max_size_bytes: u64) -> Result<Self> {
        let config = Config::load_or_default();
        Self::open_with_backend_and_max_bytes(path, config.storage.backend, max_size_bytes)
    }
316
    /// Opens storage at `path/blobs` with an explicit backend and byte cap.
    ///
    /// Reuses the ambient Tokio runtime when called from async context;
    /// otherwise creates and owns a fresh runtime.
    pub fn open_with_backend_and_max_bytes(
        path: impl AsRef<Path>,
        backend: StorageBackend,
        max_size_bytes: u64,
    ) -> Result<Self> {
        let runtime = match Handle::try_current() {
            Ok(handle) => RuntimeExecutor::Handle(handle),
            Err(_) => {
                let rt = Runtime::new()
                    .map_err(|e| Error::StorageError(format!("tokio runtime: {}", e)))?;
                RuntimeExecutor::Owned(rt)
            }
        };

        let store_path = path.as_ref().join("blobs");
        let store = Arc::new(
            LocalStore::new_for_backend(&store_path, backend, max_size_bytes)
                .map_err(|e| Error::StorageError(format!("local store: {}", e)))?,
        );

        // The hashtree shares the same store handle used for direct access.
        let tree = HashTree::new(HashTreeConfig::new(store.clone()));

        Ok(Self {
            store,
            tree,
            runtime,
            objects: std::sync::RwLock::new(HashMap::new()),
            refs: std::sync::RwLock::new(HashMap::new()),
            root_cid: std::sync::RwLock::new(None),
        })
    }
349
    /// Synchronously runs the store's eviction pass; returns the number
    /// reported by the backend.
    pub fn evict_if_needed(&self) -> Result<u64> {
        self.runtime
            .block_on(self.store.evict_if_needed())
            .map_err(|e| Error::StorageError(format!("evict: {}", e)))
    }
356
    /// Stores `obj` in the in-memory object map, keyed by its hex id, as a
    /// zlib-compressed loose object; invalidates the cached root CID.
    fn write_object(&self, obj: &GitObject) -> Result<ObjectId> {
        let oid = obj.id();
        let key = oid.to_hex();

        let loose = obj.to_loose_format();
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(&loose)?;
        let compressed = encoder.finish()?;

        let mut objects = self
            .objects
            .write()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        objects.insert(key, compressed);

        // Any new object invalidates the cached hashtree root. A poisoned
        // lock is silently skipped here (best effort).
        if let Ok(mut root) = self.root_cid.write() {
            *root = None;
        }

        Ok(oid)
    }
380
    /// Wraps raw `content` in a `GitObject` of `obj_type` and stores it;
    /// returns the resulting object id.
    pub fn write_raw_object(&self, obj_type: ObjectType, content: &[u8]) -> Result<ObjectId> {
        let obj = GitObject::new(obj_type, content.to_vec());
        self.write_object(&obj)
    }
386
    /// Reads an object back from the in-memory map: decompresses the stored
    /// zlib bytes and parses the loose-object format.
    ///
    /// Errors with `ObjectNotFound` when `oid` is absent.
    #[allow(dead_code)]
    fn read_object(&self, oid: &ObjectId) -> Result<GitObject> {
        let key = oid.to_hex();
        let objects = self
            .objects
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        let compressed = objects
            .get(&key)
            .ok_or_else(|| Error::ObjectNotFound(key.clone()))?;

        let mut decoder = ZlibDecoder::new(compressed.as_slice());
        let mut data = Vec::new();
        decoder.read_to_end(&mut data)?;

        GitObject::from_loose_format(&data)
    }
405
    /// Writes a ref after validating its name: direct refs store the hex
    /// sha, symbolic refs store `"ref: <target>"`. Invalidates the cached
    /// root CID.
    pub fn write_ref(&self, name: &str, target: &Ref) -> Result<()> {
        validate_ref_name(name)?;

        let value = match target {
            Ref::Direct(oid) => oid.to_hex(),
            Ref::Symbolic(target) => format!("ref: {}", target),
        };

        let mut refs = self
            .refs
            .write()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        refs.insert(name.to_string(), value);

        // Ref changes invalidate the cached hashtree root (best effort).
        if let Ok(mut root) = self.root_cid.write() {
            *root = None;
        }

        Ok(())
    }
428
429 #[allow(dead_code)]
431 pub fn read_ref(&self, name: &str) -> Result<Option<Ref>> {
432 let refs = self
433 .refs
434 .read()
435 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
436
437 match refs.get(name) {
438 Some(value) => {
439 if let Some(target) = value.strip_prefix("ref: ") {
440 Ok(Some(Ref::Symbolic(target.to_string())))
441 } else {
442 let oid = ObjectId::from_hex(value)
443 .ok_or_else(|| Error::StorageError(format!("invalid ref: {}", value)))?;
444 Ok(Some(Ref::Direct(oid)))
445 }
446 }
447 None => Ok(None),
448 }
449 }
450
    /// Returns a snapshot copy of all refs (name -> raw stored value).
    #[allow(dead_code)]
    pub fn list_refs(&self) -> Result<HashMap<String, String>> {
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        Ok(refs.clone())
    }
460
    /// Removes a ref; returns whether it existed. Invalidates the cached
    /// root CID either way.
    pub fn delete_ref(&self, name: &str) -> Result<bool> {
        let mut refs = self
            .refs
            .write()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        let existed = refs.remove(name).is_some();

        // Best-effort cache invalidation (skipped on a poisoned lock).
        if let Ok(mut root) = self.root_cid.write() {
            *root = None;
        }

        Ok(existed)
    }
476
    /// Inserts an already-zlib-compressed loose object under `oid` as-is
    /// (used when importing from another repository). No validation is
    /// performed on `oid` or the payload. Invalidates the cached root CID.
    pub fn import_compressed_object(&self, oid: &str, compressed_data: Vec<u8>) -> Result<()> {
        let mut objects = self
            .objects
            .write()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        objects.insert(oid.to_string(), compressed_data);

        // Best-effort cache invalidation (skipped on a poisoned lock).
        if let Ok(mut root) = self.root_cid.write() {
            *root = None;
        }

        Ok(())
    }
493
    /// Inserts a raw ref value verbatim (no name validation — counterpart
    /// to `import_compressed_object`). Invalidates the cached root CID.
    pub fn import_ref(&self, name: &str, value: &str) -> Result<()> {
        let mut refs = self
            .refs
            .write()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        refs.insert(name.to_string(), value.to_string());

        // Best-effort cache invalidation (skipped on a poisoned lock).
        if let Ok(mut root) = self.root_cid.write() {
            *root = None;
        }

        Ok(())
    }
509
    /// Test helper: whether a ref with `name` exists.
    #[cfg(test)]
    pub fn has_ref(&self, name: &str) -> Result<bool> {
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        Ok(refs.contains_key(name))
    }
519
    /// Test helper: number of loose objects currently held in memory.
    #[cfg(test)]
    pub fn object_count(&self) -> Result<usize> {
        let objects = self
            .objects
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        Ok(objects.len())
    }
529
    /// Returns the cached root CID from the last `build_tree`, if still
    /// valid (mutations clear it).
    #[allow(dead_code)]
    pub fn get_root_cid(&self) -> Result<Option<Cid>> {
        let root = self
            .root_cid
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        Ok(root.clone())
    }
539
540 #[allow(dead_code)]
542 pub fn default_branch(&self) -> Result<Option<String>> {
543 let refs = self
544 .refs
545 .read()
546 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
547
548 if let Some(head) = refs.get("HEAD") {
549 if let Some(target) = head.strip_prefix("ref: ") {
550 return Ok(Some(target.to_string()));
551 }
552 }
553 Ok(None)
554 }
555
556 fn get_commit_tree(
558 &self,
559 commit_oid: &str,
560 objects: &HashMap<String, Vec<u8>>,
561 ) -> Option<String> {
562 let compressed = objects.get(commit_oid)?;
563
564 let mut decoder = ZlibDecoder::new(&compressed[..]);
566 let mut decompressed = Vec::new();
567 decoder.read_to_end(&mut decompressed).ok()?;
568
569 let null_pos = decompressed.iter().position(|&b| b == 0)?;
571 let content = &decompressed[null_pos + 1..];
572
573 let content_str = std::str::from_utf8(content).ok()?;
575 let first_line = content_str.lines().next()?;
576 first_line
577 .strip_prefix("tree ")
578 .map(|tree_hash| tree_hash.to_string())
579 }
580
    /// Decompresses object `oid` and splits it into (type, body).
    ///
    /// The type is parsed from the loose-object header's leading keyword;
    /// returns `None` when the object is missing, undecompressible, has no
    /// header terminator, or has an unknown type keyword.
    fn get_object_content(
        &self,
        oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Option<(ObjectType, Vec<u8>)> {
        let compressed = objects.get(oid)?;

        let mut decoder = ZlibDecoder::new(&compressed[..]);
        let mut decompressed = Vec::new();
        decoder.read_to_end(&mut decompressed).ok()?;

        // Header is "<type> <len>\0"; everything after the NUL is the body.
        let null_pos = decompressed.iter().position(|&b| b == 0)?;
        let header = std::str::from_utf8(&decompressed[..null_pos]).ok()?;
        let obj_type = if header.starts_with("blob") {
            ObjectType::Blob
        } else if header.starts_with("tree") {
            ObjectType::Tree
        } else if header.starts_with("commit") {
            ObjectType::Commit
        } else if header.starts_with("tag") {
            ObjectType::Tag
        } else {
            return None;
        };
        let content = decompressed[null_pos + 1..].to_vec();
        Some((obj_type, content))
    }
611
612 fn peel_tag_target(&self, oid: &str, objects: &HashMap<String, Vec<u8>>) -> Option<String> {
613 let (obj_type, content) = self.get_object_content(oid, objects)?;
614 if obj_type != ObjectType::Tag {
615 return Some(oid.to_string());
616 }
617
618 let target = std::str::from_utf8(&content)
619 .ok()?
620 .lines()
621 .find_map(|line| line.strip_prefix("object "))
622 .map(str::trim)?
623 .to_string();
624
625 match self.get_object_content(&target, objects)?.0 {
626 ObjectType::Tag => self.peel_tag_target(&target, objects),
627 _ => Some(target),
628 }
629 }
630
631 fn build_info_refs_content(
632 &self,
633 refs: &HashMap<String, String>,
634 objects: &HashMap<String, Vec<u8>>,
635 ) -> String {
636 let mut lines = Vec::new();
637
638 for (name, value) in refs {
639 if name == "HEAD" {
640 continue;
641 }
642
643 let oid = value.trim().to_string();
644 lines.push((name.clone(), oid.clone()));
645
646 if name.starts_with("refs/tags/") {
647 if let Some(peeled) = self.peel_tag_target(&oid, objects) {
648 if peeled != oid {
649 lines.push((format!("{}^{{}}", name), peeled));
650 }
651 }
652 }
653 }
654
655 lines.sort_by(|a, b| a.0.cmp(&b.0));
656
657 let mut content = String::new();
658 for (name, oid) in lines {
659 content.push_str(&oid);
660 content.push('\t');
661 content.push_str(&name);
662 content.push('\n');
663 }
664 content
665 }
666
    /// Builds the `info/` directory containing the `info/refs` listing and
    /// returns its CID.
    async fn build_info_dir(
        &self,
        refs: &HashMap<String, String>,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Cid> {
        let info_refs = self.build_info_refs_content(refs, objects);
        let (info_refs_cid, info_refs_size) = self
            .tree
            .put(info_refs.as_bytes())
            .await
            .map_err(|e| Error::StorageError(format!("put info/refs: {}", e)))?;

        self.tree
            .put_directory(vec![
                DirEntry::from_cid("refs", &info_refs_cid).with_size(info_refs_size)
            ])
            .await
            .map_err(|e| Error::StorageError(format!("put info dir: {}", e)))
    }
686
    /// Materializes the whole repository into the hashtree and returns the
    /// root CID.
    ///
    /// Layout: a `.git/` directory (HEAD, info/, objects/, refs/, plus
    /// `config` and `index` when a default branch / commit tree is known)
    /// and, alongside it, the working-tree files of the HEAD commit.
    /// The result is cached in `root_cid`; mutations clear the cache.
    pub fn build_tree(&self) -> Result<Cid> {
        // Fast path: reuse the cached root if no mutation invalidated it.
        if let Ok(root) = self.root_cid.read() {
            if let Some(ref cid) = *root {
                return Ok(cid.clone());
            }
        }

        // Make room before writing; eviction failure is non-fatal.
        if let Err(err) = self.evict_if_needed() {
            debug!("pre-build eviction skipped: {}", err);
        }

        let objects = self
            .objects
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;

        // Resolve the default branch and its commit: prefer HEAD's symbolic
        // target, otherwise fall back to any refs/heads/* entry
        // (note: HashMap iteration order makes that fallback arbitrary).
        let (default_branch, commit_sha) = if let Some(head) = refs.get("HEAD") {
            let branch = head.strip_prefix("ref: ").map(String::from);
            let sha = branch.as_ref().and_then(|b| refs.get(b)).cloned();
            (branch, sha)
        } else {
            let mut branch_info: Option<(String, String)> = None;
            for (ref_name, sha) in refs.iter() {
                if ref_name.starts_with("refs/heads/") {
                    branch_info = Some((ref_name.clone(), sha.clone()));
                    break;
                }
            }
            match branch_info {
                Some((branch, sha)) => (Some(branch), Some(sha)),
                None => (None, None),
            }
        };

        // Root tree of the HEAD commit, used for the index + working tree.
        let tree_sha = commit_sha
            .as_ref()
            .and_then(|sha| self.get_commit_tree(sha, &objects));

        let objects_clone = objects.clone();

        let root_cid = self.runtime.block_on(async {
            let objects_cid = self.build_objects_dir(&objects).await?;

            let refs_cid = self.build_refs_dir(&refs).await?;

            let info_cid = self.build_info_dir(&refs, &objects_clone).await?;

            // HEAD must be newline-terminated; synthesize one pointing at
            // the default branch (or refs/heads/main) when absent.
            let head_content = refs.get("HEAD")
                .map(|h| if h.ends_with('\n') { h.clone() } else { format!("{}\n", h) })
                .or_else(|| default_branch.as_ref().map(|b| format!("ref: {}\n", b)))
                .unwrap_or_else(|| "ref: refs/heads/main\n".to_string());
            debug!("HEAD content: {:?}", head_content);
            let (head_cid, head_size) = self.tree.put(head_content.as_bytes()).await
                .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
            debug!("HEAD hash: {}", hex::encode(head_cid.hash));

            let mut git_entries = vec![
                DirEntry::from_cid("HEAD", &head_cid).with_size(head_size),
                DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("objects", &objects_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("refs", &refs_cid).with_link_type(LinkType::Dir),
            ];

            // Minimal bare-repo config advertising the default branch.
            if let Some(ref branch) = default_branch {
                let config = format!(
                    "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = true\n[init]\n\tdefaultBranch = {}\n",
                    branch.trim_start_matches("refs/heads/")
                );
                let (config_cid, config_size) = self.tree.put(config.as_bytes()).await
                    .map_err(|e| Error::StorageError(format!("put config: {}", e)))?;
                git_entries.push(DirEntry::from_cid("config", &config_cid).with_size(config_size));
            }

            // Index is best-effort: failure to build it is non-fatal.
            if let Some(ref tree_oid) = tree_sha {
                match self.build_index_file(tree_oid, &objects_clone) {
                    Ok(index_data) => {
                        let (index_cid, index_size) = self.tree.put(&index_data).await
                            .map_err(|e| Error::StorageError(format!("put index: {}", e)))?;
                        git_entries.push(DirEntry::from_cid("index", &index_cid).with_size(index_size));
                        info!("Added git index file ({} bytes)", index_data.len());
                    }
                    Err(e) => {
                        debug!("Failed to build git index file: {} - continuing without index", e);
                    }
                }
            }

            let git_cid = self.tree.put_directory(git_entries).await
                .map_err(|e| Error::StorageError(format!("put .git: {}", e)))?;

            let mut root_entries = vec![DirEntry::from_cid(".git", &git_cid).with_link_type(LinkType::Dir)];

            // Check out the HEAD commit's files next to .git.
            if let Some(ref tree_oid) = tree_sha {
                let working_tree_entries = self.build_working_tree_entries(tree_oid, &objects_clone).await?;
                root_entries.extend(working_tree_entries);
                info!("Added {} working tree entries to root", root_entries.len() - 1);
            }

            root_entries.sort_by(|a, b| a.name.cmp(&b.name));

            let root_cid = self.tree.put_directory(root_entries).await
                .map_err(|e| Error::StorageError(format!("put root: {}", e)))?;

            info!("Built hashtree root: {} (encrypted: {}) (.git dir: {})",
                hex::encode(root_cid.hash),
                root_cid.key.is_some(),
                hex::encode(git_cid.hash));

            Ok::<Cid, Error>(root_cid)
        })?;

        // Cache for subsequent calls (best effort on a poisoned lock).
        if let Ok(mut root) = self.root_cid.write() {
            *root = Some(root_cid.clone());
        }

        Ok(root_cid)
    }
827
    /// Converts the git tree object `tree_oid` into hashtree `DirEntry`s:
    /// subtrees become nested hashtree directories (via the boxed recursive
    /// helper), blobs become file entries. Non-blob, non-tree entries and
    /// missing blobs are silently skipped. Entries are returned sorted by
    /// name.
    async fn build_working_tree_entries(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<DirEntry>> {
        let mut entries = Vec::new();

        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                // Recurse through the boxed helper (async fns can't recurse
                // directly).
                let sub_entries = self
                    .build_working_tree_entries_boxed(&oid_hex, objects)
                    .await?;

                let dir_cid =
                    self.tree.put_directory(sub_entries).await.map_err(|e| {
                        Error::StorageError(format!("put dir {}: {}", entry.name, e))
                    })?;

                entries
                    .push(DirEntry::from_cid(&entry.name, &dir_cid).with_link_type(LinkType::Dir));
            } else {
                // Only materialize entries that resolve to blobs.
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    let (cid, size) = self.tree.put(&blob_content).await.map_err(|e| {
                        Error::StorageError(format!("put blob {}: {}", entry.name, e))
                    })?;

                    entries.push(DirEntry::from_cid(&entry.name, &cid).with_size(size));
                }
            }
        }

        entries.sort_by(|a, b| a.name.cmp(&b.name));

        Ok(entries)
    }
890
    /// Boxed trampoline for `build_working_tree_entries` so the async
    /// function can recurse into subtrees.
    fn build_working_tree_entries_boxed<'a>(
        &'a self,
        tree_oid: &'a str,
        objects: &'a HashMap<String, Vec<u8>>,
    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
        Box::pin(self.build_working_tree_entries(tree_oid, objects))
    }
899
900 async fn build_objects_dir(&self, objects: &HashMap<String, Vec<u8>>) -> Result<Cid> {
902 let mut top_entries = Vec::new();
903
904 if !objects.is_empty() {
905 let mut buckets: HashMap<String, Vec<(String, Vec<u8>)>> = HashMap::new();
908 for (oid, data) in objects {
909 let prefix = &oid[..2];
910 let suffix = &oid[2..];
911 buckets
912 .entry(prefix.to_string())
913 .or_default()
914 .push((suffix.to_string(), data.clone()));
915 }
916
917 for (prefix, objs) in buckets {
919 let mut sub_entries = Vec::new();
920 for (suffix, data) in objs {
921 let (cid, size) = self.tree.put(&data).await.map_err(|e| {
924 Error::StorageError(format!("put object {}{}: {}", prefix, suffix, e))
925 })?;
926 sub_entries.push(DirEntry::from_cid(suffix, &cid).with_size(size));
928 }
929 sub_entries.sort_by(|a, b| a.name.cmp(&b.name));
931
932 let sub_cid =
933 self.tree.put_directory(sub_entries).await.map_err(|e| {
934 Error::StorageError(format!("put objects/{}: {}", prefix, e))
935 })?;
936 top_entries
937 .push(DirEntry::from_cid(prefix, &sub_cid).with_link_type(LinkType::Dir));
938 }
939 }
940
941 let (packs_cid, packs_size) = self
942 .tree
943 .put(b"")
944 .await
945 .map_err(|e| Error::StorageError(format!("put objects/info/packs: {}", e)))?;
946 let info_cid = self
947 .tree
948 .put_directory(vec![
949 DirEntry::from_cid("packs", &packs_cid).with_size(packs_size)
950 ])
951 .await
952 .map_err(|e| Error::StorageError(format!("put objects/info: {}", e)))?;
953 top_entries.push(DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir));
954
955 top_entries.sort_by(|a, b| a.name.cmp(&b.name));
957
958 let entry_count = top_entries.len();
959 let cid = self
960 .tree
961 .put_directory(top_entries)
962 .await
963 .map_err(|e| Error::StorageError(format!("put objects dir: {}", e)))?;
964
965 debug!(
966 "Built objects dir with {} entries: {}",
967 entry_count,
968 hex::encode(cid.hash)
969 );
970 Ok(cid)
971 }
972
    /// Builds the `refs/` directory from all refs whose names look like
    /// `refs/<ns>/<name...>` (at least three segments starting with
    /// "refs"); anything else — including HEAD — is excluded. Produces an
    /// empty directory when no refs qualify.
    async fn build_refs_dir(&self, refs: &HashMap<String, String>) -> Result<Cid> {
        let mut root = RefDirectory::default();

        for (ref_name, value) in refs {
            let parts: Vec<&str> = ref_name.split('/').collect();
            if parts.len() >= 3 && parts[0] == "refs" {
                // Insert without the leading "refs" segment.
                root.insert(&parts[1..], value.clone());
            }
        }

        let mut ref_entries = self.build_ref_entries_recursive(&root, "refs").await?;

        if ref_entries.is_empty() {
            let empty_cid = self
                .tree
                .put_directory(vec![])
                .await
                .map_err(|e| Error::StorageError(format!("put empty refs: {}", e)))?;
            return Ok(empty_cid);
        }

        ref_entries.sort_by(|a, b| a.name.cmp(&b.name));

        let refs_cid = self
            .tree
            .put_directory(ref_entries)
            .await
            .map_err(|e| Error::StorageError(format!("put refs dir: {}", e)))?;
        debug!("refs dir -> {}", hex::encode(refs_cid.hash));
        Ok(refs_cid)
    }
1006
    /// Recursively converts a `RefDirectory` trie into hashtree entries:
    /// files become blobs holding the raw ref value, subdirectories become
    /// nested hashtree directories. Boxed because async fns cannot recurse.
    /// `prefix` is only used for log messages.
    fn build_ref_entries_recursive<'a>(
        &'a self,
        dir: &'a RefDirectory,
        prefix: &'a str,
    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
        Box::pin(async move {
            let mut entries = Vec::new();

            for (name, value) in &dir.files {
                let (cid, size) = self
                    .tree
                    .put(value.as_bytes())
                    .await
                    .map_err(|e| Error::StorageError(format!("put ref: {}", e)))?;
                debug!("{}/{} -> blob {}", prefix, name, hex::encode(cid.hash));
                entries.push(DirEntry::from_cid(name, &cid).with_size(size));
            }

            for (name, child) in &dir.dirs {
                let child_prefix = format!("{prefix}/{name}");
                let child_entries = self
                    .build_ref_entries_recursive(child, &child_prefix)
                    .await?;
                let child_cid =
                    self.tree.put_directory(child_entries).await.map_err(|e| {
                        Error::StorageError(format!("put {child_prefix} dir: {}", e))
                    })?;
                debug!("{} dir -> {}", child_prefix, hex::encode(child_cid.hash));
                entries.push(DirEntry::from_cid(name, &child_cid).with_link_type(LinkType::Dir));
            }

            entries.sort_by(|a, b| a.name.cmp(&b.name));
            Ok(entries)
        })
    }
1042
1043 fn build_index_file(
1046 &self,
1047 tree_oid: &str,
1048 objects: &HashMap<String, Vec<u8>>,
1049 ) -> Result<Vec<u8>> {
1050 let mut entries: Vec<(String, [u8; 20], u32, u32)> = Vec::new(); self.collect_tree_entries_for_index(tree_oid, objects, "", &mut entries)?;
1053
1054 entries.sort_by(|a, b| a.0.cmp(&b.0));
1056
1057 let entry_count = entries.len() as u32;
1058 debug!("Building git index with {} entries", entry_count);
1059
1060 let mut index_data = Vec::new();
1062
1063 index_data.extend_from_slice(b"DIRC");
1065 index_data.extend_from_slice(&2u32.to_be_bytes()); index_data.extend_from_slice(&entry_count.to_be_bytes());
1067
1068 let now_sec = std::time::SystemTime::now()
1070 .duration_since(std::time::UNIX_EPOCH)
1071 .unwrap_or_default()
1072 .as_secs() as u32;
1073
1074 for (path, sha1, mode, size) in &entries {
1075 let entry_start = index_data.len();
1076
1077 index_data.extend_from_slice(&now_sec.to_be_bytes());
1079 index_data.extend_from_slice(&0u32.to_be_bytes());
1080 index_data.extend_from_slice(&now_sec.to_be_bytes());
1082 index_data.extend_from_slice(&0u32.to_be_bytes());
1083 index_data.extend_from_slice(&0u32.to_be_bytes());
1085 index_data.extend_from_slice(&0u32.to_be_bytes());
1086 index_data.extend_from_slice(&mode.to_be_bytes());
1088 index_data.extend_from_slice(&0u32.to_be_bytes());
1090 index_data.extend_from_slice(&0u32.to_be_bytes());
1091 index_data.extend_from_slice(&size.to_be_bytes());
1093 index_data.extend_from_slice(sha1);
1095 let path_len = std::cmp::min(path.len(), 0xFFF) as u16;
1097 index_data.extend_from_slice(&path_len.to_be_bytes());
1098 index_data.extend_from_slice(path.as_bytes());
1100 index_data.push(0); let entry_len = index_data.len() - entry_start;
1104 let padding = (8 - (entry_len % 8)) % 8;
1105 index_data.extend(std::iter::repeat_n(0, padding));
1106 }
1107
1108 let mut hasher = Sha1::new();
1110 hasher.update(&index_data);
1111 let checksum = hasher.finalize();
1112 index_data.extend_from_slice(&checksum);
1113
1114 debug!(
1115 "Built git index: {} bytes, {} entries",
1116 index_data.len(),
1117 entry_count
1118 );
1119 Ok(index_data)
1120 }
1121
    /// Depth-first walk of the git tree `tree_oid`, appending one
    /// `(path, sha1, mode, size)` tuple per blob to `entries`. `prefix` is
    /// the slash-joined path accumulated so far ("" at the root). Entries
    /// that are not trees and don't resolve to blobs are skipped.
    fn collect_tree_entries_for_index(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
        prefix: &str,
        entries: &mut Vec<(String, [u8; 20], u32, u32)>,
    ) -> Result<()> {
        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let path = if prefix.is_empty() {
                entry.name.clone()
            } else {
                format!("{}/{}", prefix, entry.name)
            };

            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                self.collect_tree_entries_for_index(&oid_hex, objects, &path, entries)?;
            } else {
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    // Decode the 40-char hex oid into raw sha1 bytes; a
                    // malformed oid leaves the hash zeroed rather than
                    // failing the whole index.
                    let mut sha1_bytes = [0u8; 20];
                    if let Ok(bytes) = hex::decode(&oid_hex) {
                        if bytes.len() == 20 {
                            sha1_bytes.copy_from_slice(&bytes);
                        }
                    }

                    let mode = entry.mode;
                    let size = blob_content.len() as u32;

                    entries.push((path, sha1_bytes, mode, size));
                }
            }
        }

        Ok(())
    }
1179
    /// Shared handle to the underlying blob store.
    pub fn store(&self) -> &Arc<LocalStore> {
        &self.store
    }
1184
    /// The hashtree builder layered over the blob store.
    #[allow(dead_code)]
    pub fn hashtree(&self) -> &HashTree<LocalStore> {
        &self.tree
    }
1190
    /// Uploads every local blob to the Blossom file servers, skipping ones
    /// the server already has. Returns `(newly_uploaded, already_existed)`.
    /// Individual read/upload failures are logged and skipped, not fatal.
    #[allow(dead_code)]
    pub fn push_to_file_servers(
        &self,
        blossom: &hashtree_blossom::BlossomClient,
    ) -> Result<(usize, usize)> {
        let hashes = self
            .store
            .list()
            .map_err(|e| Error::StorageError(format!("list hashes: {}", e)))?;

        info!("Pushing {} blobs to file servers", hashes.len());

        let mut uploaded = 0;
        let mut existed = 0;

        self.runtime.block_on(async {
            for hash in &hashes {
                let hex_hash = hex::encode(hash);
                // Blobs that vanished or fail to read are skipped silently.
                let data = match self.store.get_sync(hash) {
                    Ok(Some(d)) => d,
                    _ => continue,
                };

                match blossom.upload_if_missing(&data).await {
                    Ok((_, true)) => {
                        debug!("Uploaded {}", &hex_hash[..12]);
                        uploaded += 1;
                    }
                    Ok((_, false)) => {
                        existed += 1;
                    }
                    Err(e) => {
                        debug!("Failed to upload {}: {}", &hex_hash[..12], e);
                    }
                }
            }
        });

        info!(
            "Upload complete: {} new, {} already existed",
            uploaded, existed
        );
        Ok((uploaded, existed))
    }
1236
1237 #[allow(dead_code)]
1239 pub fn clear(&self) -> Result<()> {
1240 let mut objects = self
1241 .objects
1242 .write()
1243 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1244 let mut refs = self
1245 .refs
1246 .write()
1247 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1248 let mut root = self
1249 .root_cid
1250 .write()
1251 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1252
1253 objects.clear();
1254 refs.clear();
1255 *root = None;
1256 Ok(())
1257 }
1258}
1259
1260#[cfg(test)]
1261mod tests {
1262 use super::*;
1263 use hashtree_core::store::Store;
1264 use hashtree_core::{sha256, LinkType};
1265 use std::io::{Read, Write};
1266 use std::net::{TcpListener, TcpStream};
1267 use std::path::Path;
1268 use std::process::{Child, Command, Stdio};
1269 #[cfg(feature = "lmdb")]
1270 use std::sync::atomic::{AtomicUsize, Ordering};
1271 use std::time::{Duration, Instant};
1272 use tempfile::TempDir;
1273
1274 fn create_test_storage() -> (GitStorage, TempDir) {
1275 let temp_dir = TempDir::new().unwrap();
1276 let storage = GitStorage::open(temp_dir.path()).unwrap();
1277 (storage, temp_dir)
1278 }
1279
1280 fn create_test_storage_with_limit(max_size_bytes: u64) -> (GitStorage, TempDir) {
1281 let temp_dir = TempDir::new().unwrap();
1282 let storage = GitStorage::open_with_backend_and_max_bytes(
1283 temp_dir.path(),
1284 StorageBackend::Fs,
1285 max_size_bytes,
1286 )
1287 .unwrap();
1288 (storage, temp_dir)
1289 }
1290
    /// Total bytes currently held by the local blob store, regardless of
    /// which backend variant is active.
    fn local_total_bytes(storage: &GitStorage) -> u64 {
        match storage.store().as_ref() {
            LocalStore::Fs(store) => store.stats().unwrap().total_bytes,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.stats().unwrap().total_bytes,
        }
    }
1298
    /// Writes a minimal blob -> tree -> commit chain into `storage` and
    /// returns the commit's object id.
    fn write_test_commit(storage: &GitStorage) -> ObjectId {
        let blob_oid = storage
            .write_raw_object(ObjectType::Blob, b"hello from hashtree\n")
            .unwrap();

        // Raw git tree entry: "<mode> <name>\0" followed by the 20 raw
        // SHA-1 bytes of the referenced object (not the hex form).
        let mut tree_content = Vec::new();
        tree_content.extend_from_slice(b"100644 README.md\0");
        tree_content.extend_from_slice(&hex::decode(blob_oid.to_hex()).unwrap());
        let tree_oid = storage
            .write_raw_object(ObjectType::Tree, &tree_content)
            .unwrap();

        // Fixed author/committer timestamps keep the commit id deterministic
        // across runs.
        let commit_content = format!(
            "tree {}\nauthor Test User <test@example.com> 0 +0000\ncommitter Test User <test@example.com> 0 +0000\n\nInitial commit\n",
            tree_oid.to_hex()
        );
        storage
            .write_raw_object(ObjectType::Commit, commit_content.as_bytes())
            .unwrap()
    }
1319
1320 fn export_tree_to_fs<S: Store>(
1321 runtime: &RuntimeExecutor,
1322 tree: &HashTree<S>,
1323 cid: &Cid,
1324 dst: &Path,
1325 ) {
1326 std::fs::create_dir_all(dst).unwrap();
1327 let entries = runtime.block_on(tree.list_directory(cid)).unwrap();
1328 for entry in entries {
1329 let entry_cid = Cid {
1330 hash: entry.hash,
1331 key: entry.key,
1332 };
1333 let path = dst.join(&entry.name);
1334 match entry.link_type {
1335 LinkType::Dir => export_tree_to_fs(runtime, tree, &entry_cid, &path),
1336 LinkType::Blob | LinkType::File => {
1337 let data = runtime
1338 .block_on(tree.get(&entry_cid, None))
1339 .unwrap()
1340 .unwrap();
1341 if let Some(parent) = path.parent() {
1342 std::fs::create_dir_all(parent).unwrap();
1343 }
1344 std::fs::write(path, data).unwrap();
1345 }
1346 }
1347 }
1348 }
1349
1350 fn spawn_http_server(root: &Path, port: u16) -> Child {
1351 Command::new("python3")
1352 .args([
1353 "-m",
1354 "http.server",
1355 &port.to_string(),
1356 "--bind",
1357 "127.0.0.1",
1358 ])
1359 .current_dir(root)
1360 .stdout(Stdio::null())
1361 .stderr(Stdio::null())
1362 .spawn()
1363 .expect("spawn python http server")
1364 }
1365
1366 fn wait_for_http_server(server: &mut Child, port: u16, path: &str) {
1367 let deadline = Instant::now() + Duration::from_secs(5);
1368
1369 loop {
1370 if let Some(status) = server.try_wait().expect("check http server status") {
1371 panic!("python http server exited before becoming ready: {status}");
1372 }
1373
1374 if let Ok(mut stream) = TcpStream::connect(("127.0.0.1", port)) {
1375 stream
1376 .set_read_timeout(Some(Duration::from_millis(200)))
1377 .expect("set read timeout");
1378 stream
1379 .set_write_timeout(Some(Duration::from_millis(200)))
1380 .expect("set write timeout");
1381 let request =
1382 format!("GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n");
1383 if stream.write_all(request.as_bytes()).is_ok() {
1384 let mut response = String::new();
1385 if stream.read_to_string(&mut response).is_ok()
1386 && response.starts_with("HTTP/1.0 200")
1387 {
1388 return;
1389 }
1390 }
1391 }
1392
1393 if Instant::now() >= deadline {
1394 panic!("python http server did not become ready on port {port}");
1395 }
1396 std::thread::sleep(Duration::from_millis(50));
1397 }
1398 }
1399
1400 #[test]
1401 fn test_import_ref() {
1402 let (storage, _temp) = create_test_storage();
1403
1404 storage
1406 .import_ref("refs/heads/main", "abc123def456")
1407 .unwrap();
1408
1409 assert!(storage.has_ref("refs/heads/main").unwrap());
1411
1412 let refs = storage.list_refs().unwrap();
1414 assert_eq!(
1415 refs.get("refs/heads/main"),
1416 Some(&"abc123def456".to_string())
1417 );
1418 }
1419
    #[cfg(feature = "lmdb")]
    #[test]
    fn test_local_store_falls_back_to_fs_when_lmdb_open_returns_enosys() {
        let temp_dir = TempDir::new().unwrap();
        let fs_calls = AtomicUsize::new(0);
        let lmdb_calls = AtomicUsize::new(0);

        // An ENOSYS failure from the LMDB opener (e.g. the syscall is not
        // available on this platform) should fall back to the FS backend.
        let store = LocalStore::new_for_backend_with_openers(
            temp_dir.path(),
            StorageBackend::Lmdb,
            0,
            |path, max_bytes| {
                fs_calls.fetch_add(1, Ordering::SeqCst);
                LocalStore::open_fs_store(path, max_bytes)
            },
            |_path, _max_bytes| {
                lmdb_calls.fetch_add(1, Ordering::SeqCst);
                Err(StoreError::Io(std::io::Error::from_raw_os_error(
                    libc::ENOSYS,
                )))
            },
        )
        .unwrap();

        // LMDB was attempted exactly once, then the FS opener ran once.
        assert!(matches!(store, LocalStore::Fs(_)));
        assert_eq!(lmdb_calls.load(Ordering::SeqCst), 1);
        assert_eq!(fs_calls.load(Ordering::SeqCst), 1);
    }
1448
    #[cfg(feature = "lmdb")]
    #[test]
    fn test_local_store_does_not_fallback_on_unrelated_lmdb_errors() {
        let temp_dir = TempDir::new().unwrap();
        let fs_calls = AtomicUsize::new(0);

        // EACCES is not a "backend unsupported" condition, so the open
        // must fail outright rather than silently switching backends.
        let result = LocalStore::new_for_backend_with_openers(
            temp_dir.path(),
            StorageBackend::Lmdb,
            0,
            |path, max_bytes| {
                fs_calls.fetch_add(1, Ordering::SeqCst);
                LocalStore::open_fs_store(path, max_bytes)
            },
            |_path, _max_bytes| {
                Err(StoreError::Io(std::io::Error::from_raw_os_error(
                    libc::EACCES,
                )))
            },
        );

        // The original error is surfaced and the FS opener never ran.
        assert!(
            matches!(result, Err(StoreError::Io(io_error)) if io_error.raw_os_error() == Some(libc::EACCES))
        );
        assert_eq!(fs_calls.load(Ordering::SeqCst), 0);
    }
1475
1476 #[test]
1477 fn test_import_multiple_refs_preserves_all() {
1478 let (storage, _temp) = create_test_storage();
1479
1480 storage.import_ref("refs/heads/main", "sha_main").unwrap();
1482 storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
1483 storage
1484 .import_ref("refs/heads/feature", "sha_feature")
1485 .unwrap();
1486
1487 assert!(storage.has_ref("refs/heads/main").unwrap());
1489 assert!(storage.has_ref("refs/heads/dev").unwrap());
1490 assert!(storage.has_ref("refs/heads/feature").unwrap());
1491
1492 storage
1494 .write_ref(
1495 "refs/heads/new-branch",
1496 &Ref::Direct(
1497 ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1498 ),
1499 )
1500 .unwrap();
1501
1502 let refs = storage.list_refs().unwrap();
1504 assert_eq!(refs.len(), 4);
1505 assert!(refs.contains_key("refs/heads/main"));
1506 assert!(refs.contains_key("refs/heads/dev"));
1507 assert!(refs.contains_key("refs/heads/feature"));
1508 assert!(refs.contains_key("refs/heads/new-branch"));
1509 }
1510
1511 #[test]
1512 fn test_import_compressed_object() {
1513 let (storage, _temp) = create_test_storage();
1514
1515 let fake_compressed = vec![0x78, 0x9c, 0x01, 0x02, 0x03]; storage
1519 .import_compressed_object("abc123def456", fake_compressed.clone())
1520 .unwrap();
1521
1522 assert_eq!(storage.object_count().unwrap(), 1);
1524 }
1525
1526 #[test]
1527 fn test_write_ref_overwrites_imported() {
1528 let (storage, _temp) = create_test_storage();
1529
1530 storage.import_ref("refs/heads/main", "old_sha").unwrap();
1532
1533 storage
1535 .write_ref(
1536 "refs/heads/main",
1537 &Ref::Direct(
1538 ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
1539 ),
1540 )
1541 .unwrap();
1542
1543 let refs = storage.list_refs().unwrap();
1545 assert_eq!(
1546 refs.get("refs/heads/main"),
1547 Some(&"0123456789abcdef0123456789abcdef01234567".to_string())
1548 );
1549 }
1550
1551 #[test]
1552 fn test_delete_ref_preserves_others() {
1553 let (storage, _temp) = create_test_storage();
1554
1555 storage.import_ref("refs/heads/main", "sha_main").unwrap();
1557 storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
1558
1559 storage.delete_ref("refs/heads/dev").unwrap();
1561
1562 assert!(storage.has_ref("refs/heads/main").unwrap());
1564 assert!(!storage.has_ref("refs/heads/dev").unwrap());
1565 }
1566
1567 #[test]
1568 fn test_clear_removes_all() {
1569 let (storage, _temp) = create_test_storage();
1570
1571 storage.import_ref("refs/heads/main", "sha_main").unwrap();
1573 storage
1574 .import_compressed_object("obj1", vec![1, 2, 3])
1575 .unwrap();
1576
1577 storage.clear().unwrap();
1579
1580 assert!(!storage.has_ref("refs/heads/main").unwrap());
1582 assert_eq!(storage.object_count().unwrap(), 0);
1583 }
1584
    #[test]
    fn test_evict_if_needed_respects_configured_limit() {
        // 1 KiB budget; two 900-byte blobs guarantee we blow past it.
        let (storage, _temp) = create_test_storage_with_limit(1_024);

        storage
            .write_raw_object(ObjectType::Blob, &vec![b'a'; 900])
            .unwrap();
        storage
            .write_raw_object(ObjectType::Blob, &vec![b'b'; 900])
            .unwrap();
        storage
            .write_ref(
                "refs/heads/main",
                &Ref::Direct(
                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
                ),
            )
            .unwrap();

        storage.build_tree().unwrap();

        // Precondition: the store really is over budget before evicting.
        let before = local_total_bytes(&storage);
        assert!(before > 1_024);

        let freed = storage.evict_if_needed().unwrap();
        assert!(freed > 0);

        // Eviction must bring usage back within the configured limit.
        let after = local_total_bytes(&storage);
        assert!(after <= 1_024);
    }
1615
    #[test]
    fn test_build_tree_evicts_stale_blobs_before_writing_new_tree() {
        // 16 KiB budget; three 7 KiB blobs exceed it by themselves.
        let max_size_bytes = 16 * 1024;
        let (storage, _temp) = create_test_storage_with_limit(max_size_bytes);

        let stale_blobs = vec![
            vec![b'x'; 7 * 1024],
            vec![b'y'; 7 * 1024],
            vec![b'z'; 7 * 1024],
        ];
        let stale_hashes: Vec<Hash> = stale_blobs.iter().map(|blob| sha256(blob)).collect();

        // Put the blobs directly into the blob store — they are not
        // referenced by any git object, so they are "stale".
        for (hash, blob) in stale_hashes.iter().zip(stale_blobs) {
            storage
                .runtime
                .block_on(storage.store().put(*hash, blob))
                .unwrap();
        }

        // Precondition: already over budget before build_tree runs.
        let before = local_total_bytes(&storage);
        assert!(before > max_size_bytes);

        let commit_oid = write_test_commit(&storage);
        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        storage.build_tree().unwrap();

        // Count how many of the stale blobs are gone after build_tree.
        let evicted_stale = stale_hashes
            .iter()
            .filter(|hash| !storage.runtime.block_on(storage.store().has(hash)).unwrap())
            .count();

        assert!(
            evicted_stale > 0,
            "expected build_tree preflight eviction to remove stale blobs before writing"
        );
    }
1658
    #[test]
    fn test_build_tree_adds_dumb_http_metadata() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        // An annotated tag pointing at the commit, so info/refs must list
        // both the tag and its peeled ("^{}") target.
        let tag_content = format!(
            "object {}\ntype commit\ntag v1.0.0\ntagger Test User <test@example.com> 0 +0000\n\nrelease\n",
            commit_oid.to_hex()
        );
        let tag_oid = storage
            .write_raw_object(ObjectType::Tag, tag_content.as_bytes())
            .unwrap();

        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(tag_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        // .git/info/refs is what a dumb-HTTP client fetches to discover refs.
        let info_refs_cid = storage
            .runtime
            .block_on(storage.tree.resolve_path(&root_cid, ".git/info/refs"))
            .unwrap()
            .expect("info/refs exists");
        let info_refs = storage
            .runtime
            .block_on(storage.tree.get(&info_refs_cid, None))
            .unwrap()
            .unwrap();
        let info_refs = String::from_utf8(info_refs).unwrap();

        assert_eq!(
            info_refs,
            format!(
                "{commit}\trefs/heads/main\n{tag}\trefs/tags/v1.0.0\n{commit}\trefs/tags/v1.0.0^{{}}\n",
                commit = commit_oid.to_hex(),
                tag = tag_oid.to_hex()
            )
        );

        // objects/info/packs must exist (dumb clients request it) but be
        // empty since everything is stored as loose objects.
        let packs_cid = storage
            .runtime
            .block_on(
                storage
                    .tree
                    .resolve_path(&root_cid, ".git/objects/info/packs"),
            )
            .unwrap()
            .expect("objects/info/packs exists");
        let packs = storage
            .runtime
            .block_on(storage.tree.get(&packs_cid, None))
            .unwrap()
            .unwrap();
        assert!(packs.is_empty(), "objects/info/packs should be empty");
    }
1720
    #[test]
    fn test_build_tree_materializes_loose_refs_at_git_paths() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);

        // Includes a nested branch name ("codex/meshrouter-prod") to cover
        // refs that need intermediate directories.
        storage
            .write_ref("refs/heads/master", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/heads/codex/meshrouter-prod", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/master".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        // Each ref must appear as a loose file under .git/refs/... whose
        // content is the target commit id.
        for path in [
            ".git/refs/heads/master",
            ".git/refs/heads/codex/meshrouter-prod",
            ".git/refs/tags/v1.0.0",
        ] {
            let ref_cid = storage
                .runtime
                .block_on(storage.tree.resolve_path(&root_cid, path))
                .unwrap()
                .unwrap_or_else(|| panic!("{path} should exist"));
            let ref_value = storage
                .runtime
                .block_on(storage.tree.get(&ref_cid, None))
                .unwrap()
                .unwrap();
            assert_eq!(
                String::from_utf8(ref_value).unwrap(),
                commit_oid.to_hex(),
                "{path} should contain the ref target",
            );
        }
    }
1763
    // End-to-end: export the built tree to disk, serve it with a plain
    // static HTTP server, and verify real `git clone` succeeds over the
    // dumb-HTTP protocol. Requires `python3` and `git` on PATH.
    #[test]
    fn test_materialized_tree_supports_static_http_clone_from_git_dir() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();
        let export_dir = TempDir::new().unwrap();
        let repo_dir = export_dir.path().join("repo");
        export_tree_to_fs(&storage.runtime, &storage.tree, &root_cid, &repo_dir);

        // Bind to port 0 to let the OS pick a free port, then release it
        // for the python server. NOTE(review): small race window between
        // drop and spawn — another process could grab the port.
        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
        let port = listener.local_addr().unwrap().port();
        drop(listener);

        let mut server = spawn_http_server(export_dir.path(), port);
        wait_for_http_server(&mut server, port, "/repo/.git/HEAD");

        let clone_dir = TempDir::new().unwrap();
        let clone_path = clone_dir.path().join("clone");
        let output = Command::new("git")
            .args([
                "clone",
                &format!("http://127.0.0.1:{port}/repo/.git", port = port),
                clone_path.to_str().unwrap(),
            ])
            .output()
            .unwrap();

        // Tear the server down before asserting so a failure doesn't leak
        // the child process.
        let _ = server.kill();
        let _ = server.wait();

        assert!(
            output.status.success(),
            "git clone failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        assert_eq!(
            std::fs::read_to_string(clone_path.join("README.md")).unwrap(),
            "hello from hashtree\n"
        );
    }
1811}