1use flate2::read::ZlibDecoder;
15use flate2::write::ZlibEncoder;
16use flate2::Compression;
17use hashtree_config::{Config, StorageBackend};
18use hashtree_core::store::{Store, StoreError, StoreStats};
19use hashtree_core::types::Hash;
20use hashtree_core::{Cid, DirEntry, HashTree, HashTreeConfig, LinkType};
21use hashtree_fs::FsBlobStore;
22#[cfg(feature = "lmdb")]
23use hashtree_lmdb::LmdbBlobStore;
24use sha1::{Digest, Sha1};
25use std::collections::{BTreeMap, HashMap};
26use std::io::{Read, Write};
27use std::path::Path;
28use std::sync::Arc;
29use tokio::runtime::{Handle, Runtime};
30use tracing::{debug, info};
31
32use super::object::{parse_tree, GitObject, ObjectId, ObjectType};
33use super::refs::{validate_ref_name, Ref};
34use super::{Error, Result};
35
/// Boxed, pinned, `Send` future — used so async directory builders can recurse
/// (plain `async fn`s cannot call themselves directly).
type BoxFuture<'a, T> = std::pin::Pin<Box<dyn std::future::Future<Output = T> + Send + 'a>>;
38
/// In-memory trie of git refs, used to assemble the `refs/` directory tree.
///
/// `files` maps leaf names to raw ref values; `dirs` maps child directory
/// names to nested nodes. `BTreeMap` keeps entries in sorted order.
#[derive(Default)]
struct RefDirectory {
    files: BTreeMap<String, String>,
    dirs: BTreeMap<String, RefDirectory>,
}

impl RefDirectory {
    /// Inserts `value` at the path named by `parts`, creating intermediate
    /// directories as needed. An empty path is a no-op.
    fn insert(&mut self, parts: &[&str], value: String) {
        match parts {
            [] => {}
            [leaf] => {
                self.files.insert((*leaf).to_string(), value);
            }
            [head, rest @ ..] => {
                self.dirs
                    .entry((*head).to_string())
                    .or_default()
                    .insert(rest, value);
            }
        }
    }
}
61
/// How async work is driven to completion from synchronous code: either a
/// runtime this module created, or a handle to the caller's runtime.
enum RuntimeExecutor {
    Owned(Runtime),
    Handle(Handle),
}

impl RuntimeExecutor {
    /// Blocks the current thread until `f` completes.
    ///
    /// With a borrowed handle, `block_in_place` is used so blocking inside
    /// an existing tokio runtime does not stall its worker thread.
    /// NOTE(review): `block_in_place` panics on a current-thread runtime —
    /// confirm callers only run under multi-thread runtimes.
    fn block_on<F: std::future::Future>(&self, f: F) -> F::Output {
        match self {
            RuntimeExecutor::Owned(rt) => rt.block_on(f),
            RuntimeExecutor::Handle(handle) => tokio::task::block_in_place(|| handle.block_on(f)),
        }
    }
}
76
/// Local blob storage backend: plain filesystem, or LMDB when the `lmdb`
/// feature is enabled.
pub enum LocalStore {
    Fs(FsBlobStore),
    #[cfg(feature = "lmdb")]
    Lmdb(LmdbBlobStore),
}
83
84impl LocalStore {
85 fn new_for_backend<P: AsRef<Path>>(
86 path: P,
87 backend: StorageBackend,
88 max_bytes: u64,
89 ) -> std::result::Result<Self, StoreError> {
90 match backend {
91 StorageBackend::Fs => {
92 if max_bytes > 0 {
93 Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
94 path, max_bytes,
95 )?))
96 } else {
97 Ok(LocalStore::Fs(FsBlobStore::new(path)?))
98 }
99 }
100 #[cfg(feature = "lmdb")]
101 StorageBackend::Lmdb => {
102 if max_bytes > 0 {
103 Ok(LocalStore::Lmdb(LmdbBlobStore::with_max_bytes(
104 path, max_bytes,
105 )?))
106 } else {
107 Ok(LocalStore::Lmdb(LmdbBlobStore::new(path)?))
108 }
109 }
110 #[cfg(not(feature = "lmdb"))]
111 StorageBackend::Lmdb => {
112 warn!(
113 "LMDB backend requested but lmdb feature not enabled, using filesystem storage"
114 );
115 if max_bytes > 0 {
116 Ok(LocalStore::Fs(FsBlobStore::with_max_bytes(
117 path, max_bytes,
118 )?))
119 } else {
120 Ok(LocalStore::Fs(FsBlobStore::new(path)?))
121 }
122 }
123 }
124 }
125
126 pub fn new<P: AsRef<Path>>(path: P) -> std::result::Result<Self, StoreError> {
128 Self::new_with_max_bytes(path, 0)
129 }
130
131 pub fn new_with_max_bytes<P: AsRef<Path>>(
133 path: P,
134 max_bytes: u64,
135 ) -> std::result::Result<Self, StoreError> {
136 let config = Config::load_or_default();
137 Self::new_for_backend(path, config.storage.backend, max_bytes)
138 }
139
140 pub fn list(&self) -> std::result::Result<Vec<Hash>, StoreError> {
142 match self {
143 LocalStore::Fs(store) => store.list(),
144 #[cfg(feature = "lmdb")]
145 LocalStore::Lmdb(store) => store.list(),
146 }
147 }
148
149 pub fn get_sync(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
151 match self {
152 LocalStore::Fs(store) => store.get_sync(hash),
153 #[cfg(feature = "lmdb")]
154 LocalStore::Lmdb(store) => store.get_sync(hash),
155 }
156 }
157}
158
/// Async `Store` implementation: every operation is delegated to the
/// active backend variant.
#[async_trait::async_trait]
impl Store for LocalStore {
    /// Writes `data` under `hash`, delegating to the backend.
    async fn put(&self, hash: Hash, data: Vec<u8>) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.put(hash, data).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.put(hash, data).await,
        }
    }

    /// Reads the blob stored under `hash`; `None` when absent.
    async fn get(&self, hash: &Hash) -> std::result::Result<Option<Vec<u8>>, StoreError> {
        match self {
            LocalStore::Fs(store) => store.get(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.get(hash).await,
        }
    }

    /// Whether a blob for `hash` exists.
    async fn has(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.has(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.has(hash).await,
        }
    }

    /// Deletes the blob for `hash`, delegating to the backend.
    async fn delete(&self, hash: &Hash) -> std::result::Result<bool, StoreError> {
        match self {
            LocalStore::Fs(store) => store.delete(hash).await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.delete(hash).await,
        }
    }

    /// Updates the byte cap used by eviction.
    fn set_max_bytes(&self, max: u64) {
        match self {
            LocalStore::Fs(store) => store.set_max_bytes(max),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.set_max_bytes(max),
        }
    }

    /// Current byte cap, if any.
    fn max_bytes(&self) -> Option<u64> {
        match self {
            LocalStore::Fs(store) => store.max_bytes(),
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.max_bytes(),
        }
    }

    /// Aggregated store statistics.
    ///
    /// Backend errors are swallowed and reported as all-zero stats. Note
    /// the asymmetry: the LMDB branch always reports zero pinned counters,
    /// while the filesystem branch forwards real pinning stats.
    async fn stats(&self) -> StoreStats {
        match self {
            LocalStore::Fs(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    pinned_count: stats.pinned_count as u64,
                    pinned_bytes: stats.pinned_bytes,
                },
                Err(_) => StoreStats::default(),
            },
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => match store.stats() {
                Ok(stats) => StoreStats {
                    count: stats.count as u64,
                    bytes: stats.total_bytes,
                    pinned_count: 0,
                    pinned_bytes: 0,
                },
                Err(_) => StoreStats::default(),
            },
        }
    }

    /// Runs eviction when the store exceeds its cap.
    /// NOTE(review): the `u64` presumably counts bytes freed — confirm
    /// against the `Store` trait documentation.
    async fn evict_if_needed(&self) -> std::result::Result<u64, StoreError> {
        match self {
            LocalStore::Fs(store) => store.evict_if_needed().await,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.evict_if_needed().await,
        }
    }
}
241
/// Git repository storage backed by a content-addressed hashtree.
///
/// Loose objects and refs are kept in in-memory maps; `build_tree`
/// materializes them into the hashtree and caches the resulting root CID,
/// which every mutation invalidates (sets back to `None`).
pub struct GitStorage {
    store: Arc<LocalStore>,
    tree: HashTree<LocalStore>,
    runtime: RuntimeExecutor,
    // zlib-compressed loose objects, keyed by hex object id
    objects: std::sync::RwLock<HashMap<String, Vec<u8>>>,
    // ref name -> raw value ("<hex oid>" or "ref: <target>")
    refs: std::sync::RwLock<HashMap<String, String>>,
    // cached root of the last built tree; None after any mutation
    root_cid: std::sync::RwLock<Option<Cid>>,
}
253
254impl GitStorage {
255 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
257 let config = Config::load_or_default();
258 let max_size_bytes = config
259 .storage
260 .max_size_gb
261 .saturating_mul(1024 * 1024 * 1024);
262 Self::open_with_max_bytes(path, max_size_bytes)
263 }
264
265 pub fn open_with_max_bytes(path: impl AsRef<Path>, max_size_bytes: u64) -> Result<Self> {
267 let config = Config::load_or_default();
268 Self::open_with_backend_and_max_bytes(path, config.storage.backend, max_size_bytes)
269 }
270
    /// Opens storage at `path` with an explicit backend and byte cap.
    ///
    /// Reuses the ambient tokio runtime when called from async context,
    /// otherwise creates a dedicated one. Blobs live under `<path>/blobs`;
    /// objects/refs start empty and no root CID is cached yet.
    pub fn open_with_backend_and_max_bytes(
        path: impl AsRef<Path>,
        backend: StorageBackend,
        max_size_bytes: u64,
    ) -> Result<Self> {
        // Prefer an existing runtime handle so we never nest runtimes.
        let runtime = match Handle::try_current() {
            Ok(handle) => RuntimeExecutor::Handle(handle),
            Err(_) => {
                let rt = Runtime::new()
                    .map_err(|e| Error::StorageError(format!("tokio runtime: {}", e)))?;
                RuntimeExecutor::Owned(rt)
            }
        };

        let store_path = path.as_ref().join("blobs");
        let store = Arc::new(
            LocalStore::new_for_backend(&store_path, backend, max_size_bytes)
                .map_err(|e| Error::StorageError(format!("local store: {}", e)))?,
        );

        let tree = HashTree::new(HashTreeConfig::new(store.clone()));

        Ok(Self {
            store,
            tree,
            runtime,
            objects: std::sync::RwLock::new(HashMap::new()),
            refs: std::sync::RwLock::new(HashMap::new()),
            root_cid: std::sync::RwLock::new(None),
        })
    }
303
304 pub fn evict_if_needed(&self) -> Result<u64> {
306 self.runtime
307 .block_on(self.store.evict_if_needed())
308 .map_err(|e| Error::StorageError(format!("evict: {}", e)))
309 }
310
311 fn write_object(&self, obj: &GitObject) -> Result<ObjectId> {
313 let oid = obj.id();
314 let key = oid.to_hex();
315
316 let loose = obj.to_loose_format();
317 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
318 encoder.write_all(&loose)?;
319 let compressed = encoder.finish()?;
320
321 let mut objects = self
322 .objects
323 .write()
324 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
325 objects.insert(key, compressed);
326
327 if let Ok(mut root) = self.root_cid.write() {
329 *root = None;
330 }
331
332 Ok(oid)
333 }
334
335 pub fn write_raw_object(&self, obj_type: ObjectType, content: &[u8]) -> Result<ObjectId> {
337 let obj = GitObject::new(obj_type, content.to_vec());
338 self.write_object(&obj)
339 }
340
341 #[allow(dead_code)]
343 fn read_object(&self, oid: &ObjectId) -> Result<GitObject> {
344 let key = oid.to_hex();
345 let objects = self
346 .objects
347 .read()
348 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
349 let compressed = objects
350 .get(&key)
351 .ok_or_else(|| Error::ObjectNotFound(key.clone()))?;
352
353 let mut decoder = ZlibDecoder::new(compressed.as_slice());
354 let mut data = Vec::new();
355 decoder.read_to_end(&mut data)?;
356
357 GitObject::from_loose_format(&data)
358 }
359
360 pub fn write_ref(&self, name: &str, target: &Ref) -> Result<()> {
362 validate_ref_name(name)?;
363
364 let value = match target {
365 Ref::Direct(oid) => oid.to_hex(),
366 Ref::Symbolic(target) => format!("ref: {}", target),
367 };
368
369 let mut refs = self
370 .refs
371 .write()
372 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
373 refs.insert(name.to_string(), value);
374
375 if let Ok(mut root) = self.root_cid.write() {
377 *root = None;
378 }
379
380 Ok(())
381 }
382
383 #[allow(dead_code)]
385 pub fn read_ref(&self, name: &str) -> Result<Option<Ref>> {
386 let refs = self
387 .refs
388 .read()
389 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
390
391 match refs.get(name) {
392 Some(value) => {
393 if let Some(target) = value.strip_prefix("ref: ") {
394 Ok(Some(Ref::Symbolic(target.to_string())))
395 } else {
396 let oid = ObjectId::from_hex(value)
397 .ok_or_else(|| Error::StorageError(format!("invalid ref: {}", value)))?;
398 Ok(Some(Ref::Direct(oid)))
399 }
400 }
401 None => Ok(None),
402 }
403 }
404
405 #[allow(dead_code)]
407 pub fn list_refs(&self) -> Result<HashMap<String, String>> {
408 let refs = self
409 .refs
410 .read()
411 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
412 Ok(refs.clone())
413 }
414
415 pub fn delete_ref(&self, name: &str) -> Result<bool> {
417 let mut refs = self
418 .refs
419 .write()
420 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
421 let existed = refs.remove(name).is_some();
422
423 if let Ok(mut root) = self.root_cid.write() {
425 *root = None;
426 }
427
428 Ok(existed)
429 }
430
431 pub fn import_compressed_object(&self, oid: &str, compressed_data: Vec<u8>) -> Result<()> {
434 let mut objects = self
435 .objects
436 .write()
437 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
438 objects.insert(oid.to_string(), compressed_data);
439
440 if let Ok(mut root) = self.root_cid.write() {
442 *root = None;
443 }
444
445 Ok(())
446 }
447
448 pub fn import_ref(&self, name: &str, value: &str) -> Result<()> {
450 let mut refs = self
451 .refs
452 .write()
453 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
454 refs.insert(name.to_string(), value.to_string());
455
456 if let Ok(mut root) = self.root_cid.write() {
458 *root = None;
459 }
460
461 Ok(())
462 }
463
464 #[cfg(test)]
466 pub fn has_ref(&self, name: &str) -> Result<bool> {
467 let refs = self
468 .refs
469 .read()
470 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
471 Ok(refs.contains_key(name))
472 }
473
474 #[cfg(test)]
476 pub fn object_count(&self) -> Result<usize> {
477 let objects = self
478 .objects
479 .read()
480 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
481 Ok(objects.len())
482 }
483
484 #[allow(dead_code)]
486 pub fn get_root_cid(&self) -> Result<Option<Cid>> {
487 let root = self
488 .root_cid
489 .read()
490 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
491 Ok(root.clone())
492 }
493
494 #[allow(dead_code)]
496 pub fn default_branch(&self) -> Result<Option<String>> {
497 let refs = self
498 .refs
499 .read()
500 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
501
502 if let Some(head) = refs.get("HEAD") {
503 if let Some(target) = head.strip_prefix("ref: ") {
504 return Ok(Some(target.to_string()));
505 }
506 }
507 Ok(None)
508 }
509
510 fn get_commit_tree(
512 &self,
513 commit_oid: &str,
514 objects: &HashMap<String, Vec<u8>>,
515 ) -> Option<String> {
516 let compressed = objects.get(commit_oid)?;
517
518 let mut decoder = ZlibDecoder::new(&compressed[..]);
520 let mut decompressed = Vec::new();
521 decoder.read_to_end(&mut decompressed).ok()?;
522
523 let null_pos = decompressed.iter().position(|&b| b == 0)?;
525 let content = &decompressed[null_pos + 1..];
526
527 let content_str = std::str::from_utf8(content).ok()?;
529 let first_line = content_str.lines().next()?;
530 first_line
531 .strip_prefix("tree ")
532 .map(|tree_hash| tree_hash.to_string())
533 }
534
535 fn get_object_content(
537 &self,
538 oid: &str,
539 objects: &HashMap<String, Vec<u8>>,
540 ) -> Option<(ObjectType, Vec<u8>)> {
541 let compressed = objects.get(oid)?;
542
543 let mut decoder = ZlibDecoder::new(&compressed[..]);
545 let mut decompressed = Vec::new();
546 decoder.read_to_end(&mut decompressed).ok()?;
547
548 let null_pos = decompressed.iter().position(|&b| b == 0)?;
550 let header = std::str::from_utf8(&decompressed[..null_pos]).ok()?;
551 let obj_type = if header.starts_with("blob") {
552 ObjectType::Blob
553 } else if header.starts_with("tree") {
554 ObjectType::Tree
555 } else if header.starts_with("commit") {
556 ObjectType::Commit
557 } else if header.starts_with("tag") {
558 ObjectType::Tag
559 } else {
560 return None;
561 };
562 let content = decompressed[null_pos + 1..].to_vec();
563 Some((obj_type, content))
564 }
565
    /// Follows tag objects to the oid of the first non-tag object they
    /// reference (git's "peeled" semantics).
    ///
    /// Returns the input oid unchanged for non-tag objects, and `None`
    /// when any object in the chain is missing or malformed.
    /// NOTE(review): recursion is unbounded — a cycle of tag objects would
    /// overflow the stack; confirm imported data is trusted.
    fn peel_tag_target(&self, oid: &str, objects: &HashMap<String, Vec<u8>>) -> Option<String> {
        let (obj_type, content) = self.get_object_content(oid, objects)?;
        if obj_type != ObjectType::Tag {
            return Some(oid.to_string());
        }

        // A tag object carries an "object <hex>" line naming its target.
        let target = std::str::from_utf8(&content)
            .ok()?
            .lines()
            .find_map(|line| line.strip_prefix("object "))
            .map(str::trim)?
            .to_string();

        match self.get_object_content(&target, objects)?.0 {
            ObjectType::Tag => self.peel_tag_target(&target, objects),
            _ => Some(target),
        }
    }
584
585 fn build_info_refs_content(
586 &self,
587 refs: &HashMap<String, String>,
588 objects: &HashMap<String, Vec<u8>>,
589 ) -> String {
590 let mut lines = Vec::new();
591
592 for (name, value) in refs {
593 if name == "HEAD" {
594 continue;
595 }
596
597 let oid = value.trim().to_string();
598 lines.push((name.clone(), oid.clone()));
599
600 if name.starts_with("refs/tags/") {
601 if let Some(peeled) = self.peel_tag_target(&oid, objects) {
602 if peeled != oid {
603 lines.push((format!("{}^{{}}", name), peeled));
604 }
605 }
606 }
607 }
608
609 lines.sort_by(|a, b| a.0.cmp(&b.0));
610
611 let mut content = String::new();
612 for (name, oid) in lines {
613 content.push_str(&oid);
614 content.push('\t');
615 content.push_str(&name);
616 content.push('\n');
617 }
618 content
619 }
620
    /// Stores the `info/refs` listing in the hashtree and wraps it in an
    /// `info/` directory node, returning that directory's CID.
    async fn build_info_dir(
        &self,
        refs: &HashMap<String, String>,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Cid> {
        let info_refs = self.build_info_refs_content(refs, objects);
        let (info_refs_cid, info_refs_size) = self
            .tree
            .put(info_refs.as_bytes())
            .await
            .map_err(|e| Error::StorageError(format!("put info/refs: {}", e)))?;

        self.tree
            .put_directory(vec![
                DirEntry::from_cid("refs", &info_refs_cid).with_size(info_refs_size)
            ])
            .await
            .map_err(|e| Error::StorageError(format!("put info dir: {}", e)))
    }
640
    /// Materializes the repository into the hashtree and returns the root CID.
    ///
    /// Layout: a `.git/` directory (HEAD, optional config and index, info/,
    /// objects/, refs/) plus the checked-out working tree at the root. The
    /// result is cached in `root_cid` and reused until a mutation clears it.
    pub fn build_tree(&self) -> Result<Cid> {
        // Fast path: reuse the cached root when nothing has changed.
        if let Ok(root) = self.root_cid.read() {
            if let Some(ref cid) = *root {
                return Ok(cid.clone());
            }
        }

        // Best-effort eviction before writing new blobs; failure is logged.
        if let Err(err) = self.evict_if_needed() {
            debug!("pre-build eviction skipped: {}", err);
        }

        let objects = self
            .objects
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
        let refs = self
            .refs
            .read()
            .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;

        // Resolve the default branch and its tip commit: prefer HEAD's
        // symbolic target, otherwise fall back to any refs/heads/* entry.
        let (default_branch, commit_sha) = if let Some(head) = refs.get("HEAD") {
            let branch = head.strip_prefix("ref: ").map(String::from);
            let sha = branch.as_ref().and_then(|b| refs.get(b)).cloned();
            (branch, sha)
        } else {
            let mut branch_info: Option<(String, String)> = None;
            for (ref_name, sha) in refs.iter() {
                if ref_name.starts_with("refs/heads/") {
                    branch_info = Some((ref_name.clone(), sha.clone()));
                    break;
                }
            }
            match branch_info {
                Some((branch, sha)) => (Some(branch), Some(sha)),
                None => (None, None),
            }
        };

        // Tree oid of the tip commit, used for the index and working tree.
        let tree_sha = commit_sha
            .as_ref()
            .and_then(|sha| self.get_commit_tree(sha, &objects));

        let objects_clone = objects.clone();

        let root_cid = self.runtime.block_on(async {
            let objects_cid = self.build_objects_dir(&objects).await?;

            let refs_cid = self.build_refs_dir(&refs).await?;

            let info_cid = self.build_info_dir(&refs, &objects_clone).await?;

            // HEAD file: the stored value (newline-terminated), else a
            // symbolic ref to the default branch, else refs/heads/main.
            let head_content = refs.get("HEAD")
                .map(|h| if h.ends_with('\n') { h.clone() } else { format!("{}\n", h) })
                .or_else(|| default_branch.as_ref().map(|b| format!("ref: {}\n", b)))
                .unwrap_or_else(|| "ref: refs/heads/main\n".to_string());
            debug!("HEAD content: {:?}", head_content);
            let (head_cid, head_size) = self.tree.put(head_content.as_bytes()).await
                .map_err(|e| Error::StorageError(format!("put HEAD: {}", e)))?;
            debug!("HEAD hash: {}", hex::encode(head_cid.hash));

            let mut git_entries = vec![
                DirEntry::from_cid("HEAD", &head_cid).with_size(head_size),
                DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("objects", &objects_cid).with_link_type(LinkType::Dir),
                DirEntry::from_cid("refs", &refs_cid).with_link_type(LinkType::Dir),
            ];

            // Minimal bare-repo config advertising the default branch.
            if let Some(ref branch) = default_branch {
                let config = format!(
                    "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = true\n[init]\n\tdefaultBranch = {}\n",
                    branch.trim_start_matches("refs/heads/")
                );
                let (config_cid, config_size) = self.tree.put(config.as_bytes()).await
                    .map_err(|e| Error::StorageError(format!("put config: {}", e)))?;
                git_entries.push(DirEntry::from_cid("config", &config_cid).with_size(config_size));
            }

            // Optional git index for the tip commit's tree; failure here is
            // non-fatal and simply omits the index file.
            if let Some(ref tree_oid) = tree_sha {
                match self.build_index_file(tree_oid, &objects_clone) {
                    Ok(index_data) => {
                        let (index_cid, index_size) = self.tree.put(&index_data).await
                            .map_err(|e| Error::StorageError(format!("put index: {}", e)))?;
                        git_entries.push(DirEntry::from_cid("index", &index_cid).with_size(index_size));
                        info!("Added git index file ({} bytes)", index_data.len());
                    }
                    Err(e) => {
                        debug!("Failed to build git index file: {} - continuing without index", e);
                    }
                }
            }

            let git_cid = self.tree.put_directory(git_entries).await
                .map_err(|e| Error::StorageError(format!("put .git: {}", e)))?;

            // Root directory: .git plus the checked-out working tree.
            let mut root_entries = vec![DirEntry::from_cid(".git", &git_cid).with_link_type(LinkType::Dir)];

            if let Some(ref tree_oid) = tree_sha {
                let working_tree_entries = self.build_working_tree_entries(tree_oid, &objects_clone).await?;
                root_entries.extend(working_tree_entries);
                info!("Added {} working tree entries to root", root_entries.len() - 1);
            }

            root_entries.sort_by(|a, b| a.name.cmp(&b.name));

            let root_cid = self.tree.put_directory(root_entries).await
                .map_err(|e| Error::StorageError(format!("put root: {}", e)))?;

            info!("Built hashtree root: {} (encrypted: {}) (.git dir: {})",
                hex::encode(root_cid.hash),
                root_cid.key.is_some(),
                hex::encode(git_cid.hash));

            Ok::<Cid, Error>(root_cid)
        })?;

        // Cache the freshly built root (best-effort if the lock is poisoned).
        if let Ok(mut root) = self.root_cid.write() {
            *root = Some(root_cid.clone());
        }

        Ok(root_cid)
    }
781
    /// Recursively converts a git tree object into hashtree directory
    /// entries: blobs become files, subtrees become directories.
    ///
    /// Entries whose object is missing or not a blob (e.g. submodule
    /// commits) are silently skipped. Results are sorted by name.
    async fn build_working_tree_entries(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<DirEntry>> {
        let mut entries = Vec::new();

        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                // Recurse through the boxed wrapper (async fns can't recurse).
                let sub_entries = self
                    .build_working_tree_entries_boxed(&oid_hex, objects)
                    .await?;

                let dir_cid =
                    self.tree.put_directory(sub_entries).await.map_err(|e| {
                        Error::StorageError(format!("put dir {}: {}", entry.name, e))
                    })?;

                entries
                    .push(DirEntry::from_cid(&entry.name, &dir_cid).with_link_type(LinkType::Dir));
            } else {
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    let (cid, size) = self.tree.put(&blob_content).await.map_err(|e| {
                        Error::StorageError(format!("put blob {}: {}", entry.name, e))
                    })?;

                    entries.push(DirEntry::from_cid(&entry.name, &cid).with_size(size));
                }
            }
        }

        entries.sort_by(|a, b| a.name.cmp(&b.name));

        Ok(entries)
    }
844
    /// Boxed wrapper enabling recursion from within the async
    /// `build_working_tree_entries` (plain async fns cannot call
    /// themselves directly).
    fn build_working_tree_entries_boxed<'a>(
        &'a self,
        tree_oid: &'a str,
        objects: &'a HashMap<String, Vec<u8>>,
    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
        Box::pin(self.build_working_tree_entries(tree_oid, objects))
    }
853
    /// Builds the `objects/` directory: loose objects fanned out into
    /// two-hex-char bucket subdirectories (like `.git/objects/ab/cdef…`),
    /// plus an empty `info/packs` placeholder file.
    async fn build_objects_dir(&self, objects: &HashMap<String, Vec<u8>>) -> Result<Cid> {
        let mut top_entries = Vec::new();

        if !objects.is_empty() {
            // Group by the first two hex characters of the oid.
            let mut buckets: HashMap<String, Vec<(String, Vec<u8>)>> = HashMap::new();
            for (oid, data) in objects {
                let prefix = &oid[..2];
                let suffix = &oid[2..];
                buckets
                    .entry(prefix.to_string())
                    .or_default()
                    .push((suffix.to_string(), data.clone()));
            }

            for (prefix, objs) in buckets {
                let mut sub_entries = Vec::new();
                for (suffix, data) in objs {
                    let (cid, size) = self.tree.put(&data).await.map_err(|e| {
                        Error::StorageError(format!("put object {}{}: {}", prefix, suffix, e))
                    })?;
                    sub_entries.push(DirEntry::from_cid(suffix, &cid).with_size(size));
                }
                sub_entries.sort_by(|a, b| a.name.cmp(&b.name));

                let sub_cid =
                    self.tree.put_directory(sub_entries).await.map_err(|e| {
                        Error::StorageError(format!("put objects/{}: {}", prefix, e))
                    })?;
                top_entries
                    .push(DirEntry::from_cid(prefix, &sub_cid).with_link_type(LinkType::Dir));
            }
        }

        // Always emit an empty objects/info/packs file.
        let (packs_cid, packs_size) = self
            .tree
            .put(b"")
            .await
            .map_err(|e| Error::StorageError(format!("put objects/info/packs: {}", e)))?;
        let info_cid = self
            .tree
            .put_directory(vec![
                DirEntry::from_cid("packs", &packs_cid).with_size(packs_size)
            ])
            .await
            .map_err(|e| Error::StorageError(format!("put objects/info: {}", e)))?;
        top_entries.push(DirEntry::from_cid("info", &info_cid).with_link_type(LinkType::Dir));

        top_entries.sort_by(|a, b| a.name.cmp(&b.name));

        let entry_count = top_entries.len();
        let cid = self
            .tree
            .put_directory(top_entries)
            .await
            .map_err(|e| Error::StorageError(format!("put objects dir: {}", e)))?;

        debug!(
            "Built objects dir with {} entries: {}",
            entry_count,
            hex::encode(cid.hash)
        );
        Ok(cid)
    }
926
    /// Builds the `refs/` directory tree from all `refs/<kind>/<name…>`
    /// entries; HEAD and other non-`refs/` names are excluded. An empty
    /// directory is still materialized when no refs qualify.
    async fn build_refs_dir(&self, refs: &HashMap<String, String>) -> Result<Cid> {
        let mut root = RefDirectory::default();

        for (ref_name, value) in refs {
            let parts: Vec<&str> = ref_name.split('/').collect();
            // Only "refs/<kind>/<name...>" entries belong in this tree.
            if parts.len() >= 3 && parts[0] == "refs" {
                root.insert(&parts[1..], value.clone());
            }
        }

        let mut ref_entries = self.build_ref_entries_recursive(&root, "refs").await?;

        if ref_entries.is_empty() {
            let empty_cid = self
                .tree
                .put_directory(vec![])
                .await
                .map_err(|e| Error::StorageError(format!("put empty refs: {}", e)))?;
            return Ok(empty_cid);
        }

        ref_entries.sort_by(|a, b| a.name.cmp(&b.name));

        let refs_cid = self
            .tree
            .put_directory(ref_entries)
            .await
            .map_err(|e| Error::StorageError(format!("put refs dir: {}", e)))?;
        debug!("refs dir -> {}", hex::encode(refs_cid.hash));
        Ok(refs_cid)
    }
960
    /// Recursively materializes a `RefDirectory` node: files become blobs
    /// holding the raw ref value, subdirectories become nested tree dirs.
    /// Returns entries sorted by name; `prefix` is used only for logging.
    fn build_ref_entries_recursive<'a>(
        &'a self,
        dir: &'a RefDirectory,
        prefix: &'a str,
    ) -> BoxFuture<'a, Result<Vec<DirEntry>>> {
        Box::pin(async move {
            let mut entries = Vec::new();

            for (name, value) in &dir.files {
                let (cid, size) = self
                    .tree
                    .put(value.as_bytes())
                    .await
                    .map_err(|e| Error::StorageError(format!("put ref: {}", e)))?;
                debug!("{}/{} -> blob {}", prefix, name, hex::encode(cid.hash));
                entries.push(DirEntry::from_cid(name, &cid).with_size(size));
            }

            for (name, child) in &dir.dirs {
                let child_prefix = format!("{prefix}/{name}");
                let child_entries = self
                    .build_ref_entries_recursive(child, &child_prefix)
                    .await?;
                let child_cid =
                    self.tree.put_directory(child_entries).await.map_err(|e| {
                        Error::StorageError(format!("put {child_prefix} dir: {}", e))
                    })?;
                debug!("{} dir -> {}", child_prefix, hex::encode(child_cid.hash));
                entries.push(DirEntry::from_cid(name, &child_cid).with_link_type(LinkType::Dir));
            }

            entries.sort_by(|a, b| a.name.cmp(&b.name));
            Ok(entries)
        })
    }
996
    /// Serializes a git index (version 2, "DIRC") covering every blob
    /// reachable from `tree_oid`.
    ///
    /// Stat fields (ctime/mtime nanoseconds, dev, ino, uid, gid) are
    /// zeroed and seconds set to "now", so git will refresh entries via
    /// lstat on first use.
    fn build_index_file(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
    ) -> Result<Vec<u8>> {
        // (path, sha1, mode, size) per file, flattened from the tree.
        let mut entries: Vec<(String, [u8; 20], u32, u32)> = Vec::new();
        self.collect_tree_entries_for_index(tree_oid, objects, "", &mut entries)?;

        // Index entries must be sorted by path.
        entries.sort_by(|a, b| a.0.cmp(&b.0));

        let entry_count = entries.len() as u32;
        debug!("Building git index with {} entries", entry_count);

        let mut index_data = Vec::new();

        // Header: signature, version 2, entry count.
        index_data.extend_from_slice(b"DIRC");
        index_data.extend_from_slice(&2u32.to_be_bytes());
        index_data.extend_from_slice(&entry_count.to_be_bytes());

        let now_sec = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs() as u32;

        for (path, sha1, mode, size) in &entries {
            let entry_start = index_data.len();

            index_data.extend_from_slice(&now_sec.to_be_bytes()); // ctime seconds
            index_data.extend_from_slice(&0u32.to_be_bytes()); // ctime nanoseconds
            index_data.extend_from_slice(&now_sec.to_be_bytes()); // mtime seconds
            index_data.extend_from_slice(&0u32.to_be_bytes()); // mtime nanoseconds
            index_data.extend_from_slice(&0u32.to_be_bytes()); // dev
            index_data.extend_from_slice(&0u32.to_be_bytes()); // ino
            index_data.extend_from_slice(&mode.to_be_bytes()); // file mode
            index_data.extend_from_slice(&0u32.to_be_bytes()); // uid
            index_data.extend_from_slice(&0u32.to_be_bytes()); // gid
            index_data.extend_from_slice(&size.to_be_bytes()); // file size
            index_data.extend_from_slice(sha1); // object id
            // Flags: 12-bit path length, capped at 0xFFF per the format.
            let path_len = std::cmp::min(path.len(), 0xFFF) as u16;
            index_data.extend_from_slice(&path_len.to_be_bytes());
            index_data.extend_from_slice(path.as_bytes());
            index_data.push(0); // NUL terminator for the path
            // Pad the entry to an 8-byte boundary (1-8 NULs total).
            let entry_len = index_data.len() - entry_start;
            let padding = (8 - (entry_len % 8)) % 8;
            index_data.extend(std::iter::repeat_n(0, padding));
        }

        // Trailing SHA-1 checksum over everything written so far.
        let mut hasher = Sha1::new();
        hasher.update(&index_data);
        let checksum = hasher.finalize();
        index_data.extend_from_slice(&checksum);

        debug!(
            "Built git index: {} bytes, {} entries",
            index_data.len(),
            entry_count
        );
        Ok(index_data)
    }
1075
    /// Walks a git tree recursively, appending `(path, sha1, mode, size)`
    /// tuples for every reachable blob to `entries`; `prefix` is the path
    /// accumulated so far ("" at the root).
    ///
    /// Non-blob leaf entries are skipped; a blob whose oid fails hex
    /// decoding keeps an all-zero sha1 — NOTE(review): confirm that is
    /// intended rather than an error.
    fn collect_tree_entries_for_index(
        &self,
        tree_oid: &str,
        objects: &HashMap<String, Vec<u8>>,
        prefix: &str,
        entries: &mut Vec<(String, [u8; 20], u32, u32)>,
    ) -> Result<()> {
        let (obj_type, content) = self
            .get_object_content(tree_oid, objects)
            .ok_or_else(|| Error::ObjectNotFound(tree_oid.to_string()))?;

        if obj_type != ObjectType::Tree {
            return Err(Error::InvalidObjectType(format!(
                "expected tree, got {:?}",
                obj_type
            )));
        }

        let tree_entries = parse_tree(&content)?;

        for entry in tree_entries {
            let path = if prefix.is_empty() {
                entry.name.clone()
            } else {
                format!("{}/{}", prefix, entry.name)
            };

            let oid_hex = entry.oid.to_hex();

            if entry.is_tree() {
                self.collect_tree_entries_for_index(&oid_hex, objects, &path, entries)?;
            } else {
                if let Some((ObjectType::Blob, blob_content)) =
                    self.get_object_content(&oid_hex, objects)
                {
                    let mut sha1_bytes = [0u8; 20];
                    if let Ok(bytes) = hex::decode(&oid_hex) {
                        if bytes.len() == 20 {
                            sha1_bytes.copy_from_slice(&bytes);
                        }
                    }

                    let mode = entry.mode;
                    let size = blob_content.len() as u32;

                    entries.push((path, sha1_bytes, mode, size));
                }
            }
        }

        Ok(())
    }
1133
    /// Shared handle to the underlying local blob store.
    pub fn store(&self) -> &Arc<LocalStore> {
        &self.store
    }
1138
    /// The hashtree used to materialize repository contents.
    #[allow(dead_code)]
    pub fn hashtree(&self) -> &HashTree<LocalStore> {
        &self.tree
    }
1144
    /// Uploads every locally stored blob to the configured Blossom file
    /// servers, skipping blobs the servers already have.
    ///
    /// Returns `(uploaded, already_present)` counts. Individual read or
    /// upload failures are logged and skipped, not propagated.
    #[allow(dead_code)]
    pub fn push_to_file_servers(
        &self,
        blossom: &hashtree_blossom::BlossomClient,
    ) -> Result<(usize, usize)> {
        let hashes = self
            .store
            .list()
            .map_err(|e| Error::StorageError(format!("list hashes: {}", e)))?;

        info!("Pushing {} blobs to file servers", hashes.len());

        let mut uploaded = 0;
        let mut existed = 0;

        self.runtime.block_on(async {
            for hash in &hashes {
                let hex_hash = hex::encode(hash);
                // Skip blobs that can't be read (e.g. evicted concurrently).
                let data = match self.store.get_sync(hash) {
                    Ok(Some(d)) => d,
                    _ => continue,
                };

                match blossom.upload_if_missing(&data).await {
                    Ok((_, true)) => {
                        debug!("Uploaded {}", &hex_hash[..12]);
                        uploaded += 1;
                    }
                    Ok((_, false)) => {
                        existed += 1;
                    }
                    Err(e) => {
                        debug!("Failed to upload {}: {}", &hex_hash[..12], e);
                    }
                }
            }
        });

        info!(
            "Upload complete: {} new, {} already existed",
            uploaded, existed
        );
        Ok((uploaded, existed))
    }
1190
1191 #[allow(dead_code)]
1193 pub fn clear(&self) -> Result<()> {
1194 let mut objects = self
1195 .objects
1196 .write()
1197 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1198 let mut refs = self
1199 .refs
1200 .write()
1201 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1202 let mut root = self
1203 .root_cid
1204 .write()
1205 .map_err(|e| Error::StorageError(format!("lock: {}", e)))?;
1206
1207 objects.clear();
1208 refs.clear();
1209 *root = None;
1210 Ok(())
1211 }
1212}
1213
#[cfg(test)]
mod tests {
    use super::*;
    use hashtree_core::store::Store;
    use hashtree_core::{sha256, LinkType};
    use std::io::{Read, Write};
    use std::net::{TcpListener, TcpStream};
    use std::path::Path;
    use std::process::{Child, Command, Stdio};
    use std::time::{Duration, Instant};
    use tempfile::TempDir;

    // Builds a GitStorage backed by a fresh temporary directory. The TempDir
    // is returned alongside so it stays alive for the storage's lifetime.
    fn create_test_storage() -> (GitStorage, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let storage = GitStorage::open(temp_dir.path()).unwrap();
        (storage, temp_dir)
    }

    // Like `create_test_storage`, but opens the FS backend with a local
    // size cap (in bytes) so eviction behavior can be exercised.
    fn create_test_storage_with_limit(max_size_bytes: u64) -> (GitStorage, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let storage = GitStorage::open_with_backend_and_max_bytes(
            temp_dir.path(),
            StorageBackend::Fs,
            max_size_bytes,
        )
        .unwrap();
        (storage, temp_dir)
    }

    // Total bytes currently held by the local blob store, regardless of
    // which backend variant the storage was opened with.
    fn local_total_bytes(storage: &GitStorage) -> u64 {
        match storage.store().as_ref() {
            LocalStore::Fs(store) => store.stats().unwrap().total_bytes,
            #[cfg(feature = "lmdb")]
            LocalStore::Lmdb(store) => store.stats().unwrap().total_bytes,
        }
    }

    // Writes a minimal blob -> tree -> commit chain into `storage` and
    // returns the commit's object id.
    fn write_test_commit(storage: &GitStorage) -> ObjectId {
        let blob_oid = storage
            .write_raw_object(ObjectType::Blob, b"hello from hashtree\n")
            .unwrap();

        // Raw git tree entry format: "<mode> <name>\0" followed by the
        // 20 raw SHA-1 bytes of the referenced object (not hex).
        let mut tree_content = Vec::new();
        tree_content.extend_from_slice(b"100644 README.md\0");
        tree_content.extend_from_slice(&hex::decode(blob_oid.to_hex()).unwrap());
        let tree_oid = storage
            .write_raw_object(ObjectType::Tree, &tree_content)
            .unwrap();

        // Fixed author/committer timestamps keep the commit id deterministic.
        let commit_content = format!(
            "tree {}\nauthor Test User <test@example.com> 0 +0000\ncommitter Test User <test@example.com> 0 +0000\n\nInitial commit\n",
            tree_oid.to_hex()
        );
        storage
            .write_raw_object(ObjectType::Commit, commit_content.as_bytes())
            .unwrap()
    }

    // Recursively materializes the hashtree directory at `cid` onto the
    // local filesystem under `dst`, so external tools (python http.server,
    // git) can serve/read it.
    fn export_tree_to_fs<S: Store>(
        runtime: &RuntimeExecutor,
        tree: &HashTree<S>,
        cid: &Cid,
        dst: &Path,
    ) {
        std::fs::create_dir_all(dst).unwrap();
        let entries = runtime.block_on(tree.list_directory(cid)).unwrap();
        for entry in entries {
            let entry_cid = Cid {
                hash: entry.hash,
                key: entry.key,
            };
            let path = dst.join(&entry.name);
            match entry.link_type {
                // Directories recurse; blobs/files are fetched and written out.
                LinkType::Dir => export_tree_to_fs(runtime, tree, &entry_cid, &path),
                LinkType::Blob | LinkType::File => {
                    let data = runtime
                        .block_on(tree.get(&entry_cid, None))
                        .unwrap()
                        .unwrap();
                    if let Some(parent) = path.parent() {
                        std::fs::create_dir_all(parent).unwrap();
                    }
                    std::fs::write(path, data).unwrap();
                }
            }
        }
    }

    // Serves `root` over HTTP on 127.0.0.1:`port` using python3's built-in
    // static file server; child stdout/stderr are discarded.
    fn spawn_http_server(root: &Path, port: u16) -> Child {
        Command::new("python3")
            .args([
                "-m",
                "http.server",
                &port.to_string(),
                "--bind",
                "127.0.0.1",
            ])
            .current_dir(root)
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .spawn()
            .expect("spawn python http server")
    }

    // Polls the spawned server until it answers a GET for `path` with 200,
    // panicking if the child exits first or 5 seconds elapse.
    fn wait_for_http_server(server: &mut Child, port: u16, path: &str) {
        let deadline = Instant::now() + Duration::from_secs(5);

        loop {
            // Fail fast if the child already died (e.g. port already in use).
            if let Some(status) = server.try_wait().expect("check http server status") {
                panic!("python http server exited before becoming ready: {status}");
            }

            if let Ok(mut stream) = TcpStream::connect(("127.0.0.1", port)) {
                // Short timeouts so a half-open server doesn't stall the poll loop.
                stream
                    .set_read_timeout(Some(Duration::from_millis(200)))
                    .expect("set read timeout");
                stream
                    .set_write_timeout(Some(Duration::from_millis(200)))
                    .expect("set write timeout");
                let request =
                    format!("GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n");
                if stream.write_all(request.as_bytes()).is_ok() {
                    let mut response = String::new();
                    // python3 -m http.server answers with HTTP/1.0 by default,
                    // so the status line is matched against that version.
                    if stream.read_to_string(&mut response).is_ok()
                        && response.starts_with("HTTP/1.0 200")
                    {
                        return;
                    }
                }
            }

            if Instant::now() >= deadline {
                panic!("python http server did not become ready on port {port}");
            }
            std::thread::sleep(Duration::from_millis(50));
        }
    }

    // An imported ref becomes visible via both has_ref and list_refs.
    #[test]
    fn test_import_ref() {
        let (storage, _temp) = create_test_storage();

        storage
            .import_ref("refs/heads/main", "abc123def456")
            .unwrap();

        assert!(storage.has_ref("refs/heads/main").unwrap());

        let refs = storage.list_refs().unwrap();
        assert_eq!(
            refs.get("refs/heads/main"),
            Some(&"abc123def456".to_string())
        );
    }

    // Importing several refs keeps them all, and a later write_ref adds a
    // fourth without clobbering the imported ones.
    #[test]
    fn test_import_multiple_refs_preserves_all() {
        let (storage, _temp) = create_test_storage();

        storage.import_ref("refs/heads/main", "sha_main").unwrap();
        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();
        storage
            .import_ref("refs/heads/feature", "sha_feature")
            .unwrap();

        assert!(storage.has_ref("refs/heads/main").unwrap());
        assert!(storage.has_ref("refs/heads/dev").unwrap());
        assert!(storage.has_ref("refs/heads/feature").unwrap());

        storage
            .write_ref(
                "refs/heads/new-branch",
                &Ref::Direct(
                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
                ),
            )
            .unwrap();

        let refs = storage.list_refs().unwrap();
        assert_eq!(refs.len(), 4);
        assert!(refs.contains_key("refs/heads/main"));
        assert!(refs.contains_key("refs/heads/dev"));
        assert!(refs.contains_key("refs/heads/feature"));
        assert!(refs.contains_key("refs/heads/new-branch"));
    }

    // import_compressed_object stores the payload as-is: 0x78 0x9c is a zlib
    // header but the tail bytes are arbitrary, so this only checks the object
    // is counted, not that it decompresses.
    #[test]
    fn test_import_compressed_object() {
        let (storage, _temp) = create_test_storage();

        let fake_compressed = vec![0x78, 0x9c, 0x01, 0x02, 0x03]; storage
            .import_compressed_object("abc123def456", fake_compressed.clone())
            .unwrap();

        assert_eq!(storage.object_count().unwrap(), 1);
    }

    // write_ref on an existing imported name replaces its value.
    #[test]
    fn test_write_ref_overwrites_imported() {
        let (storage, _temp) = create_test_storage();

        storage.import_ref("refs/heads/main", "old_sha").unwrap();

        storage
            .write_ref(
                "refs/heads/main",
                &Ref::Direct(
                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
                ),
            )
            .unwrap();

        let refs = storage.list_refs().unwrap();
        assert_eq!(
            refs.get("refs/heads/main"),
            Some(&"0123456789abcdef0123456789abcdef01234567".to_string())
        );
    }

    // Deleting one ref leaves unrelated refs intact.
    #[test]
    fn test_delete_ref_preserves_others() {
        let (storage, _temp) = create_test_storage();

        storage.import_ref("refs/heads/main", "sha_main").unwrap();
        storage.import_ref("refs/heads/dev", "sha_dev").unwrap();

        storage.delete_ref("refs/heads/dev").unwrap();

        assert!(storage.has_ref("refs/heads/main").unwrap());
        assert!(!storage.has_ref("refs/heads/dev").unwrap());
    }

    // clear() drops both refs and imported objects.
    #[test]
    fn test_clear_removes_all() {
        let (storage, _temp) = create_test_storage();

        storage.import_ref("refs/heads/main", "sha_main").unwrap();
        storage
            .import_compressed_object("obj1", vec![1, 2, 3])
            .unwrap();

        storage.clear().unwrap();

        assert!(!storage.has_ref("refs/heads/main").unwrap());
        assert_eq!(storage.object_count().unwrap(), 0);
    }

    // With a 1 KiB cap, two ~900-byte blobs push usage over the limit;
    // evict_if_needed must free space until usage fits under the cap again.
    #[test]
    fn test_evict_if_needed_respects_configured_limit() {
        let (storage, _temp) = create_test_storage_with_limit(1_024);

        storage
            .write_raw_object(ObjectType::Blob, &vec![b'a'; 900])
            .unwrap();
        storage
            .write_raw_object(ObjectType::Blob, &vec![b'b'; 900])
            .unwrap();
        storage
            .write_ref(
                "refs/heads/main",
                &Ref::Direct(
                    ObjectId::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap(),
                ),
            )
            .unwrap();

        storage.build_tree().unwrap();

        let before = local_total_bytes(&storage);
        assert!(before > 1_024);

        let freed = storage.evict_if_needed().unwrap();
        assert!(freed > 0);

        let after = local_total_bytes(&storage);
        assert!(after <= 1_024);
    }

    // Seeds the store with blobs that no ref reaches ("stale"), then checks
    // that build_tree's preflight eviction removes at least some of them
    // before writing the new tree.
    #[test]
    fn test_build_tree_evicts_stale_blobs_before_writing_new_tree() {
        let max_size_bytes = 16 * 1024;
        let (storage, _temp) = create_test_storage_with_limit(max_size_bytes);

        // Three 7 KiB blobs (21 KiB total) guarantee we exceed the 16 KiB cap.
        let stale_blobs = vec![
            vec![b'x'; 7 * 1024],
            vec![b'y'; 7 * 1024],
            vec![b'z'; 7 * 1024],
        ];
        let stale_hashes: Vec<Hash> = stale_blobs.iter().map(|blob| sha256(blob)).collect();

        // Put the blobs directly into the blob store, bypassing the git
        // object layer, so nothing in the tree references them.
        for (hash, blob) in stale_hashes.iter().zip(stale_blobs) {
            storage
                .runtime
                .block_on(storage.store().put(*hash, blob))
                .unwrap();
        }

        let before = local_total_bytes(&storage);
        assert!(before > max_size_bytes);

        let commit_oid = write_test_commit(&storage);
        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        storage.build_tree().unwrap();

        // Count how many stale blobs are gone from the store afterwards.
        let evicted_stale = stale_hashes
            .iter()
            .filter(|hash| !storage.runtime.block_on(storage.store().has(hash)).unwrap())
            .count();

        assert!(
            evicted_stale > 0,
            "expected build_tree preflight eviction to remove stale blobs before writing"
        );
    }

    // build_tree must emit the dumb-HTTP metadata files: .git/info/refs
    // (including the peeled "^{}" line for annotated tags) and an empty
    // .git/objects/info/packs.
    #[test]
    fn test_build_tree_adds_dumb_http_metadata() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        let tag_content = format!(
            "object {}\ntype commit\ntag v1.0.0\ntagger Test User <test@example.com> 0 +0000\n\nrelease\n",
            commit_oid.to_hex()
        );
        let tag_oid = storage
            .write_raw_object(ObjectType::Tag, tag_content.as_bytes())
            .unwrap();

        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(tag_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        let info_refs_cid = storage
            .runtime
            .block_on(storage.tree.resolve_path(&root_cid, ".git/info/refs"))
            .unwrap()
            .expect("info/refs exists");
        let info_refs = storage
            .runtime
            .block_on(storage.tree.get(&info_refs_cid, None))
            .unwrap()
            .unwrap();
        let info_refs = String::from_utf8(info_refs).unwrap();

        // The tag appears twice: once as the tag object id, once peeled to
        // the commit it points at ("refs/tags/v1.0.0^{}").
        assert_eq!(
            info_refs,
            format!(
                "{commit}\trefs/heads/main\n{tag}\trefs/tags/v1.0.0\n{commit}\trefs/tags/v1.0.0^{{}}\n",
                commit = commit_oid.to_hex(),
                tag = tag_oid.to_hex()
            )
        );

        let packs_cid = storage
            .runtime
            .block_on(
                storage
                    .tree
                    .resolve_path(&root_cid, ".git/objects/info/packs"),
            )
            .unwrap()
            .expect("objects/info/packs exists");
        let packs = storage
            .runtime
            .block_on(storage.tree.get(&packs_cid, None))
            .unwrap()
            .unwrap();
        assert!(packs.is_empty(), "objects/info/packs should be empty");
    }

    // Each ref (including nested branch names) must be materialized as a
    // loose-ref file under .git/refs/... whose content is the target hex id.
    #[test]
    fn test_build_tree_materializes_loose_refs_at_git_paths() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);

        storage
            .write_ref("refs/heads/master", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/heads/codex/meshrouter-prod", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("refs/tags/v1.0.0", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/master".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();

        for path in [
            ".git/refs/heads/master",
            ".git/refs/heads/codex/meshrouter-prod",
            ".git/refs/tags/v1.0.0",
        ] {
            let ref_cid = storage
                .runtime
                .block_on(storage.tree.resolve_path(&root_cid, path))
                .unwrap()
                .unwrap_or_else(|| panic!("{path} should exist"));
            let ref_value = storage
                .runtime
                .block_on(storage.tree.get(&ref_cid, None))
                .unwrap()
                .unwrap();
            assert_eq!(
                String::from_utf8(ref_value).unwrap(),
                commit_oid.to_hex(),
                "{path} should contain the ref target",
            );
        }
    }

    // End-to-end: export the built tree to disk, serve it with python's
    // static HTTP server, and verify a real `git clone` over dumb HTTP
    // succeeds and checks out the expected file content.
    // Requires `python3` and `git` on PATH.
    #[test]
    fn test_materialized_tree_supports_static_http_clone_from_git_dir() {
        let (storage, _temp) = create_test_storage();
        let commit_oid = write_test_commit(&storage);
        storage
            .write_ref("refs/heads/main", &Ref::Direct(commit_oid))
            .unwrap();
        storage
            .write_ref("HEAD", &Ref::Symbolic("refs/heads/main".to_string()))
            .unwrap();

        let root_cid = storage.build_tree().unwrap();
        let export_dir = TempDir::new().unwrap();
        let repo_dir = export_dir.path().join("repo");
        export_tree_to_fs(&storage.runtime, &storage.tree, &root_cid, &repo_dir);

        // Reserve a free port by binding to 0 and dropping the listener.
        // NOTE(review): small TOCTOU window before the python server rebinds
        // it — acceptable for a test, but a possible source of rare flakes.
        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
        let port = listener.local_addr().unwrap().port();
        drop(listener);

        let mut server = spawn_http_server(export_dir.path(), port);
        wait_for_http_server(&mut server, port, "/repo/.git/HEAD");

        let clone_dir = TempDir::new().unwrap();
        let clone_path = clone_dir.path().join("clone");
        let output = Command::new("git")
            .args([
                "clone",
                &format!("http://127.0.0.1:{port}/repo/.git", port = port),
                clone_path.to_str().unwrap(),
            ])
            .output()
            .unwrap();

        // Always reap the server before asserting so a failure doesn't leak
        // the child process.
        let _ = server.kill();
        let _ = server.wait();

        assert!(
            output.status.success(),
            "git clone failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        assert_eq!(
            std::fs::read_to_string(clone_path.join("README.md")).unwrap(),
            "hello from hashtree\n"
        );
    }
}