1use crate::error::{Error, Result};
7use crate::objects::{Object, ObjectId, ObjectKind};
8use crate::unpack_objects::apply_delta;
9use flate2::read::ZlibDecoder;
10use sha1::{Digest, Sha1};
11use sha2::{Digest as Sha256Digest, Sha256};
12use std::collections::{BTreeMap, HashMap, HashSet};
13use std::fs;
14use std::io;
15use std::io::Read;
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18
/// One object record loaded from a pack index file.
#[derive(Debug, Clone)]
pub struct PackIndexEntry {
    /// Raw (binary, not hex) object id bytes as stored in the index.
    pub oid: Vec<u8>,
    /// Offset recorded for the object in the index; code in this file uses it
    /// as a byte position into the companion pack file.
    pub offset: u64,
}
27
/// Parsed contents of a pack index (`.idx`) file.
#[derive(Debug, Clone)]
pub struct PackIndex {
    /// Path of the index file this structure was parsed from.
    pub idx_path: PathBuf,
    /// Path of the companion pack: the index path with its extension replaced
    /// by `pack`.
    pub pack_path: PathBuf,
    /// Width in bytes of every object id in `entries` (the parsers in this
    /// file produce 20 or 32).
    pub hash_bytes: usize,
    /// Object records; `find_offset` binary-searches them, so they are
    /// expected to be sorted ascending by `oid`.
    pub entries: Vec<PackIndexEntry>,
    /// Fanout table: `fanout[b]` is used as the exclusive end position in
    /// `entries` of the ids whose first byte is `<= b`.
    pub fanout: [u32; 256],
}
44
45impl PackIndex {
46 #[must_use]
52 pub fn find_offset(&self, oid: &ObjectId) -> Option<u64> {
53 let needle = oid.as_bytes();
54 if self.hash_bytes != needle.len() {
55 return None;
56 }
57 let first_byte = needle[0] as usize;
58 let lo = if first_byte == 0 {
59 0
60 } else {
61 self.fanout[first_byte - 1] as usize
62 };
63 let hi = self.fanout[first_byte] as usize;
64 if lo >= hi || hi > self.entries.len() {
65 return None;
66 }
67 let slice = &self.entries[lo..hi];
68 slice
69 .binary_search_by(|e| e.oid.as_slice().cmp(needle))
70 .ok()
71 .map(|idx| slice[idx].offset)
72 }
73
74 #[must_use]
76 pub fn contains(&self, oid: &ObjectId) -> bool {
77 self.find_offset(oid).is_some()
78 }
79}
80
/// One record produced by [`show_index_entries`].
#[derive(Debug, Clone)]
pub struct ShowIndexEntry {
    /// Raw object id bytes copied from the index.
    pub oid: Vec<u8>,
    /// Offset value read from the index for this object.
    pub offset: u64,
    /// CRC32 value read from a version-2 index; `None` for version-1 data.
    pub crc32: Option<u32>,
}
94
95pub fn show_index_entries(reader: &mut dyn Read, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
106 let mut buf = Vec::new();
107 reader.read_to_end(&mut buf).map_err(Error::Io)?;
108
109 if buf.len() < 8 {
110 return Err(Error::CorruptObject(
111 "unable to read header: index file too small".to_owned(),
112 ));
113 }
114
115 let mut pos = 0usize;
116 let first_u32 = read_u32_be(&buf, &mut pos)?;
117
118 const PACK_IDX_SIGNATURE: u32 = 0xff74_4f63;
119
120 if first_u32 == PACK_IDX_SIGNATURE {
121 let version = read_u32_be(&buf, &mut pos)?;
123 if version != 2 {
124 return Err(Error::CorruptObject(format!(
125 "unknown index version: {version}"
126 )));
127 }
128 show_index_v2(&buf, &mut pos, hash_size)
129 } else {
130 pos = 0;
133 show_index_v1(&buf, &mut pos, hash_size)
134 }
135}
136
137fn show_index_v1(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
139 if buf.len() < 256 * 4 {
140 return Err(Error::CorruptObject(
141 "unable to read index: v1 fanout too short".to_owned(),
142 ));
143 }
144 let mut fanout = [0u32; 256];
145 for slot in &mut fanout {
146 *slot = read_u32_be(buf, pos)?;
147 }
148 let object_count = fanout[255] as usize;
149
150 let mut entries = Vec::with_capacity(object_count);
151 for i in 0..object_count {
152 if *pos + 4 + hash_size > buf.len() {
154 return Err(Error::CorruptObject(format!(
155 "unable to read entry {i}/{object_count}: truncated"
156 )));
157 }
158 let offset = read_u32_be(buf, pos)? as u64;
159 let oid = buf[*pos..*pos + hash_size].to_vec();
160 *pos += hash_size;
161 entries.push(ShowIndexEntry {
162 oid,
163 offset,
164 crc32: None,
165 });
166 }
167 Ok(entries)
168}
169
170fn show_index_v2(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
173 if buf.len() < *pos + 256 * 4 {
174 return Err(Error::CorruptObject(
175 "unable to read index: v2 fanout too short".to_owned(),
176 ));
177 }
178 let mut fanout = [0u32; 256];
179 for slot in &mut fanout {
180 *slot = read_u32_be(buf, pos)?;
181 }
182 let object_count = fanout[255] as usize;
183
184 let mut oids: Vec<Vec<u8>> = Vec::with_capacity(object_count);
186 for i in 0..object_count {
187 if *pos + hash_size > buf.len() {
188 return Err(Error::CorruptObject(format!(
189 "unable to read oid {i}/{object_count}: truncated"
190 )));
191 }
192 let oid = buf[*pos..*pos + hash_size].to_vec();
193 *pos += hash_size;
194 oids.push(oid);
195 }
196
197 let mut crcs = Vec::with_capacity(object_count);
199 for i in 0..object_count {
200 if *pos + 4 > buf.len() {
201 return Err(Error::CorruptObject(format!(
202 "unable to read crc {i}/{object_count}: truncated"
203 )));
204 }
205 crcs.push(read_u32_be(buf, pos)?);
206 }
207
208 let mut offsets32 = Vec::with_capacity(object_count);
210 let mut large_count = 0usize;
211 for i in 0..object_count {
212 if *pos + 4 > buf.len() {
213 return Err(Error::CorruptObject(format!(
214 "unable to read 32b offset {i}/{object_count}: truncated"
215 )));
216 }
217 let v = read_u32_be(buf, pos)?;
218 if (v & 0x8000_0000) != 0 {
219 large_count += 1;
220 }
221 offsets32.push(v);
222 }
223
224 let mut large_offsets = Vec::with_capacity(large_count);
226 for i in 0..large_count {
227 if *pos + 8 > buf.len() {
228 return Err(Error::CorruptObject(format!(
229 "unable to read 64b offset {i}: truncated"
230 )));
231 }
232 large_offsets.push(read_u64_be(buf, pos)?);
233 }
234
235 let mut next_large = 0usize;
236 let mut entries = Vec::with_capacity(object_count);
237 for (i, oid) in oids.iter().enumerate() {
238 let raw = offsets32[i];
239 let offset = if (raw & 0x8000_0000) == 0 {
240 raw as u64
241 } else {
242 let idx = (raw & 0x7fff_ffff) as usize;
243 if idx != next_large {
244 return Err(Error::CorruptObject(format!(
245 "inconsistent 64b offset index at entry {i}"
246 )));
247 }
248 let off = large_offsets.get(next_large).copied().ok_or_else(|| {
249 Error::CorruptObject(format!("missing large offset entry {next_large}"))
250 })?;
251 next_large += 1;
252 off
253 };
254 entries.push(ShowIndexEntry {
255 oid: oid.clone(),
256 offset,
257 crc32: Some(crcs[i]),
258 });
259 }
260 Ok(entries)
261}
262
/// Aggregate statistics over the packs found in one objects directory, as
/// filled in by [`collect_local_pack_info`].
#[derive(Debug, Clone, Default)]
pub struct LocalPackInfo {
    /// Number of pack indexes that were counted.
    pub pack_count: usize,
    /// Sum of the entry counts of all counted indexes.
    pub object_count: usize,
    /// Combined on-disk length of every counted pack file and index file.
    pub size_bytes: u64,
    /// Object ids gathered from the indexes; only 20-byte ids accepted by
    /// `ObjectId::from_bytes` are inserted.
    pub object_ids: HashSet<ObjectId>,
}
275
276pub fn read_local_pack_indexes(objects_dir: &Path) -> Result<Vec<PackIndex>> {
283 let pack_dir = objects_dir.join("pack");
284 let rd = match fs::read_dir(&pack_dir) {
285 Ok(rd) => rd,
286 Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
287 Err(err) => return Err(Error::Io(err)),
288 };
289
290 let mut out = Vec::new();
291 for entry in rd {
292 let entry = entry.map_err(Error::Io)?;
293 let path = entry.path();
294 if path.extension().and_then(|s| s.to_str()) != Some("idx") {
295 continue;
296 }
297 if let Ok(idx) = read_pack_index(&path) {
298 if !idx.pack_path.is_file() {
301 continue;
302 }
303 out.push(idx);
304 }
305 }
306 Ok(out)
307}
308
309mod pack_cache {
323 use super::{read_pack_index_no_verify, Error, ObjectKind, PackIndex, Result};
324 use std::collections::{HashMap, VecDeque};
325 use std::fs;
326 use std::io;
327 use std::path::{Path, PathBuf};
328 use std::sync::{Arc, Mutex, OnceLock};
329 use std::time::SystemTime;
330
331 struct CachedDir {
332 dir_mtime: SystemTime,
333 indexes: Vec<Arc<PackIndex>>,
334 }
335
336 struct CachedIdx {
337 mtime: SystemTime,
338 size: u64,
339 idx: Arc<PackIndex>,
340 }
341
342 struct CachedPack {
343 mtime: SystemTime,
344 size: u64,
345 bytes: Arc<Vec<u8>>,
346 }
347
348 const DELTA_BASE_CACHE_LIMIT: usize = 96 * 1024 * 1024;
351
352 #[derive(Default)]
353 struct State {
354 by_dir: HashMap<PathBuf, CachedDir>,
355 by_idx: HashMap<PathBuf, CachedIdx>,
356 by_pack: HashMap<PathBuf, CachedPack>,
357 delta_bases: HashMap<PathBuf, HashMap<u64, (ObjectKind, Arc<Vec<u8>>)>>,
362 delta_order: VecDeque<(PathBuf, u64)>,
364 delta_bytes: usize,
365 }
366
367 static CACHE: OnceLock<Mutex<State>> = OnceLock::new();
368
369 fn lock() -> std::sync::MutexGuard<'static, State> {
370 CACHE
371 .get_or_init(|| Mutex::new(State::default()))
372 .lock()
373 .unwrap_or_else(|p| p.into_inner())
374 }
375
376 fn dir_mtime(path: &Path) -> SystemTime {
377 fs::metadata(path)
378 .and_then(|m| m.modified())
379 .unwrap_or(SystemTime::UNIX_EPOCH)
380 }
381
382 fn file_signature(path: &Path) -> Option<(SystemTime, u64)> {
383 let m = fs::metadata(path).ok()?;
384 let mtime = m.modified().unwrap_or(SystemTime::UNIX_EPOCH);
385 Some((mtime, m.len()))
386 }
387
388 pub fn get_index(idx_path: &Path) -> Result<Arc<PackIndex>> {
391 let sig = file_signature(idx_path);
392 if let Some((mtime, size)) = sig {
393 {
394 let g = lock();
395 if let Some(c) = g.by_idx.get(idx_path) {
396 if c.mtime == mtime && c.size == size {
397 return Ok(Arc::clone(&c.idx));
398 }
399 }
400 }
401 let parsed = Arc::new(read_pack_index_no_verify(idx_path)?);
402 let mut g = lock();
403 g.by_idx.insert(
404 idx_path.to_path_buf(),
405 CachedIdx {
406 mtime,
407 size,
408 idx: Arc::clone(&parsed),
409 },
410 );
411 Ok(parsed)
412 } else {
413 Err(Error::Io(io::Error::new(
414 io::ErrorKind::NotFound,
415 format!("idx not found: {}", idx_path.display()),
416 )))
417 }
418 }
419
420 pub fn get_dir_indexes(objects_dir: &Path) -> Result<Vec<Arc<PackIndex>>> {
423 let pack_dir = objects_dir.join("pack");
424 let dir_mt = dir_mtime(&pack_dir);
425
426 {
427 let g = lock();
428 if let Some(c) = g.by_dir.get(&pack_dir) {
429 if c.dir_mtime == dir_mt {
430 return Ok(c.indexes.clone());
431 }
432 }
433 }
434
435 let rd = match fs::read_dir(&pack_dir) {
436 Ok(rd) => rd,
437 Err(err) if err.kind() == io::ErrorKind::NotFound => {
438 let mut g = lock();
439 g.by_dir.insert(
440 pack_dir.clone(),
441 CachedDir {
442 dir_mtime: dir_mt,
443 indexes: Vec::new(),
444 },
445 );
446 return Ok(Vec::new());
447 }
448 Err(err) => return Err(Error::Io(err)),
449 };
450
451 let mut out = Vec::new();
452 for entry in rd {
453 let entry = entry.map_err(Error::Io)?;
454 let path = entry.path();
455 if path.extension().and_then(|s| s.to_str()) != Some("idx") {
456 continue;
457 }
458 let Ok(idx) = get_index(&path) else { continue };
459 if !idx.pack_path.is_file() {
460 continue;
461 }
462 out.push(idx);
463 }
464
465 let mut g = lock();
466 g.by_dir.insert(
467 pack_dir,
468 CachedDir {
469 dir_mtime: dir_mt,
470 indexes: out.clone(),
471 },
472 );
473 Ok(out)
474 }
475
476 pub fn get_pack_bytes(pack_path: &Path) -> Result<Arc<Vec<u8>>> {
479 let sig = file_signature(pack_path);
480 if let Some((mtime, size)) = sig {
481 {
482 let g = lock();
483 if let Some(c) = g.by_pack.get(pack_path) {
484 if c.mtime == mtime && c.size == size {
485 return Ok(Arc::clone(&c.bytes));
486 }
487 }
488 }
489 let bytes = Arc::new(fs::read(pack_path).map_err(Error::Io)?);
490 let mut g = lock();
491 drop_delta_entries_locked(&mut g, pack_path);
494 g.by_pack.insert(
495 pack_path.to_path_buf(),
496 CachedPack {
497 mtime,
498 size,
499 bytes: Arc::clone(&bytes),
500 },
501 );
502 Ok(bytes)
503 } else {
504 Err(Error::Io(io::Error::new(
505 io::ErrorKind::NotFound,
506 format!("pack not found: {}", pack_path.display()),
507 )))
508 }
509 }
510
511 pub fn clear() {
514 let mut g = lock();
515 g.by_dir.clear();
516 g.by_idx.clear();
517 g.by_pack.clear();
518 g.delta_bases.clear();
519 g.delta_order.clear();
520 g.delta_bytes = 0;
521 }
522
523 fn drop_delta_entries_locked(g: &mut State, pack_path: &Path) {
525 if let Some(per) = g.delta_bases.remove(pack_path) {
526 let removed: usize = per.values().map(|(_, d)| d.len()).sum();
527 g.delta_bytes = g.delta_bytes.saturating_sub(removed);
528 g.delta_order.retain(|(p, _)| p != pack_path);
529 }
530 }
531
532 pub fn get_delta_base(pack_path: &Path, offset: u64) -> Option<(ObjectKind, Arc<Vec<u8>>)> {
534 let g = lock();
535 let (kind, data) = g.delta_bases.get(pack_path)?.get(&offset)?;
536 Some((*kind, Arc::clone(data)))
537 }
538
539 pub fn put_delta_base(pack_path: &Path, offset: u64, kind: ObjectKind, data: Arc<Vec<u8>>) {
541 let sz = data.len();
542 if sz > DELTA_BASE_CACHE_LIMIT {
543 return;
544 }
545 let mut g = lock();
546 while g.delta_bytes.saturating_add(sz) > DELTA_BASE_CACHE_LIMIT {
547 let Some((p, off)) = g.delta_order.pop_front() else {
548 break;
549 };
550 let mut removed = 0;
551 let mut now_empty = false;
552 if let Some(per) = g.delta_bases.get_mut(&p) {
553 if let Some((_, old)) = per.remove(&off) {
554 removed = old.len();
555 }
556 now_empty = per.is_empty();
557 }
558 if now_empty {
559 g.delta_bases.remove(&p);
560 }
561 g.delta_bytes = g.delta_bytes.saturating_sub(removed);
562 }
563 let prev = g
564 .delta_bases
565 .entry(pack_path.to_path_buf())
566 .or_default()
567 .insert(offset, (kind, data));
568 match prev {
569 Some((_, old)) => {
570 g.delta_bytes = g.delta_bytes.saturating_sub(old.len()).saturating_add(sz);
571 }
572 None => {
573 g.delta_order.push_back((pack_path.to_path_buf(), offset));
574 g.delta_bytes = g.delta_bytes.saturating_add(sz);
575 }
576 }
577 }
578
579 pub fn refresh_pack_signature(pack_path: &Path) {
585 if let Some((mtime, size)) = file_signature(pack_path) {
586 let mut g = lock();
587 if let Some(c) = g.by_pack.get_mut(pack_path) {
588 if c.size == size {
589 c.mtime = mtime;
590 }
591 }
592 }
593 }
594}
595
/// Returns the pack indexes for `objects_dir` by delegating to
/// `pack_cache::get_dir_indexes`; the indexes are shared via `Arc`.
pub fn read_local_pack_indexes_cached(objects_dir: &Path) -> Result<Vec<Arc<PackIndex>>> {
    pack_cache::get_dir_indexes(objects_dir)
}
609
/// Returns the parsed index at `idx_path` by delegating to
/// `pack_cache::get_index`.
pub fn read_pack_index_cached(idx_path: &Path) -> Result<Arc<PackIndex>> {
    pack_cache::get_index(idx_path)
}
620
/// Returns the contents of the pack file at `pack_path` by delegating to
/// `pack_cache::get_pack_bytes`.
pub fn read_pack_bytes_cached(pack_path: &Path) -> Result<Arc<Vec<u8>>> {
    pack_cache::get_pack_bytes(pack_path)
}
629
/// Discards everything held by the pack cache (delegates to
/// `pack_cache::clear`).
pub fn clear_pack_cache() {
    pack_cache::clear();
}
634
/// Asks the pack cache to re-read the modification time recorded for
/// `pack_path` (delegates to `pack_cache::refresh_pack_signature`).
pub fn refresh_pack_bytes_signature(pack_path: &Path) {
    pack_cache::refresh_pack_signature(pack_path);
}
640
641pub fn collect_local_pack_info(objects_dir: &Path) -> Result<LocalPackInfo> {
647 let indexes = read_local_pack_indexes(objects_dir)?;
648 let mut info = LocalPackInfo::default();
649 for idx in indexes {
650 let pack_meta = fs::metadata(&idx.pack_path).map_err(Error::Io)?;
651 let idx_meta = fs::metadata(&idx.idx_path).map_err(Error::Io)?;
652 info.pack_count += 1;
653 info.object_count += idx.entries.len();
654 info.size_bytes += pack_meta.len() + idx_meta.len();
655 for entry in idx.entries {
656 if entry.oid.len() == 20 {
657 if let Ok(oid) = ObjectId::from_bytes(&entry.oid) {
658 info.object_ids.insert(oid);
659 }
660 }
661 }
662 }
663 Ok(info)
664}
665
666fn verify_idx_trailing_checksum(idx_path: &Path, bytes: &[u8], hash_bytes: usize) -> Result<()> {
667 if bytes.len() < hash_bytes {
668 return Err(Error::CorruptObject(format!(
669 "index file {} missing checksum",
670 idx_path.display()
671 )));
672 }
673 let idx_body_end = bytes.len() - hash_bytes;
674 let digest: Vec<u8> = if hash_bytes == 32 {
675 let mut h = Sha256::new();
676 Sha256Digest::update(&mut h, &bytes[..idx_body_end]);
677 h.finalize().to_vec()
678 } else {
679 let mut h = Sha1::new();
680 Digest::update(&mut h, &bytes[..idx_body_end]);
681 h.finalize().to_vec()
682 };
683 if digest.as_slice() != &bytes[idx_body_end..] {
684 return Err(Error::CorruptObject(format!(
685 "index checksum mismatch for {}",
686 idx_path.display()
687 )));
688 }
689 Ok(())
690}
691
692fn check_fanout_monotonic(fanout: &[u32; 256], idx_path: &Path) -> Result<()> {
704 let mut prev = 0u32;
705 for &n in fanout {
706 if n < prev {
707 return Err(Error::CorruptObject(format!(
708 "non-monotonic index {}",
709 idx_path.display()
710 )));
711 }
712 prev = n;
713 }
714 Ok(())
715}
716
717fn read_pack_index_v1(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
718 let mut pos = 0usize;
719 if bytes.len() < 256 * 4 + 20 {
720 return Err(Error::CorruptObject(format!(
721 "index file {} is too small",
722 idx_path.display()
723 )));
724 }
725 let mut fanout = [0u32; 256];
726 for slot in &mut fanout {
727 *slot = read_u32_be(bytes, &mut pos)?;
728 }
729 check_fanout_monotonic(&fanout, idx_path)?;
730 let object_count = fanout[255] as usize;
731 let need = pos
732 .saturating_add(object_count.saturating_mul(24))
733 .saturating_add(20);
734 if bytes.len() < need {
735 return Err(Error::CorruptObject(format!(
736 "truncated idx file {}",
737 idx_path.display()
738 )));
739 }
740
741 let mut entries: Vec<PackIndexEntry> = Vec::with_capacity(object_count);
742 for i in 0..object_count {
743 let offset = read_u32_be(bytes, &mut pos)? as u64;
744 let oid = bytes[pos..pos + 20].to_vec();
745 pos += 20;
746 if i > 0 && entries[i - 1].oid.cmp(&oid) != std::cmp::Ordering::Less {
747 return Err(Error::CorruptObject(format!(
748 "oid lookup out of order in {}",
749 idx_path.display()
750 )));
751 }
752 entries.push(PackIndexEntry { oid, offset });
753 }
754
755 if verify {
756 verify_idx_trailing_checksum(idx_path, bytes, 20)?;
758 }
759
760 let mut pack_path = idx_path.to_path_buf();
761 pack_path.set_extension("pack");
762
763 let fanout = compute_fanout_from_entries(&entries);
764 Ok(PackIndex {
765 idx_path: idx_path.to_path_buf(),
766 pack_path,
767 hash_bytes: 20,
768 entries,
769 fanout,
770 })
771}
772
773fn compute_fanout_from_entries(entries: &[PackIndexEntry]) -> [u32; 256] {
776 let mut fanout = [0u32; 256];
777 let mut idx = 0usize;
778 for byte in 0u32..256 {
779 let needle = byte as u8;
780 while idx < entries.len() && entries[idx].oid.first().copied().unwrap_or(0) <= needle {
781 idx += 1;
782 }
783 fanout[byte as usize] = u32::try_from(idx).unwrap_or(u32::MAX);
784 }
785 fanout
786}
787
788fn read_pack_index_v2(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
789 if bytes.len() < 8 + 256 * 4 + 40 {
790 return Err(Error::CorruptObject(format!(
791 "index file {} is too small",
792 idx_path.display()
793 )));
794 }
795
796 let mut pos = 0usize;
797 pos += 4;
798 let version = read_u32_be(bytes, &mut pos)?;
799 if version != 2 {
800 return Err(Error::CorruptObject(format!(
801 "unsupported idx version {} in {}",
802 version,
803 idx_path.display()
804 )));
805 }
806
807 let mut fanout = [0u32; 256];
808 for slot in &mut fanout {
809 *slot = read_u32_be(bytes, &mut pos)?;
810 }
811 check_fanout_monotonic(&fanout, idx_path)?;
812 let object_count = fanout[255] as usize;
813
814 let idx_file_len = bytes.len();
815 let hash_bytes = detect_idx_hash_bytes_v2(idx_file_len, pos, object_count, idx_path)?;
816
817 let need = pos
818 .saturating_add(object_count * hash_bytes)
819 .saturating_add(object_count * 4)
820 .saturating_add(object_count * 4)
821 .saturating_add(40);
822 if bytes.len() < need {
823 return Err(Error::CorruptObject(format!(
824 "truncated idx file {}",
825 idx_path.display()
826 )));
827 }
828
829 let mut oids: Vec<Vec<u8>> = Vec::with_capacity(object_count);
830 for _ in 0..object_count {
831 let slice = &bytes[pos..pos + hash_bytes];
832 pos += hash_bytes;
833 oids.push(slice.to_vec());
834 }
835
836 pos += object_count * 4;
837
838 let mut offsets32 = Vec::with_capacity(object_count);
839 let mut large_count = 0usize;
840 for _ in 0..object_count {
841 let v = read_u32_be(bytes, &mut pos)?;
842 if (v & 0x8000_0000) != 0 {
843 large_count += 1;
844 }
845 offsets32.push(v);
846 }
847
848 if bytes.len() < pos + large_count * 8 + 40 {
849 return Err(Error::CorruptObject(format!(
850 "truncated large offset table in {}",
851 idx_path.display()
852 )));
853 }
854 let mut large_offsets = Vec::with_capacity(large_count);
855 for _ in 0..large_count {
856 large_offsets.push(read_u64_be(bytes, &mut pos)?);
857 }
858
859 let mut next_large = 0usize;
860 let mut entries = Vec::with_capacity(object_count);
861 for (i, oid) in oids.into_iter().enumerate() {
862 let raw = offsets32[i];
863 let offset = if (raw & 0x8000_0000) == 0 {
864 raw as u64
865 } else {
866 let off = large_offsets.get(next_large).copied().ok_or_else(|| {
867 Error::CorruptObject(format!("bad large offset index in {}", idx_path.display()))
868 })?;
869 next_large += 1;
870 off
871 };
872 entries.push(PackIndexEntry { oid, offset });
873 }
874
875 let mut pack_path = idx_path.to_path_buf();
876 pack_path.set_extension("pack");
877
878 if verify {
879 verify_idx_trailing_checksum(idx_path, bytes, hash_bytes)?;
880 }
881
882 Ok(PackIndex {
883 idx_path: idx_path.to_path_buf(),
884 pack_path,
885 hash_bytes,
886 entries,
887 fanout,
888 })
889}
890
891fn detect_idx_hash_bytes_v2(
896 idx_file_len: usize,
897 fanout_end: usize,
898 object_count: usize,
899 idx_path: &Path,
900) -> Result<usize> {
901 if object_count == 0 {
902 return Ok(20);
903 }
904
905 for &hb in &[20usize, 32] {
912 let fixed = fanout_end
913 .saturating_add(object_count.saturating_mul(hb + 4 + 4))
914 .saturating_add(2 * hb);
915 if idx_file_len < fixed {
916 continue;
917 }
918 let extra = idx_file_len - fixed;
919 if extra % 8 != 0 {
920 continue;
921 }
922 if extra / 8 > object_count {
923 continue;
924 }
925 return Ok(hb);
926 }
927
928 Err(Error::CorruptObject(format!(
929 "wrong index v2 file size in {}",
930 idx_path.display()
931 )))
932}
933
/// Encodes raw object id bytes as a hexadecimal string using `hex::encode`.
#[must_use]
pub fn oid_bytes_to_hex(oid: &[u8]) -> String {
    hex::encode(oid)
}
938
939#[must_use]
941pub fn pack_index_entry_matches_sha1_oid(entry: &PackIndexEntry, oid: &ObjectId) -> bool {
942 entry.oid.len() == 20 && entry.oid.as_slice() == oid.as_bytes()
943}
944
945pub fn hash_object_bytes(kind: ObjectKind, data: &[u8], hash_bytes: usize) -> Result<Vec<u8>> {
947 let header = format!("{} {}\0", kind, data.len());
948 match hash_bytes {
949 20 => {
950 let mut hasher = Sha1::new();
951 hasher.update(header.as_bytes());
952 hasher.update(data);
953 Ok(hasher.finalize().to_vec())
954 }
955 32 => {
956 use sha2::Digest as _;
957 let mut hasher = Sha256::new();
958 hasher.update(header.as_bytes());
959 hasher.update(data);
960 Ok(hasher.finalize().to_vec())
961 }
962 other => Err(Error::CorruptObject(format!(
963 "unsupported object hash width: {other}"
964 ))),
965 }
966}
967
/// Reads the index file at `idx_path` and parses it with checksum
/// verification enabled.
///
/// # Errors
///
/// Returns [`Error::Io`] when the file cannot be read, or whatever error the
/// parser reports for its contents.
pub fn read_pack_index(idx_path: &Path) -> Result<PackIndex> {
    let bytes = fs::read(idx_path).map_err(Error::Io)?;
    parse_pack_index_bytes(idx_path, &bytes, true)
}
982
/// Reads the index file at `idx_path` and parses it with checksum
/// verification disabled.
///
/// # Errors
///
/// Returns [`Error::Io`] when the file cannot be read, or whatever error the
/// parser reports for its contents.
pub fn read_pack_index_no_verify(idx_path: &Path) -> Result<PackIndex> {
    let bytes = fs::read(idx_path).map_err(Error::Io)?;
    parse_pack_index_bytes(idx_path, &bytes, false)
}
992
993fn parse_pack_index_bytes(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
994 if bytes.len() < 8 {
995 return Err(Error::CorruptObject(format!(
996 "index file {} is too small",
997 idx_path.display()
998 )));
999 }
1000 let magic = &bytes[0..4];
1001 if magic == [0xff, b't', b'O', b'c'] {
1002 read_pack_index_v2(idx_path, bytes, verify)
1003 } else {
1004 read_pack_index_v1(idx_path, bytes, verify)
1005 }
1006}
1007
/// Kind of entry stored in a pack: one of the four object kinds, or one of
/// the two delta encodings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedType {
    /// A commit object.
    Commit,
    /// A tree object.
    Tree,
    /// A blob object.
    Blob,
    /// A tag object.
    Tag,
    /// A delta whose base is located by offset.
    OfsDelta,
    /// A delta whose base is located by object id.
    RefDelta,
}

impl PackedType {
    /// Returns the lowercase textual name of this packed type.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            PackedType::Commit => "commit",
            PackedType::Tree => "tree",
            PackedType::Blob => "blob",
            PackedType::Tag => "tag",
            PackedType::OfsDelta => "ofs-delta",
            PackedType::RefDelta => "ref-delta",
        }
    }
}
1039
/// Per-object record produced by [`verify_pack_and_collect`].
#[derive(Debug, Clone)]
pub struct VerifyObjectRecord {
    /// Raw object id taken from the index entry.
    pub oid: Vec<u8>,
    /// Packed type decoded from the object's header in the pack.
    pub packed_type: PackedType,
    /// Size value decoded from the object's header in the pack.
    pub size: u64,
    /// Distance in bytes from this object's offset to the next object's
    /// offset (or to the start of the trailing checksum for the last object).
    pub size_in_pack: u64,
    /// Offset of the object within the pack.
    pub offset: u64,
    /// Delta chain depth; filled in for delta entries and left `None` for
    /// non-delta objects.
    pub depth: Option<u64>,
    /// Base object id for ref-delta entries; `None` otherwise.
    pub base_oid: Option<Vec<u8>>,
}
1058
/// How a delta entry refers to its base while verifying a pack.
enum DeltaBaseLink {
    /// The base is named by its raw object id (ref-delta).
    Oid(Vec<u8>),
    /// The base is named by its offset in the same pack (ofs-delta).
    Offset(u64),
}
1066
1067fn resolve_delta_depth(
1078 i: usize,
1079 base_links: &[Option<DeltaBaseLink>],
1080 by_oid: &HashMap<Vec<u8>, usize>,
1081 by_offset_idx: &HashMap<u64, usize>,
1082 records: &mut [VerifyObjectRecord],
1083) -> Result<u64> {
1084 if let Some(d) = records[i].depth {
1085 return Ok(d);
1086 }
1087 let Some(link) = &base_links[i] else {
1088 return Ok(0);
1089 };
1090 let base_idx = match link {
1091 DeltaBaseLink::Oid(oid) => by_oid.get(oid).copied(),
1092 DeltaBaseLink::Offset(off) => by_offset_idx.get(off).copied(),
1093 };
1094 records[i].depth = Some(1);
1096 let depth = match base_idx {
1097 Some(b) if b != i => {
1098 resolve_delta_depth(b, base_links, by_oid, by_offset_idx, records)?.saturating_add(1)
1099 }
1100 _ => 1,
1102 };
1103 records[i].depth = Some(depth);
1104 Ok(depth)
1105}
1106
/// Verifies the pack that belongs to the index at `idx_path` and returns one
/// record per object, ordered by offset within the pack.
///
/// The checks are performed in this order:
/// 1. the index is read with checksum verification;
/// 2. the pack's trailing checksum is recomputed (SHA-1 or SHA-256, chosen by
///    the index's hash width) and compared;
/// 3. the pack checksum embedded in the index is compared with the pack's
///    trailing checksum;
/// 4. the pack signature, version (2 or 3) and object count are checked;
/// 5. every object header is parsed and its boundaries validated;
/// 6. delta depths are resolved;
/// 7. every object is read back and its hash is compared with the index.
///
/// # Errors
///
/// Returns [`Error::Io`] when a file cannot be read and
/// [`Error::CorruptObject`] when any of the checks above fails; errors from
/// the helpers called along the way are propagated.
pub fn verify_pack_and_collect(idx_path: &Path) -> Result<Vec<VerifyObjectRecord>> {
    let idx = read_pack_index(idx_path)?;
    // The raw index bytes are needed again below to reach the embedded pack checksum.
    let idx_file_bytes = fs::read(idx_path).map_err(Error::Io)?;
    let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
    let hb = idx.hash_bytes;
    // 12-byte header (signature, version, object count) plus the trailing checksum.
    if pack_bytes.len() < 12 + hb {
        return Err(Error::CorruptObject(format!(
            "pack file {} is too small",
            idx.pack_path.display()
        )));
    }
    // Everything before `pack_end` is covered by the trailing checksum.
    let pack_end = pack_bytes.len() - hb;
    match hb {
        20 => {
            let mut h = Sha1::new();
            h.update(&pack_bytes[..pack_end]);
            let digest = h.finalize();
            if digest.as_slice() != &pack_bytes[pack_end..] {
                return Err(Error::CorruptObject(format!(
                    "pack trailing checksum mismatch for {}",
                    idx.pack_path.display()
                )));
            }
        }
        32 => {
            use sha2::Digest as _;
            let mut h = Sha256::new();
            h.update(&pack_bytes[..pack_end]);
            let digest = h.finalize();
            if digest.as_slice() != &pack_bytes[pack_end..] {
                return Err(Error::CorruptObject(format!(
                    "pack trailing checksum mismatch for {}",
                    idx.pack_path.display()
                )));
            }
        }
        _ => {
            return Err(Error::CorruptObject(format!(
                "unsupported OID width {} for pack {}",
                hb,
                idx.pack_path.display()
            )));
        }
    }
    // The index ends with two hash-sized values; the first of them is compared
    // against the pack's own trailing checksum.
    if idx_file_bytes.len() >= 2 * hb {
        let n = idx_file_bytes.len();
        let embedded = &idx_file_bytes[n - 2 * hb..n - hb];
        if embedded != &pack_bytes[pack_end..] {
            return Err(Error::CorruptObject(format!(
                "pack checksum in index does not match {}",
                idx.pack_path.display()
            )));
        }
    }
    if &pack_bytes[0..4] != b"PACK" {
        return Err(Error::CorruptObject(format!(
            "pack file {} has invalid signature",
            idx.pack_path.display()
        )));
    }
    let version = u32::from_be_bytes(pack_bytes[4..8].try_into().unwrap_or([0, 0, 0, 0]));
    if version != 2 && version != 3 {
        return Err(Error::CorruptObject(format!(
            "unsupported pack version {} in {}",
            version,
            idx.pack_path.display()
        )));
    }
    let count = u32::from_be_bytes(pack_bytes[8..12].try_into().unwrap_or([0, 0, 0, 0])) as usize;
    if count != idx.entries.len() {
        return Err(Error::CorruptObject(format!(
            "pack/index object count mismatch for {}",
            idx.pack_path.display()
        )));
    }

    // Order the objects by pack offset so consecutive offsets delimit each object.
    let mut by_offset: BTreeMap<u64, Vec<u8>> = BTreeMap::new();
    for entry in &idx.entries {
        by_offset.insert(entry.offset, entry.oid.clone());
    }
    let offsets: Vec<u64> = by_offset.keys().copied().collect();
    if offsets.is_empty() {
        return Ok(Vec::new());
    }

    // Lookup tables from object id / offset to the position in `records`,
    // used to follow delta base links.
    let mut by_oid: HashMap<Vec<u8>, usize> = HashMap::new();
    let mut by_offset_idx: HashMap<u64, usize> = HashMap::new();
    let mut records: Vec<VerifyObjectRecord> = Vec::with_capacity(offsets.len());
    let mut base_links: Vec<Option<DeltaBaseLink>> = Vec::with_capacity(offsets.len());
    for (i, offset) in offsets.iter().copied().enumerate() {
        let oid = by_offset.get(&offset).cloned().ok_or_else(|| {
            Error::CorruptObject(format!("missing object id for offset {}", offset))
        })?;
        // The object ends where the next one starts, or at the trailing
        // checksum for the last object.
        let next_off = offsets
            .get(i + 1)
            .copied()
            .unwrap_or((pack_bytes.len() - hb) as u64);
        if next_off <= offset || next_off > (pack_bytes.len() - hb) as u64 {
            return Err(Error::CorruptObject(format!(
                "invalid object boundaries at offset {} in {}",
                offset,
                idx.pack_path.display()
            )));
        }
        let mut p = offset as usize;
        let (packed_type, size) = parse_pack_object_header(&pack_bytes, &mut p)?;
        let mut base_oid: Option<Vec<u8>> = None;
        let mut base_link: Option<DeltaBaseLink> = None;

        match packed_type {
            PackedType::RefDelta => {
                // The base object id follows the header directly.
                if p + hb > pack_bytes.len() {
                    return Err(Error::CorruptObject(format!(
                        "truncated ref-delta base at offset {}",
                        offset
                    )));
                }
                let raw = pack_bytes[p..p + hb].to_vec();
                base_oid = Some(raw.clone());
                base_link = Some(DeltaBaseLink::Oid(raw));
            }
            PackedType::OfsDelta => {
                let base_offset = parse_ofs_delta_base(&pack_bytes, &mut p, offset)?;
                base_link = Some(DeltaBaseLink::Offset(base_offset));
            }
            PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {}
        }

        let size_in_pack = next_off - offset;
        records.push(VerifyObjectRecord {
            oid: oid.clone(),
            packed_type,
            size,
            size_in_pack,
            offset,
            depth: None,
            base_oid,
        });
        base_links.push(base_link);
        by_oid.insert(oid, i);
        by_offset_idx.insert(offset, i);
    }

    // Only delta entries get a depth; non-delta records keep `depth: None`.
    for i in 0..records.len() {
        if base_links[i].is_some() {
            let _ = resolve_delta_depth(i, &base_links, &by_oid, &by_offset_idx, &mut records)?;
        }
    }

    // Finally, reconstruct every object and check that its hash equals the id
    // recorded in the index.
    for entry in &idx.entries {
        let obj = read_object_from_pack_bytes(&pack_bytes, &idx, &entry.oid)?;
        let computed = hash_object_bytes(obj.kind, &obj.data, hb)?;
        if computed.as_slice() != entry.oid.as_slice() {
            return Err(Error::CorruptObject(format!(
                "pack object hash mismatch at offset {} (index says {})",
                entry.offset,
                oid_bytes_to_hex(&entry.oid)
            )));
        }
    }

    Ok(records)
}
1283
1284pub fn read_alternates_recursive(objects_dir: &Path) -> Result<Vec<PathBuf>> {
1290 let mut visited = HashSet::new();
1291 let mut out = Vec::new();
1292 read_alternates_inner(objects_dir, &mut visited, &mut out, 0)?;
1293 Ok(out)
1294}
1295
/// Deepest nesting level of `info/alternates` files that is followed;
/// directories reached beyond this depth are not searched for further
/// alternates.
const MAX_ALTERNATE_DEPTH: usize = 5;
1298
1299fn read_alternates_inner(
1300 objects_dir: &Path,
1301 visited: &mut HashSet<PathBuf>,
1302 out: &mut Vec<PathBuf>,
1303 depth: usize,
1304) -> Result<()> {
1305 if depth > MAX_ALTERNATE_DEPTH {
1306 return Ok(());
1307 }
1308 let canonical = canonical_or_self(objects_dir);
1309 let alt_file = canonical.join("info").join("alternates");
1310 let text = match fs::read_to_string(&alt_file) {
1311 Ok(text) => text,
1312 Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
1313 Err(err) => return Err(Error::Io(err)),
1314 };
1315
1316 for raw in text.lines() {
1317 let line = raw.trim();
1318 if line.is_empty() {
1319 continue;
1320 }
1321 let candidate = if Path::new(line).is_absolute() {
1322 PathBuf::from(line)
1323 } else {
1324 canonical.join(line)
1325 };
1326 let candidate = canonical_or_self(&candidate);
1327 if visited.insert(candidate.clone()) {
1328 out.push(candidate.clone());
1329 read_alternates_inner(&candidate, visited, out, depth + 1)?;
1330 }
1331 }
1332 Ok(())
1333}
1334
/// Returns the canonical form of `path`, or `path` unchanged when it cannot
/// be canonicalised (for example because it does not exist).
fn canonical_or_self(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
1338
1339fn packed_type_to_kind(pt: PackedType) -> Result<ObjectKind> {
1341 match pt {
1342 PackedType::Commit => Ok(ObjectKind::Commit),
1343 PackedType::Tree => Ok(ObjectKind::Tree),
1344 PackedType::Blob => Ok(ObjectKind::Blob),
1345 PackedType::Tag => Ok(ObjectKind::Tag),
1346 PackedType::OfsDelta | PackedType::RefDelta => Err(Error::CorruptObject(
1347 "cannot convert delta type to object kind directly".to_owned(),
1348 )),
1349 }
1350}
1351
1352fn decompress_pack_data(bytes: &[u8], pos: &mut usize, expected_size: u64) -> Result<Vec<u8>> {
1357 let slice = &bytes[*pos..];
1358 let mut decoder = ZlibDecoder::new(slice);
1359 let mut out = Vec::with_capacity(expected_size as usize);
1360 decoder
1361 .read_to_end(&mut out)
1362 .map_err(|e| Error::Zlib(e.to_string()))?;
1363 *pos += decoder.total_in() as usize;
1364 if out.len() as u64 != expected_size {
1365 return Err(Error::CorruptObject(format!(
1366 "pack object size mismatch: expected {expected_size}, got {}",
1367 out.len()
1368 )));
1369 }
1370 Ok(out)
1371}
1372
1373fn read_pack_base_cached(
1383 pack_bytes: &[u8],
1384 base_offset: u64,
1385 idx: &PackIndex,
1386 objects_dir: Option<&Path>,
1387 depth: usize,
1388) -> Result<(ObjectKind, Arc<Vec<u8>>)> {
1389 if let Some(hit) = pack_cache::get_delta_base(&idx.pack_path, base_offset) {
1390 return Ok(hit);
1391 }
1392 let (kind, data) = read_pack_object_at(pack_bytes, base_offset, idx, objects_dir, depth + 1)?;
1393 let data = Arc::new(data);
1394 pack_cache::put_delta_base(&idx.pack_path, base_offset, kind, Arc::clone(&data));
1395 Ok((kind, data))
1396}
1397
1398fn read_pack_object_at(
1399 pack_bytes: &[u8],
1400 offset: u64,
1401 idx: &PackIndex,
1402 objects_dir: Option<&Path>,
1403 depth: usize,
1404) -> Result<(ObjectKind, Vec<u8>)> {
1405 if depth > 50 {
1406 return Err(Error::CorruptObject(
1407 "delta chain too deep (>50)".to_owned(),
1408 ));
1409 }
1410 let mut pos = offset as usize;
1411 let (packed_type, size) = parse_pack_object_header(pack_bytes, &mut pos)?;
1412
1413 match packed_type {
1414 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
1415 let data = decompress_pack_data(pack_bytes, &mut pos, size)?;
1416 let kind = packed_type_to_kind(packed_type)?;
1417 Ok((kind, data))
1418 }
1419 PackedType::OfsDelta => {
1420 let base_offset = parse_ofs_delta_base(pack_bytes, &mut pos, offset)?;
1421 let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
1422 let in_pack = read_pack_base_cached(pack_bytes, base_offset, idx, objects_dir, depth);
1427 match in_pack {
1428 Ok((base_kind, base_data)) => {
1429 let result = apply_delta(&base_data, &delta_data)?;
1430 Ok((base_kind, result))
1431 }
1432 Err(err) => {
1433 if let Some(dir) = objects_dir {
1434 if let Some(base_entry) =
1436 idx.entries.iter().find(|e| e.offset == base_offset)
1437 {
1438 if base_entry.oid.len() == 20 {
1439 if let Ok(base_oid) =
1440 ObjectId::from_bytes(base_entry.oid.as_slice())
1441 {
1442 let loose = dir
1443 .join(base_oid.loose_prefix())
1444 .join(base_oid.loose_suffix());
1445 if loose.is_file() {
1446 if let Ok(obj) = crate::odb::Odb::read_loose_verify_oid(
1447 &loose, &base_oid,
1448 ) {
1449 let result = apply_delta(&obj.data, &delta_data)?;
1450 return Ok((obj.kind, result));
1451 }
1452 }
1453 if let Ok(obj) =
1454 read_object_from_other_pack(dir, idx, &base_oid, depth + 1)
1455 {
1456 let result = apply_delta(&obj.data, &delta_data)?;
1457 return Ok((obj.kind, result));
1458 }
1459 }
1460 }
1461 }
1462 }
1463 Err(err)
1464 }
1465 }
1466 }
1467 PackedType::RefDelta => {
1468 let hb = idx.hash_bytes;
1469 if pos + hb > pack_bytes.len() {
1470 return Err(Error::CorruptObject(
1471 "truncated ref-delta base OID".to_owned(),
1472 ));
1473 }
1474 let base_raw = pack_bytes[pos..pos + hb].to_vec();
1475 pos += hb;
1476 let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
1477 let in_pack_offset = idx
1480 .entries
1481 .binary_search_by(|e| e.oid.as_slice().cmp(base_raw.as_slice()))
1482 .ok()
1483 .map(|i| idx.entries[i].offset);
1484 let mut in_pack_err = None;
1485 if let Some(base_offset) = in_pack_offset {
1486 match read_pack_base_cached(pack_bytes, base_offset, idx, objects_dir, depth) {
1487 Ok((base_kind, base_data)) => {
1488 let result = apply_delta(&base_data, &delta_data)?;
1489 return Ok((base_kind, result));
1490 }
1491 Err(err) => in_pack_err = Some(err),
1492 }
1493 }
1494 if hb == 20 {
1495 if let (Some(dir), Ok(base_oid)) =
1496 (objects_dir, ObjectId::from_bytes(base_raw.as_slice()))
1497 {
1498 let loose = dir
1499 .join(base_oid.loose_prefix())
1500 .join(base_oid.loose_suffix());
1501 if loose.is_file() {
1502 if let Ok(obj) = crate::odb::Odb::read_loose_verify_oid(&loose, &base_oid) {
1503 let result = apply_delta(&obj.data, &delta_data)?;
1504 return Ok((obj.kind, result));
1505 }
1506 }
1507 if let Ok(obj) = read_object_from_other_pack(dir, idx, &base_oid, depth + 1) {
1508 let result = apply_delta(&obj.data, &delta_data)?;
1509 return Ok((obj.kind, result));
1510 }
1511 }
1512 }
1513 if let Some(err) = in_pack_err {
1514 return Err(err);
1515 }
1516 if idx.entries.len() > 100 {
1521 return Ok((ObjectKind::Blob, delta_data));
1522 }
1523 Err(Error::CorruptObject(format!(
1524 "ref-delta base {} not found in pack",
1525 oid_bytes_to_hex(&base_raw)
1526 )))
1527 }
1528 }
1529}
1530
1531fn read_object_from_other_pack(
1532 objects_dir: &Path,
1533 current_idx: &PackIndex,
1534 oid: &ObjectId,
1535 depth: usize,
1536) -> Result<Object> {
1537 for idx in read_local_pack_indexes_cached(objects_dir)? {
1538 if idx.idx_path == current_idx.idx_path {
1539 continue;
1540 }
1541 if idx.contains(oid) {
1542 return read_object_from_pack_at_depth(&idx, oid, depth);
1545 }
1546 }
1547 Err(Error::ObjectNotFound(oid.to_hex()))
1548}
1549
1550pub fn read_object_from_pack(idx: &PackIndex, oid: &ObjectId) -> Result<Object> {
1559 read_object_from_pack_at_depth(idx, oid, 0)
1560}
1561
1562fn read_object_from_pack_at_depth(idx: &PackIndex, oid: &ObjectId, depth: usize) -> Result<Object> {
1565 let Some(offset) = idx.find_offset(oid) else {
1566 return Err(Error::ObjectNotFound(oid.to_hex()));
1567 };
1568
1569 let pack_bytes = read_pack_bytes_cached(&idx.pack_path)?;
1570 validate_pack_index_object_count(&pack_bytes, idx)?;
1571 let objects_dir = idx.pack_path.parent().and_then(Path::parent);
1572 let (kind, data) = read_pack_object_at(&pack_bytes, offset, idx, objects_dir, depth)?;
1573 Ok(Object::new(kind, data))
1574}
1575
1576pub fn read_object_from_pack_bytes(
1578 pack_bytes: &[u8],
1579 idx: &PackIndex,
1580 oid: &[u8],
1581) -> Result<Object> {
1582 validate_pack_index_object_count(pack_bytes, idx)?;
1583 let entry_offset = idx
1584 .entries
1585 .binary_search_by(|e| e.oid.as_slice().cmp(oid))
1586 .ok()
1587 .map(|i| idx.entries[i].offset)
1588 .ok_or_else(|| Error::ObjectNotFound(oid_bytes_to_hex(oid)))?;
1589 let (kind, data) = read_pack_object_at(pack_bytes, entry_offset, idx, None, 0)?;
1590 verify_packed_object_hash(kind, &data, oid)?;
1591 Ok(Object::new(kind, data))
1592}
1593
1594fn validate_pack_index_object_count(pack_bytes: &[u8], idx: &PackIndex) -> Result<()> {
1595 if pack_bytes.len() < 12 || &pack_bytes[0..4] != b"PACK" {
1596 return Err(Error::CorruptObject("bad pack header".to_owned()));
1597 }
1598 let count =
1599 u32::from_be_bytes([pack_bytes[8], pack_bytes[9], pack_bytes[10], pack_bytes[11]]) as usize;
1600 if count != idx.entries.len() {
1601 return Err(Error::CorruptObject(format!(
1602 "pack object count mismatch: pack has {count}, index has {}",
1603 idx.entries.len()
1604 )));
1605 }
1606 Ok(())
1607}
1608
1609fn verify_packed_object_hash(kind: ObjectKind, data: &[u8], expected_oid: &[u8]) -> Result<()> {
1610 if expected_oid.len() != 20 {
1611 return Ok(());
1612 }
1613 let header = format!("{kind} {}\0", data.len());
1614 let mut hasher = Sha1::new();
1615 hasher.update(header.as_bytes());
1616 hasher.update(data);
1617 let actual = hasher.finalize();
1618 if actual.as_slice() != expected_oid {
1619 return Err(Error::CorruptObject(format!(
1620 "packed object {} hashes to {}",
1621 oid_bytes_to_hex(expected_oid),
1622 oid_bytes_to_hex(actual.as_slice())
1623 )));
1624 }
1625 Ok(())
1626}
1627
1628pub fn read_object_from_packs(objects_dir: &Path, oid: &ObjectId) -> Result<Object> {
1640 let indexes = read_local_pack_indexes_cached(objects_dir)?;
1641 let mut last_err: Option<Error> = None;
1642 for idx in &indexes {
1643 if idx.find_offset(oid).is_none() {
1644 continue;
1645 }
1646 match read_object_from_pack(idx, oid) {
1647 Ok(obj) => return Ok(obj),
1648 Err(Error::ObjectNotFound(_)) => {}
1651 Err(err) => last_err = Some(err),
1654 }
1655 }
1656 Err(last_err.unwrap_or_else(|| Error::ObjectNotFound(oid.to_hex())))
1657}
1658
1659pub fn packed_ref_delta_reuse_slice(
1670 objects_dir: &Path,
1671 oid: &ObjectId,
1672 packed_set: &HashSet<ObjectId>,
1673) -> Result<Option<(ObjectId, Vec<u8>)>> {
1674 let mut indexes = read_local_pack_indexes(objects_dir)?;
1675 sort_pack_indexes_oldest_first(&mut indexes);
1676 for idx in indexes {
1677 let Some(entry) = idx
1678 .entries
1679 .iter()
1680 .find(|e| e.oid.len() == 20 && e.oid.as_slice() == oid.as_bytes())
1681 else {
1682 continue;
1683 };
1684 let hb = idx.hash_bytes;
1685 if hb != 20 {
1686 continue;
1687 }
1688 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
1689 let mut p = entry.offset as usize;
1690 let (packed_type, _size) = parse_pack_object_header(&pack_bytes, &mut p)?;
1691 let base = match packed_type {
1692 PackedType::RefDelta => {
1693 if p + hb > pack_bytes.len() {
1694 return Err(Error::CorruptObject(
1695 "truncated ref-delta base oid while scanning for reuse".to_owned(),
1696 ));
1697 }
1698 let bo = ObjectId::from_bytes(&pack_bytes[p..p + hb])?;
1699 p += hb;
1700 bo
1701 }
1702 PackedType::OfsDelta => {
1703 let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
1704 let Some(base_entry) = idx.entries.iter().find(|e| e.offset == base_off) else {
1705 continue;
1706 };
1707 if base_entry.oid.len() != 20 {
1708 continue;
1709 }
1710 ObjectId::from_bytes(base_entry.oid.as_slice())?
1711 }
1712 _ => {
1713 continue;
1716 }
1717 };
1718 if !packed_set.contains(&base) {
1719 continue;
1720 }
1721 let zlib_start = p;
1722 let mut end_pos = zlib_start;
1723 if skip_one_pack_object(&pack_bytes, &mut end_pos, entry.offset, hb).is_err() {
1724 continue;
1725 }
1726 let compressed = &pack_bytes[zlib_start..end_pos];
1727 let mut dec = ZlibDecoder::new(compressed);
1728 let mut delta = Vec::new();
1729 if dec.read_to_end(&mut delta).is_err() {
1730 continue;
1731 }
1732 return Ok(Some((base, delta)));
1733 }
1734 Ok(None)
1735}
1736
1737fn sort_pack_indexes_oldest_first(indexes: &mut [PackIndex]) {
1740 indexes.sort_by(|a, b| {
1741 let ta = fs::metadata(&a.pack_path)
1742 .and_then(|m| m.modified())
1743 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1744 let tb = fs::metadata(&b.pack_path)
1745 .and_then(|m| m.modified())
1746 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1747 ta.cmp(&tb).then_with(|| a.pack_path.cmp(&b.pack_path))
1748 });
1749}
1750
1751fn sort_pack_indexes_newest_first(indexes: &mut [PackIndex]) {
1752 indexes.sort_by(|a, b| {
1753 let ta = fs::metadata(&a.pack_path)
1754 .and_then(|m| m.modified())
1755 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1756 let tb = fs::metadata(&b.pack_path)
1757 .and_then(|m| m.modified())
1758 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1759 tb.cmp(&ta).then_with(|| b.pack_path.cmp(&a.pack_path))
1760 });
1761}
1762
1763pub fn packed_delta_base_oid(objects_dir: &Path, oid: &ObjectId) -> Result<Option<ObjectId>> {
1764 let mut indexes = read_local_pack_indexes(objects_dir)?;
1765 sort_pack_indexes_newest_first(&mut indexes);
1766 for idx in &indexes {
1767 if idx.hash_bytes != 20 {
1768 continue;
1769 }
1770 let Some(entry) = idx
1771 .entries
1772 .iter()
1773 .find(|e| e.oid.len() == 20 && e.oid.as_slice() == oid.as_bytes())
1774 else {
1775 continue;
1776 };
1777 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
1778 let mut p = entry.offset as usize;
1779 let (packed_type, _) = parse_pack_object_header(&pack_bytes, &mut p)?;
1780 match packed_type {
1781 PackedType::RefDelta => {
1782 let hb = idx.hash_bytes;
1783 if p + hb > pack_bytes.len() {
1784 return Err(Error::CorruptObject("truncated ref-delta base".to_owned()));
1785 }
1786 return Ok(Some(ObjectId::from_bytes(&pack_bytes[p..p + hb])?));
1787 }
1788 PackedType::OfsDelta => {
1789 let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
1790 return Ok(idx
1791 .entries
1792 .iter()
1793 .find(|e| e.offset == base_off)
1794 .and_then(|e| ObjectId::from_bytes(e.oid.as_slice()).ok()));
1795 }
1796 _ => continue,
1797 }
1798 }
1799 Ok(None)
1800}
1801
1802fn parse_pack_object_header(bytes: &[u8], pos: &mut usize) -> Result<(PackedType, u64)> {
1803 let first = *bytes.get(*pos).ok_or_else(|| {
1804 Error::CorruptObject("unexpected end of pack header while decoding object".to_owned())
1805 })?;
1806 *pos += 1;
1807
1808 let type_code = (first >> 4) & 0x7;
1809 let mut size = (first & 0x0f) as u64;
1810 let mut shift = 4u32;
1811 let mut c = first;
1812 while (c & 0x80) != 0 {
1813 c = *bytes.get(*pos).ok_or_else(|| {
1814 Error::CorruptObject("unexpected end of variable size header".to_owned())
1815 })?;
1816 *pos += 1;
1817 size |= ((c & 0x7f) as u64) << shift;
1818 shift += 7;
1819 }
1820
1821 let packed_type = match type_code {
1822 1 => PackedType::Commit,
1823 2 => PackedType::Tree,
1824 3 => PackedType::Blob,
1825 4 => PackedType::Tag,
1826 6 => PackedType::OfsDelta,
1827 7 => PackedType::RefDelta,
1828 _ => {
1829 return Err(Error::CorruptObject(format!(
1830 "unsupported packed object type {}",
1831 type_code
1832 )))
1833 }
1834 };
1835 Ok((packed_type, size))
1836}
1837
/// How a delta entry in a pack refers to its base, as decoded from the
/// entry's header.
#[derive(Debug, Clone, Copy)]
pub enum PackedDeltaDependency {
    /// The entry is an ofs-delta: its base is another entry of the same pack.
    OfsBase {
        /// Absolute offset of the base entry within the pack (the delta's own
        /// offset minus the encoded distance).
        base_offset: u64,
    },
    /// The entry is a ref-delta: its base is named by object id.
    RefBase {
        /// Object id of the base, read from the bytes following the header.
        base_oid: ObjectId,
    },
}
1852
1853pub fn read_packed_delta_dependency(
1855 pack_bytes: &[u8],
1856 object_offset: u64,
1857) -> Result<Option<PackedDeltaDependency>> {
1858 let mut pos = object_offset as usize;
1859 let (ty, _) = parse_pack_object_header(pack_bytes, &mut pos)?;
1860 match ty {
1861 PackedType::OfsDelta => {
1862 let base = parse_ofs_delta_base(pack_bytes, &mut pos, object_offset)?;
1863 Ok(Some(PackedDeltaDependency::OfsBase { base_offset: base }))
1864 }
1865 PackedType::RefDelta => {
1866 if pos + 20 > pack_bytes.len() {
1867 return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
1868 }
1869 let base_oid = ObjectId::from_bytes(&pack_bytes[pos..pos + 20])?;
1870 Ok(Some(PackedDeltaDependency::RefBase { base_oid }))
1871 }
1872 _ => Ok(None),
1873 }
1874}
1875
1876fn parse_ofs_delta_base(bytes: &[u8], pos: &mut usize, this_offset: u64) -> Result<u64> {
1877 let mut c = *bytes
1878 .get(*pos)
1879 .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
1880 *pos += 1;
1881 let mut value = (c & 0x7f) as u64;
1882 while (c & 0x80) != 0 {
1883 c = *bytes
1884 .get(*pos)
1885 .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
1886 *pos += 1;
1887 value = ((value + 1) << 7) | (c & 0x7f) as u64;
1888 }
1889 this_offset
1890 .checked_sub(value)
1891 .ok_or_else(|| Error::CorruptObject("invalid ofs-delta base offset".to_owned()))
1892}
1893
1894#[must_use]
1902pub fn slice_one_pack_object(
1903 bytes: &[u8],
1904 object_start_offset: u64,
1905 hash_bytes: usize,
1906) -> Result<&[u8]> {
1907 let start = object_start_offset as usize;
1908 let mut pos = start;
1909 skip_one_pack_object(bytes, &mut pos, object_start_offset, hash_bytes)?;
1910 Ok(&bytes[start..pos])
1911}
1912
1913pub fn skip_one_pack_object(
1914 bytes: &[u8],
1915 pos: &mut usize,
1916 object_start_offset: u64,
1917 hash_bytes: usize,
1918) -> Result<()> {
1919 let (packed_type, size) = parse_pack_object_header(bytes, pos)?;
1920 match packed_type {
1921 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
1922 let mut dec = ZlibDecoder::new(&bytes[*pos..]);
1923 let mut tmp = Vec::with_capacity(size as usize);
1924 dec.read_to_end(&mut tmp)
1925 .map_err(|e| Error::Zlib(e.to_string()))?;
1926 *pos += dec.total_in() as usize;
1927 }
1928 PackedType::RefDelta => {
1929 if *pos + hash_bytes > bytes.len() {
1930 return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
1931 }
1932 *pos += hash_bytes;
1933 let mut dec = ZlibDecoder::new(&bytes[*pos..]);
1934 let mut tmp = Vec::with_capacity(size as usize);
1935 dec.read_to_end(&mut tmp)
1936 .map_err(|e| Error::Zlib(e.to_string()))?;
1937 *pos += dec.total_in() as usize;
1938 }
1939 PackedType::OfsDelta => {
1940 let _base_off = parse_ofs_delta_base(bytes, pos, object_start_offset)?;
1941 let mut dec = ZlibDecoder::new(&bytes[*pos..]);
1942 let mut tmp = Vec::with_capacity(size as usize);
1943 dec.read_to_end(&mut tmp)
1944 .map_err(|e| Error::Zlib(e.to_string()))?;
1945 *pos += dec.total_in() as usize;
1946 }
1947 }
1948 Ok(())
1949}
1950
1951fn read_u32_be(bytes: &[u8], pos: &mut usize) -> Result<u32> {
1952 if bytes.len() < *pos + 4 {
1953 return Err(Error::CorruptObject(
1954 "unexpected end of idx while reading u32".to_owned(),
1955 ));
1956 }
1957 let v = u32::from_be_bytes(
1958 bytes[*pos..*pos + 4]
1959 .try_into()
1960 .map_err(|_| Error::CorruptObject("failed to parse u32".to_owned()))?,
1961 );
1962 *pos += 4;
1963 Ok(v)
1964}
1965
1966fn read_u64_be(bytes: &[u8], pos: &mut usize) -> Result<u64> {
1967 if bytes.len() < *pos + 8 {
1968 return Err(Error::CorruptObject(
1969 "unexpected end of idx while reading u64".to_owned(),
1970 ));
1971 }
1972 let v = u64::from_be_bytes(
1973 bytes[*pos..*pos + 8]
1974 .try_into()
1975 .map_err(|_| Error::CorruptObject("failed to parse u64".to_owned()))?,
1976 );
1977 *pos += 8;
1978 Ok(v)
1979}
1980
1981pub fn read_idx_object_ids(idx_path: &Path) -> Result<Vec<ObjectId>> {
1983 let index = read_pack_index(idx_path)?;
1984 let mut out = Vec::new();
1985 for e in index.entries {
1986 if e.oid.len() == 20 {
1987 out.push(ObjectId::from_bytes(&e.oid)?);
1988 }
1989 }
1990 Ok(out)
1991}