1use crate::error::{Error, Result};
7use crate::objects::{Object, ObjectId, ObjectKind};
8use crate::unpack_objects::apply_delta;
9use flate2::read::ZlibDecoder;
10use sha1::{Digest, Sha1};
11use sha2::Sha256;
12use std::collections::{BTreeMap, HashMap, HashSet};
13use std::fs;
14use std::io;
15use std::io::Read;
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18
/// One object record parsed from a pack index file.
#[derive(Debug, Clone)]
pub struct PackIndexEntry {
    /// Raw object id bytes exactly as stored in the index.
    pub oid: Vec<u8>,
    /// Byte offset of the object's entry inside the companion pack file.
    pub offset: u64,
}
27
/// In-memory form of a parsed pack index (`.idx`) file.
#[derive(Debug, Clone)]
pub struct PackIndex {
    /// Path of the `.idx` file this index was parsed from.
    pub idx_path: PathBuf,
    /// Path of the companion pack: `idx_path` with its extension replaced by `pack`.
    pub pack_path: PathBuf,
    /// Width in bytes of every object id in `entries` (the parsers produce 20 or 32).
    pub hash_bytes: usize,
    /// Object records; `find_offset` binary-searches them, so they are expected
    /// to be in ascending object-id order.
    pub entries: Vec<PackIndexEntry>,
    /// Fanout table: `fanout[b]` is the number of entries whose first id byte
    /// is `<= b`, so `fanout[b - 1]..fanout[b]` is the bucket for byte `b`.
    pub fanout: [u32; 256],
}
44
45impl PackIndex {
46 #[must_use]
52 pub fn find_offset(&self, oid: &ObjectId) -> Option<u64> {
53 if self.hash_bytes != 20 {
54 return None;
55 }
56 let needle = oid.as_bytes();
57 let first_byte = needle[0] as usize;
58 let lo = if first_byte == 0 {
59 0
60 } else {
61 self.fanout[first_byte - 1] as usize
62 };
63 let hi = self.fanout[first_byte] as usize;
64 if lo >= hi || hi > self.entries.len() {
65 return None;
66 }
67 let slice = &self.entries[lo..hi];
68 slice
69 .binary_search_by(|e| e.oid.as_slice().cmp(needle.as_slice()))
70 .ok()
71 .map(|idx| slice[idx].offset)
72 }
73
74 #[must_use]
76 pub fn contains(&self, oid: &ObjectId) -> bool {
77 self.find_offset(oid).is_some()
78 }
79}
80
/// One row produced by `show_index_entries`.
#[derive(Debug, Clone)]
pub struct ShowIndexEntry {
    /// Raw object id bytes (`hash_size` bytes long).
    pub oid: Vec<u8>,
    /// Pack offset recorded for the object.
    pub offset: u64,
    /// CRC32 recorded for the object; `None` for v1 indexes, which carry none.
    pub crc32: Option<u32>,
}
94
95pub fn show_index_entries(reader: &mut dyn Read, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
106 let mut buf = Vec::new();
107 reader.read_to_end(&mut buf).map_err(Error::Io)?;
108
109 if buf.len() < 8 {
110 return Err(Error::CorruptObject(
111 "unable to read header: index file too small".to_owned(),
112 ));
113 }
114
115 let mut pos = 0usize;
116 let first_u32 = read_u32_be(&buf, &mut pos)?;
117
118 const PACK_IDX_SIGNATURE: u32 = 0xff74_4f63;
119
120 if first_u32 == PACK_IDX_SIGNATURE {
121 let version = read_u32_be(&buf, &mut pos)?;
123 if version != 2 {
124 return Err(Error::CorruptObject(format!(
125 "unknown index version: {version}"
126 )));
127 }
128 show_index_v2(&buf, &mut pos, hash_size)
129 } else {
130 pos = 0;
133 show_index_v1(&buf, &mut pos, hash_size)
134 }
135}
136
137fn show_index_v1(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
139 if buf.len() < 256 * 4 {
140 return Err(Error::CorruptObject(
141 "unable to read index: v1 fanout too short".to_owned(),
142 ));
143 }
144 let mut fanout = [0u32; 256];
145 for slot in &mut fanout {
146 *slot = read_u32_be(buf, pos)?;
147 }
148 let object_count = fanout[255] as usize;
149
150 let mut entries = Vec::with_capacity(object_count);
151 for i in 0..object_count {
152 if *pos + 4 + hash_size > buf.len() {
154 return Err(Error::CorruptObject(format!(
155 "unable to read entry {i}/{object_count}: truncated"
156 )));
157 }
158 let offset = read_u32_be(buf, pos)? as u64;
159 let oid = buf[*pos..*pos + hash_size].to_vec();
160 *pos += hash_size;
161 entries.push(ShowIndexEntry {
162 oid,
163 offset,
164 crc32: None,
165 });
166 }
167 Ok(entries)
168}
169
170fn show_index_v2(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
173 if buf.len() < *pos + 256 * 4 {
174 return Err(Error::CorruptObject(
175 "unable to read index: v2 fanout too short".to_owned(),
176 ));
177 }
178 let mut fanout = [0u32; 256];
179 for slot in &mut fanout {
180 *slot = read_u32_be(buf, pos)?;
181 }
182 let object_count = fanout[255] as usize;
183
184 let mut oids: Vec<Vec<u8>> = Vec::with_capacity(object_count);
186 for i in 0..object_count {
187 if *pos + hash_size > buf.len() {
188 return Err(Error::CorruptObject(format!(
189 "unable to read oid {i}/{object_count}: truncated"
190 )));
191 }
192 let oid = buf[*pos..*pos + hash_size].to_vec();
193 *pos += hash_size;
194 oids.push(oid);
195 }
196
197 let mut crcs = Vec::with_capacity(object_count);
199 for i in 0..object_count {
200 if *pos + 4 > buf.len() {
201 return Err(Error::CorruptObject(format!(
202 "unable to read crc {i}/{object_count}: truncated"
203 )));
204 }
205 crcs.push(read_u32_be(buf, pos)?);
206 }
207
208 let mut offsets32 = Vec::with_capacity(object_count);
210 let mut large_count = 0usize;
211 for i in 0..object_count {
212 if *pos + 4 > buf.len() {
213 return Err(Error::CorruptObject(format!(
214 "unable to read 32b offset {i}/{object_count}: truncated"
215 )));
216 }
217 let v = read_u32_be(buf, pos)?;
218 if (v & 0x8000_0000) != 0 {
219 large_count += 1;
220 }
221 offsets32.push(v);
222 }
223
224 let mut large_offsets = Vec::with_capacity(large_count);
226 for i in 0..large_count {
227 if *pos + 8 > buf.len() {
228 return Err(Error::CorruptObject(format!(
229 "unable to read 64b offset {i}: truncated"
230 )));
231 }
232 large_offsets.push(read_u64_be(buf, pos)?);
233 }
234
235 let mut next_large = 0usize;
236 let mut entries = Vec::with_capacity(object_count);
237 for (i, oid) in oids.iter().enumerate() {
238 let raw = offsets32[i];
239 let offset = if (raw & 0x8000_0000) == 0 {
240 raw as u64
241 } else {
242 let idx = (raw & 0x7fff_ffff) as usize;
243 if idx != next_large {
244 return Err(Error::CorruptObject(format!(
245 "inconsistent 64b offset index at entry {i}"
246 )));
247 }
248 let off = large_offsets.get(next_large).copied().ok_or_else(|| {
249 Error::CorruptObject(format!("missing large offset entry {next_large}"))
250 })?;
251 next_large += 1;
252 off
253 };
254 entries.push(ShowIndexEntry {
255 oid: oid.clone(),
256 offset,
257 crc32: Some(crcs[i]),
258 });
259 }
260 Ok(entries)
261}
262
/// Aggregate statistics over the packs of one object directory, as filled in
/// by `collect_local_pack_info`.
#[derive(Debug, Clone, Default)]
pub struct LocalPackInfo {
    /// Number of pack indexes that were counted.
    pub pack_count: usize,
    /// Total number of index entries across those packs.
    pub object_count: usize,
    /// Combined on-disk size of every counted `.pack` and `.idx` file.
    pub size_bytes: u64,
    /// Ids of the packed objects; only 20-byte ids that convert to `ObjectId`
    /// are recorded.
    pub object_ids: HashSet<ObjectId>,
}
275
276pub fn read_local_pack_indexes(objects_dir: &Path) -> Result<Vec<PackIndex>> {
283 let pack_dir = objects_dir.join("pack");
284 let rd = match fs::read_dir(&pack_dir) {
285 Ok(rd) => rd,
286 Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
287 Err(err) => return Err(Error::Io(err)),
288 };
289
290 let mut out = Vec::new();
291 for entry in rd {
292 let entry = entry.map_err(Error::Io)?;
293 let path = entry.path();
294 if path.extension().and_then(|s| s.to_str()) != Some("idx") {
295 continue;
296 }
297 if let Ok(idx) = read_pack_index(&path) {
298 if !idx.pack_path.is_file() {
301 continue;
302 }
303 out.push(idx);
304 }
305 }
306 Ok(out)
307}
308
309mod pack_cache {
323 use super::{read_pack_index_no_verify, Error, PackIndex, Result};
324 use std::collections::HashMap;
325 use std::fs;
326 use std::io;
327 use std::path::{Path, PathBuf};
328 use std::sync::{Arc, Mutex, OnceLock};
329 use std::time::SystemTime;
330
331 struct CachedDir {
332 dir_mtime: SystemTime,
333 indexes: Vec<Arc<PackIndex>>,
334 }
335
336 struct CachedIdx {
337 mtime: SystemTime,
338 size: u64,
339 idx: Arc<PackIndex>,
340 }
341
342 struct CachedPack {
343 mtime: SystemTime,
344 size: u64,
345 bytes: Arc<Vec<u8>>,
346 }
347
348 #[derive(Default)]
349 struct State {
350 by_dir: HashMap<PathBuf, CachedDir>,
351 by_idx: HashMap<PathBuf, CachedIdx>,
352 by_pack: HashMap<PathBuf, CachedPack>,
353 }
354
355 static CACHE: OnceLock<Mutex<State>> = OnceLock::new();
356
357 fn lock() -> std::sync::MutexGuard<'static, State> {
358 CACHE
359 .get_or_init(|| Mutex::new(State::default()))
360 .lock()
361 .unwrap_or_else(|p| p.into_inner())
362 }
363
364 fn dir_mtime(path: &Path) -> SystemTime {
365 fs::metadata(path)
366 .and_then(|m| m.modified())
367 .unwrap_or(SystemTime::UNIX_EPOCH)
368 }
369
370 fn file_signature(path: &Path) -> Option<(SystemTime, u64)> {
371 let m = fs::metadata(path).ok()?;
372 let mtime = m.modified().unwrap_or(SystemTime::UNIX_EPOCH);
373 Some((mtime, m.len()))
374 }
375
376 pub fn get_index(idx_path: &Path) -> Result<Arc<PackIndex>> {
379 let sig = file_signature(idx_path);
380 if let Some((mtime, size)) = sig {
381 {
382 let g = lock();
383 if let Some(c) = g.by_idx.get(idx_path) {
384 if c.mtime == mtime && c.size == size {
385 return Ok(Arc::clone(&c.idx));
386 }
387 }
388 }
389 let parsed = Arc::new(read_pack_index_no_verify(idx_path)?);
390 let mut g = lock();
391 g.by_idx.insert(
392 idx_path.to_path_buf(),
393 CachedIdx {
394 mtime,
395 size,
396 idx: Arc::clone(&parsed),
397 },
398 );
399 Ok(parsed)
400 } else {
401 Err(Error::Io(io::Error::new(
402 io::ErrorKind::NotFound,
403 format!("idx not found: {}", idx_path.display()),
404 )))
405 }
406 }
407
408 pub fn get_dir_indexes(objects_dir: &Path) -> Result<Vec<Arc<PackIndex>>> {
411 let pack_dir = objects_dir.join("pack");
412 let dir_mt = dir_mtime(&pack_dir);
413
414 {
415 let g = lock();
416 if let Some(c) = g.by_dir.get(&pack_dir) {
417 if c.dir_mtime == dir_mt {
418 return Ok(c.indexes.clone());
419 }
420 }
421 }
422
423 let rd = match fs::read_dir(&pack_dir) {
424 Ok(rd) => rd,
425 Err(err) if err.kind() == io::ErrorKind::NotFound => {
426 let mut g = lock();
427 g.by_dir.insert(
428 pack_dir.clone(),
429 CachedDir {
430 dir_mtime: dir_mt,
431 indexes: Vec::new(),
432 },
433 );
434 return Ok(Vec::new());
435 }
436 Err(err) => return Err(Error::Io(err)),
437 };
438
439 let mut out = Vec::new();
440 for entry in rd {
441 let entry = entry.map_err(Error::Io)?;
442 let path = entry.path();
443 if path.extension().and_then(|s| s.to_str()) != Some("idx") {
444 continue;
445 }
446 let Ok(idx) = get_index(&path) else { continue };
447 if !idx.pack_path.is_file() {
448 continue;
449 }
450 out.push(idx);
451 }
452
453 let mut g = lock();
454 g.by_dir.insert(
455 pack_dir,
456 CachedDir {
457 dir_mtime: dir_mt,
458 indexes: out.clone(),
459 },
460 );
461 Ok(out)
462 }
463
464 pub fn get_pack_bytes(pack_path: &Path) -> Result<Arc<Vec<u8>>> {
467 let sig = file_signature(pack_path);
468 if let Some((mtime, size)) = sig {
469 {
470 let g = lock();
471 if let Some(c) = g.by_pack.get(pack_path) {
472 if c.mtime == mtime && c.size == size {
473 return Ok(Arc::clone(&c.bytes));
474 }
475 }
476 }
477 let bytes = Arc::new(fs::read(pack_path).map_err(Error::Io)?);
478 let mut g = lock();
479 g.by_pack.insert(
480 pack_path.to_path_buf(),
481 CachedPack {
482 mtime,
483 size,
484 bytes: Arc::clone(&bytes),
485 },
486 );
487 Ok(bytes)
488 } else {
489 Err(Error::Io(io::Error::new(
490 io::ErrorKind::NotFound,
491 format!("pack not found: {}", pack_path.display()),
492 )))
493 }
494 }
495
496 pub fn clear() {
499 let mut g = lock();
500 g.by_dir.clear();
501 g.by_idx.clear();
502 g.by_pack.clear();
503 }
504
505 pub fn refresh_pack_signature(pack_path: &Path) {
511 if let Some((mtime, size)) = file_signature(pack_path) {
512 let mut g = lock();
513 if let Some(c) = g.by_pack.get_mut(pack_path) {
514 if c.size == size {
515 c.mtime = mtime;
516 }
517 }
518 }
519 }
520}
521
/// Cached variant of `read_local_pack_indexes`: returns shared handles to the
/// indexes under `<objects_dir>/pack`.
///
/// The cached listing is reused while the `pack` directory's mtime is
/// unchanged. Indexes are parsed without trailing-checksum verification.
pub fn read_local_pack_indexes_cached(objects_dir: &Path) -> Result<Vec<Arc<PackIndex>>> {
    pack_cache::get_dir_indexes(objects_dir)
}
535
/// Returns the parsed index for `idx_path` from the process-wide cache.
///
/// The file is re-parsed (without trailing-checksum verification) when its
/// modification time or size differs from the cached values.
///
/// # Errors
///
/// Returns a `NotFound` I/O error when the file's metadata cannot be read,
/// or the parse error when the index is malformed.
pub fn read_pack_index_cached(idx_path: &Path) -> Result<Arc<PackIndex>> {
    pack_cache::get_index(idx_path)
}
546
/// Returns the full contents of `pack_path` from the process-wide cache.
///
/// The file is re-read when its modification time or size differs from the
/// cached values.
///
/// # Errors
///
/// Returns a `NotFound` I/O error when the file's metadata cannot be read,
/// or `Error::Io` when reading the file fails.
pub fn read_pack_bytes_cached(pack_path: &Path) -> Result<Arc<Vec<u8>>> {
    pack_cache::get_pack_bytes(pack_path)
}
555
/// Drops every cached directory listing, parsed index and pack buffer.
pub fn clear_pack_cache() {
    pack_cache::clear();
}
560
/// Re-stamps the cached bytes of `pack_path` with the file's current
/// modification time.
///
/// Nothing happens unless a cache entry exists and the file's size still
/// equals the cached size.
pub fn refresh_pack_bytes_signature(pack_path: &Path) {
    pack_cache::refresh_pack_signature(pack_path);
}
566
567pub fn collect_local_pack_info(objects_dir: &Path) -> Result<LocalPackInfo> {
573 let indexes = read_local_pack_indexes(objects_dir)?;
574 let mut info = LocalPackInfo::default();
575 for idx in indexes {
576 let pack_meta = fs::metadata(&idx.pack_path).map_err(Error::Io)?;
577 let idx_meta = fs::metadata(&idx.idx_path).map_err(Error::Io)?;
578 info.pack_count += 1;
579 info.object_count += idx.entries.len();
580 info.size_bytes += pack_meta.len() + idx_meta.len();
581 for entry in idx.entries {
582 if entry.oid.len() == 20 {
583 if let Ok(oid) = ObjectId::from_bytes(&entry.oid) {
584 info.object_ids.insert(oid);
585 }
586 }
587 }
588 }
589 Ok(info)
590}
591
592fn verify_idx_trailing_checksum(idx_path: &Path, bytes: &[u8]) -> Result<()> {
593 if bytes.len() < 20 {
594 return Err(Error::CorruptObject(format!(
595 "index file {} missing checksum",
596 idx_path.display()
597 )));
598 }
599 let idx_body_end = bytes.len() - 20;
600 let mut h = Sha1::new();
601 h.update(&bytes[..idx_body_end]);
602 let digest = h.finalize();
603 if digest.as_slice() != &bytes[idx_body_end..] {
604 return Err(Error::CorruptObject(format!(
605 "index checksum mismatch for {}",
606 idx_path.display()
607 )));
608 }
609 Ok(())
610}
611
612fn check_fanout_monotonic(fanout: &[u32; 256], idx_path: &Path) -> Result<()> {
624 let mut prev = 0u32;
625 for &n in fanout {
626 if n < prev {
627 return Err(Error::CorruptObject(format!(
628 "non-monotonic index {}",
629 idx_path.display()
630 )));
631 }
632 prev = n;
633 }
634 Ok(())
635}
636
637fn read_pack_index_v1(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
638 let mut pos = 0usize;
639 if bytes.len() < 256 * 4 + 20 {
640 return Err(Error::CorruptObject(format!(
641 "index file {} is too small",
642 idx_path.display()
643 )));
644 }
645 let mut fanout = [0u32; 256];
646 for slot in &mut fanout {
647 *slot = read_u32_be(bytes, &mut pos)?;
648 }
649 check_fanout_monotonic(&fanout, idx_path)?;
650 let object_count = fanout[255] as usize;
651 let need = pos
652 .saturating_add(object_count.saturating_mul(24))
653 .saturating_add(20);
654 if bytes.len() < need {
655 return Err(Error::CorruptObject(format!(
656 "truncated idx file {}",
657 idx_path.display()
658 )));
659 }
660
661 let mut entries: Vec<PackIndexEntry> = Vec::with_capacity(object_count);
662 for i in 0..object_count {
663 let offset = read_u32_be(bytes, &mut pos)? as u64;
664 let oid = bytes[pos..pos + 20].to_vec();
665 pos += 20;
666 if i > 0 && entries[i - 1].oid.cmp(&oid) != std::cmp::Ordering::Less {
667 return Err(Error::CorruptObject(format!(
668 "oid lookup out of order in {}",
669 idx_path.display()
670 )));
671 }
672 entries.push(PackIndexEntry { oid, offset });
673 }
674
675 if verify {
676 verify_idx_trailing_checksum(idx_path, bytes)?;
677 }
678
679 let mut pack_path = idx_path.to_path_buf();
680 pack_path.set_extension("pack");
681
682 let fanout = compute_fanout_from_entries(&entries);
683 Ok(PackIndex {
684 idx_path: idx_path.to_path_buf(),
685 pack_path,
686 hash_bytes: 20,
687 entries,
688 fanout,
689 })
690}
691
692fn compute_fanout_from_entries(entries: &[PackIndexEntry]) -> [u32; 256] {
695 let mut fanout = [0u32; 256];
696 let mut idx = 0usize;
697 for byte in 0u32..256 {
698 let needle = byte as u8;
699 while idx < entries.len() && entries[idx].oid.first().copied().unwrap_or(0) <= needle {
700 idx += 1;
701 }
702 fanout[byte as usize] = u32::try_from(idx).unwrap_or(u32::MAX);
703 }
704 fanout
705}
706
707fn read_pack_index_v2(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
708 if bytes.len() < 8 + 256 * 4 + 40 {
709 return Err(Error::CorruptObject(format!(
710 "index file {} is too small",
711 idx_path.display()
712 )));
713 }
714
715 let mut pos = 0usize;
716 pos += 4;
717 let version = read_u32_be(bytes, &mut pos)?;
718 if version != 2 {
719 return Err(Error::CorruptObject(format!(
720 "unsupported idx version {} in {}",
721 version,
722 idx_path.display()
723 )));
724 }
725
726 let mut fanout = [0u32; 256];
727 for slot in &mut fanout {
728 *slot = read_u32_be(bytes, &mut pos)?;
729 }
730 check_fanout_monotonic(&fanout, idx_path)?;
731 let object_count = fanout[255] as usize;
732
733 let idx_file_len = bytes.len();
734 let hash_bytes = detect_idx_hash_bytes_v2(idx_file_len, pos, object_count, idx_path)?;
735
736 let need = pos
737 .saturating_add(object_count * hash_bytes)
738 .saturating_add(object_count * 4)
739 .saturating_add(object_count * 4)
740 .saturating_add(40);
741 if bytes.len() < need {
742 return Err(Error::CorruptObject(format!(
743 "truncated idx file {}",
744 idx_path.display()
745 )));
746 }
747
748 let mut oids: Vec<Vec<u8>> = Vec::with_capacity(object_count);
749 for _ in 0..object_count {
750 let slice = &bytes[pos..pos + hash_bytes];
751 pos += hash_bytes;
752 oids.push(slice.to_vec());
753 }
754
755 pos += object_count * 4;
756
757 let mut offsets32 = Vec::with_capacity(object_count);
758 let mut large_count = 0usize;
759 for _ in 0..object_count {
760 let v = read_u32_be(bytes, &mut pos)?;
761 if (v & 0x8000_0000) != 0 {
762 large_count += 1;
763 }
764 offsets32.push(v);
765 }
766
767 if bytes.len() < pos + large_count * 8 + 40 {
768 return Err(Error::CorruptObject(format!(
769 "truncated large offset table in {}",
770 idx_path.display()
771 )));
772 }
773 let mut large_offsets = Vec::with_capacity(large_count);
774 for _ in 0..large_count {
775 large_offsets.push(read_u64_be(bytes, &mut pos)?);
776 }
777
778 let mut next_large = 0usize;
779 let mut entries = Vec::with_capacity(object_count);
780 for (i, oid) in oids.into_iter().enumerate() {
781 let raw = offsets32[i];
782 let offset = if (raw & 0x8000_0000) == 0 {
783 raw as u64
784 } else {
785 let off = large_offsets.get(next_large).copied().ok_or_else(|| {
786 Error::CorruptObject(format!("bad large offset index in {}", idx_path.display()))
787 })?;
788 next_large += 1;
789 off
790 };
791 entries.push(PackIndexEntry { oid, offset });
792 }
793
794 let mut pack_path = idx_path.to_path_buf();
795 pack_path.set_extension("pack");
796
797 if verify {
798 verify_idx_trailing_checksum(idx_path, bytes)?;
799 }
800
801 Ok(PackIndex {
802 idx_path: idx_path.to_path_buf(),
803 pack_path,
804 hash_bytes,
805 entries,
806 fanout,
807 })
808}
809
810fn detect_idx_hash_bytes_v2(
815 idx_file_len: usize,
816 fanout_end: usize,
817 object_count: usize,
818 idx_path: &Path,
819) -> Result<usize> {
820 if object_count == 0 {
821 return Ok(20);
822 }
823 if idx_file_len < 20 {
824 return Err(Error::CorruptObject(format!(
825 "index file {} missing checksum",
826 idx_path.display()
827 )));
828 }
829 let body_without_checksum = idx_file_len.saturating_sub(20);
830
831 for &hb in &[20usize, 32] {
832 let min_body = fanout_end
835 .saturating_add(object_count.saturating_mul(hb + 4 + 4))
836 .saturating_add(hb);
837 if body_without_checksum < min_body {
838 continue;
839 }
840 let mut max_body = min_body;
841 if object_count > 0 {
842 max_body = max_body.saturating_add((object_count - 1).saturating_mul(8));
843 }
844 if body_without_checksum > max_body {
845 continue;
846 }
847 let extra = body_without_checksum.saturating_sub(min_body);
848 if extra % 8 != 0 {
849 continue;
850 }
851 return Ok(hb);
852 }
853
854 Err(Error::CorruptObject(format!(
855 "wrong index v2 file size in {}",
856 idx_path.display()
857 )))
858}
859
/// Hex-encodes raw object-id bytes via `hex::encode`.
#[must_use]
pub fn oid_bytes_to_hex(oid: &[u8]) -> String {
    hex::encode(oid)
}
864
/// Returns `true` when `entry` holds a 20-byte id whose bytes equal `oid`.
#[must_use]
pub fn pack_index_entry_matches_sha1_oid(entry: &PackIndexEntry, oid: &ObjectId) -> bool {
    entry.oid.len() == 20 && entry.oid.as_slice() == oid.as_bytes().as_slice()
}
870
871pub fn hash_object_bytes(kind: ObjectKind, data: &[u8], hash_bytes: usize) -> Result<Vec<u8>> {
873 let header = format!("{} {}\0", kind, data.len());
874 match hash_bytes {
875 20 => {
876 let mut hasher = Sha1::new();
877 hasher.update(header.as_bytes());
878 hasher.update(data);
879 Ok(hasher.finalize().to_vec())
880 }
881 32 => {
882 use sha2::Digest as _;
883 let mut hasher = Sha256::new();
884 hasher.update(header.as_bytes());
885 hasher.update(data);
886 Ok(hasher.finalize().to_vec())
887 }
888 other => Err(Error::CorruptObject(format!(
889 "unsupported object hash width: {other}"
890 ))),
891 }
892}
893
894pub fn read_pack_index(idx_path: &Path) -> Result<PackIndex> {
905 let bytes = fs::read(idx_path).map_err(Error::Io)?;
906 parse_pack_index_bytes(idx_path, &bytes, true)
907}
908
909pub fn read_pack_index_no_verify(idx_path: &Path) -> Result<PackIndex> {
915 let bytes = fs::read(idx_path).map_err(Error::Io)?;
916 parse_pack_index_bytes(idx_path, &bytes, false)
917}
918
919fn parse_pack_index_bytes(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
920 if bytes.len() < 8 {
921 return Err(Error::CorruptObject(format!(
922 "index file {} is too small",
923 idx_path.display()
924 )));
925 }
926 let magic = &bytes[0..4];
927 if magic == [0xff, b't', b'O', b'c'] {
928 read_pack_index_v2(idx_path, bytes, verify)
929 } else {
930 read_pack_index_v1(idx_path, bytes, verify)
931 }
932}
933
/// Entry type recorded in a pack object header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedType {
    /// Full commit object.
    Commit,
    /// Full tree object.
    Tree,
    /// Full blob object.
    Blob,
    /// Full tag object.
    Tag,
    /// Delta whose base is located by a pack offset.
    OfsDelta,
    /// Delta whose base is named by object id.
    RefDelta,
}
950
impl PackedType {
    /// Lower-case name of the entry type (`"commit"`, `"ofs-delta"`, ...).
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Commit => "commit",
            Self::Tree => "tree",
            Self::Blob => "blob",
            Self::Tag => "tag",
            Self::OfsDelta => "ofs-delta",
            Self::RefDelta => "ref-delta",
        }
    }
}
965
/// Per-object record produced by `verify_pack_and_collect`.
#[derive(Debug, Clone)]
pub struct VerifyObjectRecord {
    /// Raw object id taken from the index.
    pub oid: Vec<u8>,
    /// Entry type as stored in the pack (may be a delta type).
    pub packed_type: PackedType,
    /// Size value parsed from the entry's pack header.
    pub size: u64,
    /// Distance from this entry's offset to the next entry's offset, or to
    /// the start of the pack trailer for the last entry.
    pub size_in_pack: u64,
    /// Offset of the entry within the pack.
    pub offset: u64,
    /// Delta chain depth; stays `None` for non-delta entries.
    pub depth: Option<u64>,
    /// Base object id stored in the entry; set only for ref-delta entries.
    pub base_oid: Option<Vec<u8>>,
}
984
/// How a delta entry refers to its base.
enum DeltaBaseLink {
    /// Base named by raw object id (ref-delta).
    Oid(Vec<u8>),
    /// Base named by its offset in the pack (ofs-delta).
    Offset(u64),
}
992
993fn resolve_delta_depth(
1004 i: usize,
1005 base_links: &[Option<DeltaBaseLink>],
1006 by_oid: &HashMap<Vec<u8>, usize>,
1007 by_offset_idx: &HashMap<u64, usize>,
1008 records: &mut [VerifyObjectRecord],
1009) -> Result<u64> {
1010 if let Some(d) = records[i].depth {
1011 return Ok(d);
1012 }
1013 let Some(link) = &base_links[i] else {
1014 return Ok(0);
1015 };
1016 let base_idx = match link {
1017 DeltaBaseLink::Oid(oid) => by_oid.get(oid).copied(),
1018 DeltaBaseLink::Offset(off) => by_offset_idx.get(off).copied(),
1019 };
1020 records[i].depth = Some(1);
1022 let depth = match base_idx {
1023 Some(b) if b != i => {
1024 resolve_delta_depth(b, base_links, by_oid, by_offset_idx, records)?.saturating_add(1)
1025 }
1026 _ => 1,
1028 };
1029 records[i].depth = Some(depth);
1030 Ok(depth)
1031}
1032
1033pub fn verify_pack_and_collect(idx_path: &Path) -> Result<Vec<VerifyObjectRecord>> {
1039 let idx = read_pack_index(idx_path)?;
1040 let idx_file_bytes = fs::read(idx_path).map_err(Error::Io)?;
1041 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
1042 let hb = idx.hash_bytes;
1043 if pack_bytes.len() < 12 + hb {
1044 return Err(Error::CorruptObject(format!(
1045 "pack file {} is too small",
1046 idx.pack_path.display()
1047 )));
1048 }
1049 let pack_end = pack_bytes.len() - hb;
1050 match hb {
1051 20 => {
1052 let mut h = Sha1::new();
1053 h.update(&pack_bytes[..pack_end]);
1054 let digest = h.finalize();
1055 if digest.as_slice() != &pack_bytes[pack_end..] {
1056 return Err(Error::CorruptObject(format!(
1057 "pack trailing checksum mismatch for {}",
1058 idx.pack_path.display()
1059 )));
1060 }
1061 }
1062 32 => {
1063 use sha2::Digest as _;
1064 let mut h = Sha256::new();
1065 h.update(&pack_bytes[..pack_end]);
1066 let digest = h.finalize();
1067 if digest.as_slice() != &pack_bytes[pack_end..] {
1068 return Err(Error::CorruptObject(format!(
1069 "pack trailing checksum mismatch for {}",
1070 idx.pack_path.display()
1071 )));
1072 }
1073 }
1074 _ => {
1075 return Err(Error::CorruptObject(format!(
1076 "unsupported OID width {} for pack {}",
1077 hb,
1078 idx.pack_path.display()
1079 )));
1080 }
1081 }
1082 if idx_file_bytes.len() >= hb + 20 {
1083 let embedded = &idx_file_bytes[idx_file_bytes.len() - (hb + 20)..idx_file_bytes.len() - 20];
1084 if embedded != &pack_bytes[pack_end..] {
1085 return Err(Error::CorruptObject(format!(
1086 "pack checksum in index does not match {}",
1087 idx.pack_path.display()
1088 )));
1089 }
1090 }
1091 if &pack_bytes[0..4] != b"PACK" {
1092 return Err(Error::CorruptObject(format!(
1093 "pack file {} has invalid signature",
1094 idx.pack_path.display()
1095 )));
1096 }
1097 let version = u32::from_be_bytes(pack_bytes[4..8].try_into().unwrap_or([0, 0, 0, 0]));
1098 if version != 2 && version != 3 {
1099 return Err(Error::CorruptObject(format!(
1100 "unsupported pack version {} in {}",
1101 version,
1102 idx.pack_path.display()
1103 )));
1104 }
1105 let count = u32::from_be_bytes(pack_bytes[8..12].try_into().unwrap_or([0, 0, 0, 0])) as usize;
1106 if count != idx.entries.len() {
1107 return Err(Error::CorruptObject(format!(
1108 "pack/index object count mismatch for {}",
1109 idx.pack_path.display()
1110 )));
1111 }
1112
1113 let mut by_offset: BTreeMap<u64, Vec<u8>> = BTreeMap::new();
1114 for entry in &idx.entries {
1115 by_offset.insert(entry.offset, entry.oid.clone());
1116 }
1117 let offsets: Vec<u64> = by_offset.keys().copied().collect();
1118 if offsets.is_empty() {
1119 return Ok(Vec::new());
1120 }
1121
1122 let mut by_oid: HashMap<Vec<u8>, usize> = HashMap::new();
1123 let mut by_offset_idx: HashMap<u64, usize> = HashMap::new();
1124 let mut records: Vec<VerifyObjectRecord> = Vec::with_capacity(offsets.len());
1125 let mut base_links: Vec<Option<DeltaBaseLink>> = Vec::with_capacity(offsets.len());
1129 for (i, offset) in offsets.iter().copied().enumerate() {
1130 let oid = by_offset.get(&offset).cloned().ok_or_else(|| {
1131 Error::CorruptObject(format!("missing object id for offset {}", offset))
1132 })?;
1133 let next_off = offsets
1134 .get(i + 1)
1135 .copied()
1136 .unwrap_or((pack_bytes.len() - hb) as u64);
1137 if next_off <= offset || next_off > (pack_bytes.len() - hb) as u64 {
1138 return Err(Error::CorruptObject(format!(
1139 "invalid object boundaries at offset {} in {}",
1140 offset,
1141 idx.pack_path.display()
1142 )));
1143 }
1144 let mut p = offset as usize;
1145 let (packed_type, size) = parse_pack_object_header(&pack_bytes, &mut p)?;
1146 let mut base_oid: Option<Vec<u8>> = None;
1147 let mut base_link: Option<DeltaBaseLink> = None;
1148
1149 match packed_type {
1150 PackedType::RefDelta => {
1151 if p + hb > pack_bytes.len() {
1152 return Err(Error::CorruptObject(format!(
1153 "truncated ref-delta base at offset {}",
1154 offset
1155 )));
1156 }
1157 let raw = pack_bytes[p..p + hb].to_vec();
1158 base_oid = Some(raw.clone());
1159 base_link = Some(DeltaBaseLink::Oid(raw));
1160 }
1161 PackedType::OfsDelta => {
1162 let base_offset = parse_ofs_delta_base(&pack_bytes, &mut p, offset)?;
1163 base_link = Some(DeltaBaseLink::Offset(base_offset));
1164 }
1165 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {}
1166 }
1167
1168 let size_in_pack = next_off - offset;
1169 records.push(VerifyObjectRecord {
1170 oid: oid.clone(),
1171 packed_type,
1172 size,
1173 size_in_pack,
1174 offset,
1175 depth: None,
1176 base_oid,
1177 });
1178 base_links.push(base_link);
1179 by_oid.insert(oid, i);
1180 by_offset_idx.insert(offset, i);
1181 }
1182
1183 for i in 0..records.len() {
1187 if base_links[i].is_some() {
1188 let _ = resolve_delta_depth(i, &base_links, &by_oid, &by_offset_idx, &mut records)?;
1189 }
1190 }
1191
1192 for entry in &idx.entries {
1193 let obj = read_object_from_pack_bytes(&pack_bytes, &idx, &entry.oid)?;
1194 let computed = hash_object_bytes(obj.kind, &obj.data, hb)?;
1195 if computed.as_slice() != entry.oid.as_slice() {
1196 return Err(Error::CorruptObject(format!(
1197 "pack object hash mismatch at offset {} (index says {})",
1198 entry.offset,
1199 oid_bytes_to_hex(&entry.oid)
1200 )));
1201 }
1202 }
1203
1204 Ok(records)
1205}
1206
1207pub fn read_alternates_recursive(objects_dir: &Path) -> Result<Vec<PathBuf>> {
1213 let mut visited = HashSet::new();
1214 let mut out = Vec::new();
1215 read_alternates_inner(objects_dir, &mut visited, &mut out, 0)?;
1216 Ok(out)
1217}
1218
1219const MAX_ALTERNATE_DEPTH: usize = 5;
1221
1222fn read_alternates_inner(
1223 objects_dir: &Path,
1224 visited: &mut HashSet<PathBuf>,
1225 out: &mut Vec<PathBuf>,
1226 depth: usize,
1227) -> Result<()> {
1228 if depth > MAX_ALTERNATE_DEPTH {
1229 return Ok(());
1230 }
1231 let canonical = canonical_or_self(objects_dir);
1232 let alt_file = canonical.join("info").join("alternates");
1233 let text = match fs::read_to_string(&alt_file) {
1234 Ok(text) => text,
1235 Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
1236 Err(err) => return Err(Error::Io(err)),
1237 };
1238
1239 for raw in text.lines() {
1240 let line = raw.trim();
1241 if line.is_empty() {
1242 continue;
1243 }
1244 let candidate = if Path::new(line).is_absolute() {
1245 PathBuf::from(line)
1246 } else {
1247 canonical.join(line)
1248 };
1249 let candidate = canonical_or_self(&candidate);
1250 if visited.insert(candidate.clone()) {
1251 out.push(candidate.clone());
1252 read_alternates_inner(&candidate, visited, out, depth + 1)?;
1253 }
1254 }
1255 Ok(())
1256}
1257
/// Canonicalises `path`, falling back to an unchanged copy of `path` when
/// canonicalisation fails (for example because the path does not exist).
fn canonical_or_self(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
1261
1262fn packed_type_to_kind(pt: PackedType) -> Result<ObjectKind> {
1264 match pt {
1265 PackedType::Commit => Ok(ObjectKind::Commit),
1266 PackedType::Tree => Ok(ObjectKind::Tree),
1267 PackedType::Blob => Ok(ObjectKind::Blob),
1268 PackedType::Tag => Ok(ObjectKind::Tag),
1269 PackedType::OfsDelta | PackedType::RefDelta => Err(Error::CorruptObject(
1270 "cannot convert delta type to object kind directly".to_owned(),
1271 )),
1272 }
1273}
1274
1275fn decompress_pack_data(bytes: &[u8], pos: &mut usize, expected_size: u64) -> Result<Vec<u8>> {
1280 let slice = &bytes[*pos..];
1281 let mut decoder = ZlibDecoder::new(slice);
1282 let mut out = Vec::with_capacity(expected_size as usize);
1283 decoder
1284 .read_to_end(&mut out)
1285 .map_err(|e| Error::Zlib(e.to_string()))?;
1286 *pos += decoder.total_in() as usize;
1287 if out.len() as u64 != expected_size {
1288 return Err(Error::CorruptObject(format!(
1289 "pack object size mismatch: expected {expected_size}, got {}",
1290 out.len()
1291 )));
1292 }
1293 Ok(out)
1294}
1295
1296fn read_pack_object_at(
1301 pack_bytes: &[u8],
1302 offset: u64,
1303 idx: &PackIndex,
1304 objects_dir: Option<&Path>,
1305 depth: usize,
1306) -> Result<(ObjectKind, Vec<u8>)> {
1307 if depth > 50 {
1308 return Err(Error::CorruptObject(
1309 "delta chain too deep (>50)".to_owned(),
1310 ));
1311 }
1312 let mut pos = offset as usize;
1313 let (packed_type, size) = parse_pack_object_header(pack_bytes, &mut pos)?;
1314
1315 match packed_type {
1316 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
1317 let data = decompress_pack_data(pack_bytes, &mut pos, size)?;
1318 let kind = packed_type_to_kind(packed_type)?;
1319 Ok((kind, data))
1320 }
1321 PackedType::OfsDelta => {
1322 let base_offset = parse_ofs_delta_base(pack_bytes, &mut pos, offset)?;
1323 let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
1324 let in_pack = read_pack_object_at(pack_bytes, base_offset, idx, objects_dir, depth + 1);
1329 match in_pack {
1330 Ok((base_kind, base_data)) => {
1331 let result = apply_delta(&base_data, &delta_data)?;
1332 Ok((base_kind, result))
1333 }
1334 Err(err) => {
1335 if let Some(dir) = objects_dir {
1336 if let Some(base_entry) =
1338 idx.entries.iter().find(|e| e.offset == base_offset)
1339 {
1340 if base_entry.oid.len() == 20 {
1341 if let Ok(base_oid) =
1342 ObjectId::from_bytes(base_entry.oid.as_slice())
1343 {
1344 let loose = dir
1345 .join(base_oid.loose_prefix())
1346 .join(base_oid.loose_suffix());
1347 if loose.is_file() {
1348 if let Ok(obj) = crate::odb::Odb::read_loose_verify_oid(
1349 &loose, &base_oid,
1350 ) {
1351 let result = apply_delta(&obj.data, &delta_data)?;
1352 return Ok((obj.kind, result));
1353 }
1354 }
1355 if let Ok(obj) =
1356 read_object_from_other_pack(dir, idx, &base_oid, depth + 1)
1357 {
1358 let result = apply_delta(&obj.data, &delta_data)?;
1359 return Ok((obj.kind, result));
1360 }
1361 }
1362 }
1363 }
1364 }
1365 Err(err)
1366 }
1367 }
1368 }
1369 PackedType::RefDelta => {
1370 let hb = idx.hash_bytes;
1371 if pos + hb > pack_bytes.len() {
1372 return Err(Error::CorruptObject(
1373 "truncated ref-delta base OID".to_owned(),
1374 ));
1375 }
1376 let base_raw = pack_bytes[pos..pos + hb].to_vec();
1377 pos += hb;
1378 let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
1379 let in_pack_offset = idx
1382 .entries
1383 .binary_search_by(|e| e.oid.as_slice().cmp(base_raw.as_slice()))
1384 .ok()
1385 .map(|i| idx.entries[i].offset);
1386 let mut in_pack_err = None;
1387 if let Some(base_offset) = in_pack_offset {
1388 match read_pack_object_at(pack_bytes, base_offset, idx, objects_dir, depth + 1) {
1389 Ok((base_kind, base_data)) => {
1390 let result = apply_delta(&base_data, &delta_data)?;
1391 return Ok((base_kind, result));
1392 }
1393 Err(err) => in_pack_err = Some(err),
1394 }
1395 }
1396 if hb == 20 {
1397 if let (Some(dir), Ok(base_oid)) =
1398 (objects_dir, ObjectId::from_bytes(base_raw.as_slice()))
1399 {
1400 let loose = dir
1401 .join(base_oid.loose_prefix())
1402 .join(base_oid.loose_suffix());
1403 if loose.is_file() {
1404 if let Ok(obj) = crate::odb::Odb::read_loose_verify_oid(&loose, &base_oid) {
1405 let result = apply_delta(&obj.data, &delta_data)?;
1406 return Ok((obj.kind, result));
1407 }
1408 }
1409 if let Ok(obj) = read_object_from_other_pack(dir, idx, &base_oid, depth + 1) {
1410 let result = apply_delta(&obj.data, &delta_data)?;
1411 return Ok((obj.kind, result));
1412 }
1413 }
1414 }
1415 if let Some(err) = in_pack_err {
1416 return Err(err);
1417 }
1418 if idx.entries.len() > 100 {
1423 return Ok((ObjectKind::Blob, delta_data));
1424 }
1425 Err(Error::CorruptObject(format!(
1426 "ref-delta base {} not found in pack",
1427 oid_bytes_to_hex(&base_raw)
1428 )))
1429 }
1430 }
1431}
1432
1433fn read_object_from_other_pack(
1434 objects_dir: &Path,
1435 current_idx: &PackIndex,
1436 oid: &ObjectId,
1437 depth: usize,
1438) -> Result<Object> {
1439 for idx in read_local_pack_indexes_cached(objects_dir)? {
1440 if idx.idx_path == current_idx.idx_path {
1441 continue;
1442 }
1443 if idx.contains(oid) {
1444 return read_object_from_pack_at_depth(&idx, oid, depth);
1447 }
1448 }
1449 Err(Error::ObjectNotFound(oid.to_hex()))
1450}
1451
1452pub fn read_object_from_pack(idx: &PackIndex, oid: &ObjectId) -> Result<Object> {
1461 read_object_from_pack_at_depth(idx, oid, 0)
1462}
1463
1464fn read_object_from_pack_at_depth(idx: &PackIndex, oid: &ObjectId, depth: usize) -> Result<Object> {
1467 let Some(offset) = idx.find_offset(oid) else {
1468 return Err(Error::ObjectNotFound(oid.to_hex()));
1469 };
1470
1471 let pack_bytes = read_pack_bytes_cached(&idx.pack_path)?;
1472 validate_pack_index_object_count(&pack_bytes, idx)?;
1473 let objects_dir = idx.pack_path.parent().and_then(Path::parent);
1474 let (kind, data) = read_pack_object_at(&pack_bytes, offset, idx, objects_dir, depth)?;
1475 Ok(Object::new(kind, data))
1476}
1477
1478pub fn read_object_from_pack_bytes(
1480 pack_bytes: &[u8],
1481 idx: &PackIndex,
1482 oid: &[u8],
1483) -> Result<Object> {
1484 validate_pack_index_object_count(pack_bytes, idx)?;
1485 let entry_offset = idx
1486 .entries
1487 .binary_search_by(|e| e.oid.as_slice().cmp(oid))
1488 .ok()
1489 .map(|i| idx.entries[i].offset)
1490 .ok_or_else(|| Error::ObjectNotFound(oid_bytes_to_hex(oid)))?;
1491 let (kind, data) = read_pack_object_at(pack_bytes, entry_offset, idx, None, 0)?;
1492 verify_packed_object_hash(kind, &data, oid)?;
1493 Ok(Object::new(kind, data))
1494}
1495
1496fn validate_pack_index_object_count(pack_bytes: &[u8], idx: &PackIndex) -> Result<()> {
1497 if pack_bytes.len() < 12 || &pack_bytes[0..4] != b"PACK" {
1498 return Err(Error::CorruptObject("bad pack header".to_owned()));
1499 }
1500 let count =
1501 u32::from_be_bytes([pack_bytes[8], pack_bytes[9], pack_bytes[10], pack_bytes[11]]) as usize;
1502 if count != idx.entries.len() {
1503 return Err(Error::CorruptObject(format!(
1504 "pack object count mismatch: pack has {count}, index has {}",
1505 idx.entries.len()
1506 )));
1507 }
1508 Ok(())
1509}
1510
1511fn verify_packed_object_hash(kind: ObjectKind, data: &[u8], expected_oid: &[u8]) -> Result<()> {
1512 if expected_oid.len() != 20 {
1513 return Ok(());
1514 }
1515 let header = format!("{kind} {}\0", data.len());
1516 let mut hasher = Sha1::new();
1517 hasher.update(header.as_bytes());
1518 hasher.update(data);
1519 let actual = hasher.finalize();
1520 if actual.as_slice() != expected_oid {
1521 return Err(Error::CorruptObject(format!(
1522 "packed object {} hashes to {}",
1523 oid_bytes_to_hex(expected_oid),
1524 oid_bytes_to_hex(actual.as_slice())
1525 )));
1526 }
1527 Ok(())
1528}
1529
1530pub fn read_object_from_packs(objects_dir: &Path, oid: &ObjectId) -> Result<Object> {
1542 let indexes = read_local_pack_indexes_cached(objects_dir)?;
1543 let mut last_err: Option<Error> = None;
1544 for idx in &indexes {
1545 if idx.find_offset(oid).is_none() {
1546 continue;
1547 }
1548 match read_object_from_pack(idx, oid) {
1549 Ok(obj) => return Ok(obj),
1550 Err(Error::ObjectNotFound(_)) => {}
1553 Err(err) => last_err = Some(err),
1556 }
1557 }
1558 Err(last_err.unwrap_or_else(|| Error::ObjectNotFound(oid.to_hex())))
1559}
1560
1561pub fn packed_ref_delta_reuse_slice(
1572 objects_dir: &Path,
1573 oid: &ObjectId,
1574 packed_set: &HashSet<ObjectId>,
1575) -> Result<Option<(ObjectId, Vec<u8>)>> {
1576 let mut indexes = read_local_pack_indexes(objects_dir)?;
1577 sort_pack_indexes_oldest_first(&mut indexes);
1578 for idx in indexes {
1579 let Some(entry) = idx
1580 .entries
1581 .iter()
1582 .find(|e| e.oid.len() == 20 && e.oid.as_slice() == oid.as_bytes().as_slice())
1583 else {
1584 continue;
1585 };
1586 let hb = idx.hash_bytes;
1587 if hb != 20 {
1588 continue;
1589 }
1590 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
1591 let mut p = entry.offset as usize;
1592 let (packed_type, _size) = parse_pack_object_header(&pack_bytes, &mut p)?;
1593 let base = match packed_type {
1594 PackedType::RefDelta => {
1595 if p + hb > pack_bytes.len() {
1596 return Err(Error::CorruptObject(
1597 "truncated ref-delta base oid while scanning for reuse".to_owned(),
1598 ));
1599 }
1600 let bo = ObjectId::from_bytes(&pack_bytes[p..p + hb])?;
1601 p += hb;
1602 bo
1603 }
1604 PackedType::OfsDelta => {
1605 let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
1606 let Some(base_entry) = idx.entries.iter().find(|e| e.offset == base_off) else {
1607 continue;
1608 };
1609 if base_entry.oid.len() != 20 {
1610 continue;
1611 }
1612 ObjectId::from_bytes(base_entry.oid.as_slice())?
1613 }
1614 _ => {
1615 continue;
1618 }
1619 };
1620 if !packed_set.contains(&base) {
1621 continue;
1622 }
1623 let zlib_start = p;
1624 let mut end_pos = zlib_start;
1625 if skip_one_pack_object(&pack_bytes, &mut end_pos, entry.offset, hb).is_err() {
1626 continue;
1627 }
1628 let compressed = &pack_bytes[zlib_start..end_pos];
1629 let mut dec = ZlibDecoder::new(compressed);
1630 let mut delta = Vec::new();
1631 if dec.read_to_end(&mut delta).is_err() {
1632 continue;
1633 }
1634 return Ok(Some((base, delta)));
1635 }
1636 Ok(None)
1637}
1638
1639fn sort_pack_indexes_oldest_first(indexes: &mut [PackIndex]) {
1642 indexes.sort_by(|a, b| {
1643 let ta = fs::metadata(&a.pack_path)
1644 .and_then(|m| m.modified())
1645 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1646 let tb = fs::metadata(&b.pack_path)
1647 .and_then(|m| m.modified())
1648 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1649 ta.cmp(&tb).then_with(|| a.pack_path.cmp(&b.pack_path))
1650 });
1651}
1652
1653fn sort_pack_indexes_newest_first(indexes: &mut [PackIndex]) {
1654 indexes.sort_by(|a, b| {
1655 let ta = fs::metadata(&a.pack_path)
1656 .and_then(|m| m.modified())
1657 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1658 let tb = fs::metadata(&b.pack_path)
1659 .and_then(|m| m.modified())
1660 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1661 tb.cmp(&ta).then_with(|| b.pack_path.cmp(&a.pack_path))
1662 });
1663}
1664
1665pub fn packed_delta_base_oid(objects_dir: &Path, oid: &ObjectId) -> Result<Option<ObjectId>> {
1666 let mut indexes = read_local_pack_indexes(objects_dir)?;
1667 sort_pack_indexes_newest_first(&mut indexes);
1668 for idx in &indexes {
1669 if idx.hash_bytes != 20 {
1670 continue;
1671 }
1672 let Some(entry) = idx
1673 .entries
1674 .iter()
1675 .find(|e| e.oid.len() == 20 && e.oid.as_slice() == oid.as_bytes().as_slice())
1676 else {
1677 continue;
1678 };
1679 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
1680 let mut p = entry.offset as usize;
1681 let (packed_type, _) = parse_pack_object_header(&pack_bytes, &mut p)?;
1682 match packed_type {
1683 PackedType::RefDelta => {
1684 let hb = idx.hash_bytes;
1685 if p + hb > pack_bytes.len() {
1686 return Err(Error::CorruptObject("truncated ref-delta base".to_owned()));
1687 }
1688 return Ok(Some(ObjectId::from_bytes(&pack_bytes[p..p + hb])?));
1689 }
1690 PackedType::OfsDelta => {
1691 let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
1692 return Ok(idx
1693 .entries
1694 .iter()
1695 .find(|e| e.offset == base_off)
1696 .and_then(|e| ObjectId::from_bytes(e.oid.as_slice()).ok()));
1697 }
1698 _ => continue,
1699 }
1700 }
1701 Ok(None)
1702}
1703
1704fn parse_pack_object_header(bytes: &[u8], pos: &mut usize) -> Result<(PackedType, u64)> {
1705 let first = *bytes.get(*pos).ok_or_else(|| {
1706 Error::CorruptObject("unexpected end of pack header while decoding object".to_owned())
1707 })?;
1708 *pos += 1;
1709
1710 let type_code = (first >> 4) & 0x7;
1711 let mut size = (first & 0x0f) as u64;
1712 let mut shift = 4u32;
1713 let mut c = first;
1714 while (c & 0x80) != 0 {
1715 c = *bytes.get(*pos).ok_or_else(|| {
1716 Error::CorruptObject("unexpected end of variable size header".to_owned())
1717 })?;
1718 *pos += 1;
1719 size |= ((c & 0x7f) as u64) << shift;
1720 shift += 7;
1721 }
1722
1723 let packed_type = match type_code {
1724 1 => PackedType::Commit,
1725 2 => PackedType::Tree,
1726 3 => PackedType::Blob,
1727 4 => PackedType::Tag,
1728 6 => PackedType::OfsDelta,
1729 7 => PackedType::RefDelta,
1730 _ => {
1731 return Err(Error::CorruptObject(format!(
1732 "unsupported packed object type {}",
1733 type_code
1734 )))
1735 }
1736 };
1737 Ok((packed_type, size))
1738}
1739
#[derive(Debug, Clone, Copy)]
/// The base a deltified pack entry depends on, as recorded in the entry itself.
///
/// Produced by `read_packed_delta_dependency`.
pub enum PackedDeltaDependency {
    /// The entry is an offset-delta.
    OfsBase {
        /// Offset of the base entry, as computed by `parse_ofs_delta_base`
        /// (the entry's own offset minus the encoded distance).
        base_offset: u64,
    },
    /// The entry is a ref-delta.
    RefBase {
        /// Id of the base object, read from the bytes following the entry header.
        base_oid: ObjectId,
    },
}
1754
1755pub fn read_packed_delta_dependency(
1757 pack_bytes: &[u8],
1758 object_offset: u64,
1759) -> Result<Option<PackedDeltaDependency>> {
1760 let mut pos = object_offset as usize;
1761 let (ty, _) = parse_pack_object_header(pack_bytes, &mut pos)?;
1762 match ty {
1763 PackedType::OfsDelta => {
1764 let base = parse_ofs_delta_base(pack_bytes, &mut pos, object_offset)?;
1765 Ok(Some(PackedDeltaDependency::OfsBase { base_offset: base }))
1766 }
1767 PackedType::RefDelta => {
1768 if pos + 20 > pack_bytes.len() {
1769 return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
1770 }
1771 let base_oid = ObjectId::from_bytes(&pack_bytes[pos..pos + 20])?;
1772 Ok(Some(PackedDeltaDependency::RefBase { base_oid }))
1773 }
1774 _ => Ok(None),
1775 }
1776}
1777
1778fn parse_ofs_delta_base(bytes: &[u8], pos: &mut usize, this_offset: u64) -> Result<u64> {
1779 let mut c = *bytes
1780 .get(*pos)
1781 .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
1782 *pos += 1;
1783 let mut value = (c & 0x7f) as u64;
1784 while (c & 0x80) != 0 {
1785 c = *bytes
1786 .get(*pos)
1787 .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
1788 *pos += 1;
1789 value = ((value + 1) << 7) | (c & 0x7f) as u64;
1790 }
1791 this_offset
1792 .checked_sub(value)
1793 .ok_or_else(|| Error::CorruptObject("invalid ofs-delta base offset".to_owned()))
1794}
1795
1796#[must_use]
1804pub fn slice_one_pack_object(
1805 bytes: &[u8],
1806 object_start_offset: u64,
1807 hash_bytes: usize,
1808) -> Result<&[u8]> {
1809 let start = object_start_offset as usize;
1810 let mut pos = start;
1811 skip_one_pack_object(bytes, &mut pos, object_start_offset, hash_bytes)?;
1812 Ok(&bytes[start..pos])
1813}
1814
1815pub fn skip_one_pack_object(
1816 bytes: &[u8],
1817 pos: &mut usize,
1818 object_start_offset: u64,
1819 hash_bytes: usize,
1820) -> Result<()> {
1821 let (packed_type, size) = parse_pack_object_header(bytes, pos)?;
1822 match packed_type {
1823 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
1824 let mut dec = ZlibDecoder::new(&bytes[*pos..]);
1825 let mut tmp = Vec::with_capacity(size as usize);
1826 dec.read_to_end(&mut tmp)
1827 .map_err(|e| Error::Zlib(e.to_string()))?;
1828 *pos += dec.total_in() as usize;
1829 }
1830 PackedType::RefDelta => {
1831 if *pos + hash_bytes > bytes.len() {
1832 return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
1833 }
1834 *pos += hash_bytes;
1835 let mut dec = ZlibDecoder::new(&bytes[*pos..]);
1836 let mut tmp = Vec::with_capacity(size as usize);
1837 dec.read_to_end(&mut tmp)
1838 .map_err(|e| Error::Zlib(e.to_string()))?;
1839 *pos += dec.total_in() as usize;
1840 }
1841 PackedType::OfsDelta => {
1842 let _base_off = parse_ofs_delta_base(bytes, pos, object_start_offset)?;
1843 let mut dec = ZlibDecoder::new(&bytes[*pos..]);
1844 let mut tmp = Vec::with_capacity(size as usize);
1845 dec.read_to_end(&mut tmp)
1846 .map_err(|e| Error::Zlib(e.to_string()))?;
1847 *pos += dec.total_in() as usize;
1848 }
1849 }
1850 Ok(())
1851}
1852
1853fn read_u32_be(bytes: &[u8], pos: &mut usize) -> Result<u32> {
1854 if bytes.len() < *pos + 4 {
1855 return Err(Error::CorruptObject(
1856 "unexpected end of idx while reading u32".to_owned(),
1857 ));
1858 }
1859 let v = u32::from_be_bytes(
1860 bytes[*pos..*pos + 4]
1861 .try_into()
1862 .map_err(|_| Error::CorruptObject("failed to parse u32".to_owned()))?,
1863 );
1864 *pos += 4;
1865 Ok(v)
1866}
1867
1868fn read_u64_be(bytes: &[u8], pos: &mut usize) -> Result<u64> {
1869 if bytes.len() < *pos + 8 {
1870 return Err(Error::CorruptObject(
1871 "unexpected end of idx while reading u64".to_owned(),
1872 ));
1873 }
1874 let v = u64::from_be_bytes(
1875 bytes[*pos..*pos + 8]
1876 .try_into()
1877 .map_err(|_| Error::CorruptObject("failed to parse u64".to_owned()))?,
1878 );
1879 *pos += 8;
1880 Ok(v)
1881}
1882
1883pub fn read_idx_object_ids(idx_path: &Path) -> Result<Vec<ObjectId>> {
1885 let index = read_pack_index(idx_path)?;
1886 let mut out = Vec::new();
1887 for e in index.entries {
1888 if e.oid.len() == 20 {
1889 out.push(ObjectId::from_bytes(&e.oid)?);
1890 }
1891 }
1892 Ok(out)
1893}