1use crate::error::{Error, Result};
7use crate::objects::{Object, ObjectId, ObjectKind};
8use crate::odb::Odb;
9use crate::unpack_objects::apply_delta;
10use flate2::read::ZlibDecoder;
11use sha1::{Digest, Sha1};
12use std::collections::{BTreeMap, HashMap, HashSet};
13use std::fs;
14use std::io;
15use std::io::Read;
16use std::path::{Path, PathBuf};
17
/// One object listed in a pack index: its id and where its entry starts in the pack.
#[derive(Debug, Clone)]
pub struct PackIndexEntry {
    /// Object id as read from the index's id table.
    pub oid: ObjectId,
    /// Byte offset of the object's entry header inside the companion pack file.
    pub offset: u64,
}
26
/// Parsed contents of a pack `.idx` file together with the paths it relates to.
#[derive(Debug, Clone)]
pub struct PackIndex {
    /// Path of the `.idx` file the entries were read from.
    pub idx_path: PathBuf,
    /// Path of the companion pack: `idx_path` with its extension replaced by `pack`.
    pub pack_path: PathBuf,
    /// One entry per object, in the order the ids appear in the index file.
    pub entries: Vec<PackIndexEntry>,
}
37
/// One row produced by [`show_index_entries`].
#[derive(Debug, Clone)]
pub struct ShowIndexEntry {
    /// Object id read from the index.
    pub oid: ObjectId,
    /// Pack offset recorded for the object.
    pub offset: u64,
    /// CRC32 read from a version 2 index; `None` when the entry came from a version 1 index.
    pub crc32: Option<u32>,
}
51
52pub fn show_index_entries(reader: &mut dyn Read, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
63 let mut buf = Vec::new();
64 reader.read_to_end(&mut buf).map_err(Error::Io)?;
65
66 if buf.len() < 8 {
67 return Err(Error::CorruptObject(
68 "unable to read header: index file too small".to_owned(),
69 ));
70 }
71
72 let mut pos = 0usize;
73 let first_u32 = read_u32_be(&buf, &mut pos)?;
74
75 const PACK_IDX_SIGNATURE: u32 = 0xff74_4f63;
76
77 if first_u32 == PACK_IDX_SIGNATURE {
78 let version = read_u32_be(&buf, &mut pos)?;
80 if version != 2 {
81 return Err(Error::CorruptObject(format!(
82 "unknown index version: {version}"
83 )));
84 }
85 show_index_v2(&buf, &mut pos, hash_size)
86 } else {
87 pos = 0;
90 show_index_v1(&buf, &mut pos, hash_size)
91 }
92}
93
94fn show_index_v1(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
96 if buf.len() < 256 * 4 {
97 return Err(Error::CorruptObject(
98 "unable to read index: v1 fanout too short".to_owned(),
99 ));
100 }
101 let mut fanout = [0u32; 256];
102 for slot in &mut fanout {
103 *slot = read_u32_be(buf, pos)?;
104 }
105 let object_count = fanout[255] as usize;
106
107 let mut entries = Vec::with_capacity(object_count);
108 for i in 0..object_count {
109 if *pos + 4 + hash_size > buf.len() {
111 return Err(Error::CorruptObject(format!(
112 "unable to read entry {i}/{object_count}: truncated"
113 )));
114 }
115 let offset = read_u32_be(buf, pos)? as u64;
116 let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
117 *pos += hash_size;
118 entries.push(ShowIndexEntry {
119 oid,
120 offset,
121 crc32: None,
122 });
123 }
124 Ok(entries)
125}
126
127fn show_index_v2(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
130 if buf.len() < *pos + 256 * 4 {
131 return Err(Error::CorruptObject(
132 "unable to read index: v2 fanout too short".to_owned(),
133 ));
134 }
135 let mut fanout = [0u32; 256];
136 for slot in &mut fanout {
137 *slot = read_u32_be(buf, pos)?;
138 }
139 let object_count = fanout[255] as usize;
140
141 let mut oids = Vec::with_capacity(object_count);
143 for i in 0..object_count {
144 if *pos + hash_size > buf.len() {
145 return Err(Error::CorruptObject(format!(
146 "unable to read sha1 {i}/{object_count}: truncated"
147 )));
148 }
149 let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
150 *pos += hash_size;
151 oids.push(oid);
152 }
153
154 let mut crcs = Vec::with_capacity(object_count);
156 for i in 0..object_count {
157 if *pos + 4 > buf.len() {
158 return Err(Error::CorruptObject(format!(
159 "unable to read crc {i}/{object_count}: truncated"
160 )));
161 }
162 crcs.push(read_u32_be(buf, pos)?);
163 }
164
165 let mut offsets32 = Vec::with_capacity(object_count);
167 let mut large_count = 0usize;
168 for i in 0..object_count {
169 if *pos + 4 > buf.len() {
170 return Err(Error::CorruptObject(format!(
171 "unable to read 32b offset {i}/{object_count}: truncated"
172 )));
173 }
174 let v = read_u32_be(buf, pos)?;
175 if (v & 0x8000_0000) != 0 {
176 large_count += 1;
177 }
178 offsets32.push(v);
179 }
180
181 let mut large_offsets = Vec::with_capacity(large_count);
183 for i in 0..large_count {
184 if *pos + 8 > buf.len() {
185 return Err(Error::CorruptObject(format!(
186 "unable to read 64b offset {i}: truncated"
187 )));
188 }
189 large_offsets.push(read_u64_be(buf, pos)?);
190 }
191
192 let mut next_large = 0usize;
193 let mut entries = Vec::with_capacity(object_count);
194 for (i, oid) in oids.into_iter().enumerate() {
195 let raw = offsets32[i];
196 let offset = if (raw & 0x8000_0000) == 0 {
197 raw as u64
198 } else {
199 let idx = (raw & 0x7fff_ffff) as usize;
200 if idx != next_large {
201 return Err(Error::CorruptObject(format!(
202 "inconsistent 64b offset index at entry {i}"
203 )));
204 }
205 let off = large_offsets.get(next_large).copied().ok_or_else(|| {
206 Error::CorruptObject(format!("missing large offset entry {next_large}"))
207 })?;
208 next_large += 1;
209 off
210 };
211 entries.push(ShowIndexEntry {
212 oid,
213 offset,
214 crc32: Some(crcs[i]),
215 });
216 }
217 Ok(entries)
218}
219
/// Totals gathered by [`collect_local_pack_info`] across the pack indexes of one objects directory.
#[derive(Debug, Clone, Default)]
pub struct LocalPackInfo {
    /// Number of pack indexes that were counted.
    pub pack_count: usize,
    /// Sum of the entry counts of all indexes; an object present in two packs is counted twice.
    pub object_count: usize,
    /// Combined on-disk size in bytes of every pack file and its index file.
    pub size_bytes: u64,
    /// Distinct object ids seen in any index.
    pub object_ids: HashSet<ObjectId>,
}
232
233pub fn read_local_pack_indexes(objects_dir: &Path) -> Result<Vec<PackIndex>> {
240 let pack_dir = objects_dir.join("pack");
241 let rd = match fs::read_dir(&pack_dir) {
242 Ok(rd) => rd,
243 Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
244 Err(err) => return Err(Error::Io(err)),
245 };
246
247 let mut out = Vec::new();
248 for entry in rd {
249 let entry = entry.map_err(Error::Io)?;
250 let path = entry.path();
251 if path.extension().and_then(|s| s.to_str()) != Some("idx") {
252 continue;
253 }
254 if let Ok(idx) = read_pack_index(&path) {
255 out.push(idx);
256 }
257 }
258 Ok(out)
259}
260
261pub fn collect_local_pack_info(objects_dir: &Path) -> Result<LocalPackInfo> {
267 let indexes = read_local_pack_indexes(objects_dir)?;
268 let mut info = LocalPackInfo::default();
269 for idx in indexes {
270 let pack_meta = fs::metadata(&idx.pack_path).map_err(Error::Io)?;
271 let idx_meta = fs::metadata(&idx.idx_path).map_err(Error::Io)?;
272 info.pack_count += 1;
273 info.object_count += idx.entries.len();
274 info.size_bytes += pack_meta.len() + idx_meta.len();
275 for entry in idx.entries {
276 info.object_ids.insert(entry.oid);
277 }
278 }
279 Ok(info)
280}
281
282pub fn read_pack_index(idx_path: &Path) -> Result<PackIndex> {
288 let bytes = fs::read(idx_path).map_err(Error::Io)?;
289 if bytes.len() < 8 + 256 * 4 + 40 {
290 return Err(Error::CorruptObject(format!(
291 "index file {} is too small",
292 idx_path.display()
293 )));
294 }
295
296 let mut pos = 0usize;
297 let magic = &bytes[pos..pos + 4];
298 pos += 4;
299 if magic != [0xff, b't', b'O', b'c'] {
300 return Err(Error::CorruptObject(format!(
301 "unsupported idx signature in {}",
302 idx_path.display()
303 )));
304 }
305 let version = read_u32_be(&bytes, &mut pos)?;
306 if version != 2 {
307 return Err(Error::CorruptObject(format!(
308 "unsupported idx version {} in {}",
309 version,
310 idx_path.display()
311 )));
312 }
313
314 let mut fanout = [0u32; 256];
315 for slot in &mut fanout {
316 *slot = read_u32_be(&bytes, &mut pos)?;
317 }
318 let object_count = fanout[255] as usize;
319
320 let need = pos
321 .saturating_add(object_count * 20)
322 .saturating_add(object_count * 4)
323 .saturating_add(object_count * 4)
324 .saturating_add(40);
325 if bytes.len() < need {
326 return Err(Error::CorruptObject(format!(
327 "truncated idx file {}",
328 idx_path.display()
329 )));
330 }
331
332 let mut oids = Vec::with_capacity(object_count);
333 for _ in 0..object_count {
334 let oid = ObjectId::from_bytes(&bytes[pos..pos + 20])?;
335 pos += 20;
336 oids.push(oid);
337 }
338
339 pos += object_count * 4;
341
342 let mut offsets32 = Vec::with_capacity(object_count);
343 let mut large_count = 0usize;
344 for _ in 0..object_count {
345 let v = read_u32_be(&bytes, &mut pos)?;
346 if (v & 0x8000_0000) != 0 {
347 large_count += 1;
348 }
349 offsets32.push(v);
350 }
351
352 if bytes.len() < pos + large_count * 8 + 40 {
353 return Err(Error::CorruptObject(format!(
354 "truncated large offset table in {}",
355 idx_path.display()
356 )));
357 }
358 let mut large_offsets = Vec::with_capacity(large_count);
359 for _ in 0..large_count {
360 large_offsets.push(read_u64_be(&bytes, &mut pos)?);
361 }
362
363 let mut next_large = 0usize;
364 let mut entries = Vec::with_capacity(object_count);
365 for (i, oid) in oids.into_iter().enumerate() {
366 let raw = offsets32[i];
367 let offset = if (raw & 0x8000_0000) == 0 {
368 raw as u64
369 } else {
370 let off = large_offsets.get(next_large).copied().ok_or_else(|| {
371 Error::CorruptObject(format!("bad large offset index in {}", idx_path.display()))
372 })?;
373 next_large += 1;
374 off
375 };
376 entries.push(PackIndexEntry { oid, offset });
377 }
378
379 let mut pack_path = idx_path.to_path_buf();
380 pack_path.set_extension("pack");
381
382 if bytes.len() < 20 {
384 return Err(Error::CorruptObject(format!(
385 "index file {} missing checksum",
386 idx_path.display()
387 )));
388 }
389 let idx_body_end = bytes.len() - 20;
390 let mut h = Sha1::new();
391 h.update(&bytes[..idx_body_end]);
392 let digest = h.finalize();
393 if digest.as_slice() != &bytes[idx_body_end..] {
394 return Err(Error::CorruptObject(format!(
395 "index checksum mismatch for {}",
396 idx_path.display()
397 )));
398 }
399
400 Ok(PackIndex {
401 idx_path: idx_path.to_path_buf(),
402 pack_path,
403 entries,
404 })
405}
406
/// Entry type as decoded from a pack entry header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedType {
    /// Commit object stored whole.
    Commit,
    /// Tree object stored whole.
    Tree,
    /// Blob object stored whole.
    Blob,
    /// Tag object stored whole.
    Tag,
    /// Delta whose base is located by an offset relative to the delta's own entry.
    OfsDelta,
    /// Delta whose base is named by object id.
    RefDelta,
}

impl PackedType {
    /// Returns the lowercase, hyphenated name of the entry type.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            PackedType::Commit => "commit",
            PackedType::Tree => "tree",
            PackedType::Blob => "blob",
            PackedType::Tag => "tag",
            PackedType::OfsDelta => "ofs-delta",
            PackedType::RefDelta => "ref-delta",
        }
    }
}
438
/// Per-object details reported by [`verify_pack_and_collect`].
#[derive(Debug, Clone)]
pub struct VerifyObjectRecord {
    /// Object id the index associates with this entry's offset.
    pub oid: ObjectId,
    /// Entry type decoded from the pack entry header.
    pub packed_type: PackedType,
    /// Size value decoded from the pack entry header.
    pub size: u64,
    /// Bytes from this entry's offset up to the next entry, or up to the pack trailer for the last one.
    pub size_in_pack: u64,
    /// Byte offset of the entry within the pack file.
    pub offset: u64,
    /// Delta chain depth; `None` for entries that are not deltas.
    pub depth: Option<u64>,
    /// Base object id read from the entry; only set for ref-delta entries.
    pub base_oid: Option<ObjectId>,
}
457
458pub fn verify_pack_and_collect(idx_path: &Path) -> Result<Vec<VerifyObjectRecord>> {
464 let idx = read_pack_index(idx_path)?;
465 let idx_file_bytes = fs::read(idx_path).map_err(Error::Io)?;
466 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
467 if pack_bytes.len() < 12 + 20 {
468 return Err(Error::CorruptObject(format!(
469 "pack file {} is too small",
470 idx.pack_path.display()
471 )));
472 }
473 let pack_end = pack_bytes.len() - 20;
474 {
475 let mut h = Sha1::new();
476 h.update(&pack_bytes[..pack_end]);
477 let digest = h.finalize();
478 if digest.as_slice() != &pack_bytes[pack_end..] {
479 return Err(Error::CorruptObject(format!(
480 "pack trailing checksum mismatch for {}",
481 idx.pack_path.display()
482 )));
483 }
484 }
485 if idx_file_bytes.len() >= 40 {
486 let embedded = &idx_file_bytes[idx_file_bytes.len() - 40..idx_file_bytes.len() - 20];
487 if embedded != &pack_bytes[pack_end..] {
488 return Err(Error::CorruptObject(format!(
489 "pack checksum in index does not match {}",
490 idx.pack_path.display()
491 )));
492 }
493 }
494 if &pack_bytes[0..4] != b"PACK" {
495 return Err(Error::CorruptObject(format!(
496 "pack file {} has invalid signature",
497 idx.pack_path.display()
498 )));
499 }
500 let version = u32::from_be_bytes(pack_bytes[4..8].try_into().unwrap_or([0, 0, 0, 0]));
501 if version != 2 && version != 3 {
502 return Err(Error::CorruptObject(format!(
503 "unsupported pack version {} in {}",
504 version,
505 idx.pack_path.display()
506 )));
507 }
508 let count = u32::from_be_bytes(pack_bytes[8..12].try_into().unwrap_or([0, 0, 0, 0])) as usize;
509 if count != idx.entries.len() {
510 return Err(Error::CorruptObject(format!(
511 "pack/index object count mismatch for {}",
512 idx.pack_path.display()
513 )));
514 }
515
516 let mut by_offset: BTreeMap<u64, ObjectId> = BTreeMap::new();
517 for entry in &idx.entries {
518 by_offset.insert(entry.offset, entry.oid);
519 }
520 let offsets: Vec<u64> = by_offset.keys().copied().collect();
521 if offsets.is_empty() {
522 return Ok(Vec::new());
523 }
524
525 let mut by_oid: HashMap<ObjectId, usize> = HashMap::new();
526 let mut records: Vec<VerifyObjectRecord> = Vec::with_capacity(offsets.len());
527 for (i, offset) in offsets.iter().copied().enumerate() {
528 let oid = by_offset.get(&offset).copied().ok_or_else(|| {
529 Error::CorruptObject(format!("missing object id for offset {}", offset))
530 })?;
531 let next_off = offsets
532 .get(i + 1)
533 .copied()
534 .unwrap_or((pack_bytes.len() - 20) as u64);
535 if next_off <= offset || next_off > (pack_bytes.len() - 20) as u64 {
536 return Err(Error::CorruptObject(format!(
537 "invalid object boundaries at offset {} in {}",
538 offset,
539 idx.pack_path.display()
540 )));
541 }
542 let mut p = offset as usize;
543 let (packed_type, size) = parse_pack_object_header(&pack_bytes, &mut p)?;
544 let mut base_oid = None;
545 let mut depth = None;
546
547 match packed_type {
548 PackedType::RefDelta => {
549 if p + 20 > pack_bytes.len() {
550 return Err(Error::CorruptObject(format!(
551 "truncated ref-delta base at offset {}",
552 offset
553 )));
554 }
555 base_oid = Some(ObjectId::from_bytes(&pack_bytes[p..p + 20])?);
556 }
557 PackedType::OfsDelta => {
558 let base_offset = parse_ofs_delta_base(&pack_bytes, &mut p, offset)?;
559 let base_depth = records
560 .iter()
561 .find(|r| r.offset == base_offset)
562 .and_then(|r| r.depth)
563 .unwrap_or(0);
564 depth = Some(base_depth + 1);
565 }
566 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {}
567 }
568
569 let size_in_pack = next_off - offset;
570 records.push(VerifyObjectRecord {
571 oid,
572 packed_type,
573 size,
574 size_in_pack,
575 offset,
576 depth,
577 base_oid,
578 });
579 by_oid.insert(oid, i);
580 }
581
582 for i in 0..records.len() {
584 if records[i].packed_type != PackedType::RefDelta {
585 continue;
586 }
587 let base = records[i]
588 .base_oid
589 .ok_or_else(|| Error::CorruptObject("ref-delta missing base oid".to_owned()))?;
590 let base_depth = by_oid
591 .get(&base)
592 .and_then(|idx| records.get(*idx))
593 .and_then(|r| r.depth)
594 .unwrap_or(0);
595 records[i].depth = Some(base_depth + 1);
596 }
597
598 for entry in &idx.entries {
600 let obj = read_object_from_pack(&idx, &entry.oid)?;
601 let computed = Odb::hash_object_data(obj.kind, &obj.data);
602 if computed != entry.oid {
603 return Err(Error::CorruptObject(format!(
604 "pack object hash mismatch at offset {} (index says {})",
605 entry.offset, entry.oid
606 )));
607 }
608 }
609
610 Ok(records)
611}
612
613pub fn read_alternates_recursive(objects_dir: &Path) -> Result<Vec<PathBuf>> {
619 let mut visited = HashSet::new();
620 let mut out = Vec::new();
621 read_alternates_inner(objects_dir, &mut visited, &mut out, 0)?;
622 Ok(out)
623}
624
625const MAX_ALTERNATE_DEPTH: usize = 5;
627
628fn read_alternates_inner(
629 objects_dir: &Path,
630 visited: &mut HashSet<PathBuf>,
631 out: &mut Vec<PathBuf>,
632 depth: usize,
633) -> Result<()> {
634 if depth > MAX_ALTERNATE_DEPTH {
635 return Ok(());
636 }
637 let canonical = canonical_or_self(objects_dir);
638 let alt_file = canonical.join("info").join("alternates");
639 let text = match fs::read_to_string(&alt_file) {
640 Ok(text) => text,
641 Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
642 Err(err) => return Err(Error::Io(err)),
643 };
644
645 for raw in text.lines() {
646 let line = raw.trim();
647 if line.is_empty() {
648 continue;
649 }
650 let candidate = if Path::new(line).is_absolute() {
651 PathBuf::from(line)
652 } else {
653 canonical.join(line)
654 };
655 let candidate = canonical_or_self(&candidate);
656 if visited.insert(candidate.clone()) {
657 out.push(candidate.clone());
658 read_alternates_inner(&candidate, visited, out, depth + 1)?;
659 }
660 }
661 Ok(())
662}
663
/// Returns the canonical form of `path`, or `path` unchanged when it cannot be
/// canonicalised (for example because it does not exist).
fn canonical_or_self(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
667
668fn packed_type_to_kind(pt: PackedType) -> Result<ObjectKind> {
670 match pt {
671 PackedType::Commit => Ok(ObjectKind::Commit),
672 PackedType::Tree => Ok(ObjectKind::Tree),
673 PackedType::Blob => Ok(ObjectKind::Blob),
674 PackedType::Tag => Ok(ObjectKind::Tag),
675 PackedType::OfsDelta | PackedType::RefDelta => Err(Error::CorruptObject(
676 "cannot convert delta type to object kind directly".to_owned(),
677 )),
678 }
679}
680
681fn decompress_pack_data(bytes: &[u8], pos: &mut usize, expected_size: u64) -> Result<Vec<u8>> {
686 let slice = &bytes[*pos..];
687 let mut decoder = ZlibDecoder::new(slice);
688 let mut out = Vec::with_capacity(expected_size as usize);
689 decoder
690 .read_to_end(&mut out)
691 .map_err(|e| Error::Zlib(e.to_string()))?;
692 *pos += decoder.total_in() as usize;
693 Ok(out)
694}
695
696fn read_pack_object_at(
701 pack_bytes: &[u8],
702 offset: u64,
703 idx: &PackIndex,
704 depth: usize,
705) -> Result<(ObjectKind, Vec<u8>)> {
706 if depth > 50 {
707 return Err(Error::CorruptObject(
708 "delta chain too deep (>50)".to_owned(),
709 ));
710 }
711 let mut pos = offset as usize;
712 let (packed_type, size) = parse_pack_object_header(pack_bytes, &mut pos)?;
713
714 match packed_type {
715 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
716 let data = decompress_pack_data(pack_bytes, &mut pos, size)?;
717 let kind = packed_type_to_kind(packed_type)?;
718 Ok((kind, data))
719 }
720 PackedType::OfsDelta => {
721 let base_offset = parse_ofs_delta_base(pack_bytes, &mut pos, offset)?;
722 let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
723 let (base_kind, base_data) =
724 read_pack_object_at(pack_bytes, base_offset, idx, depth + 1)?;
725 let result = apply_delta(&base_data, &delta_data)?;
726 Ok((base_kind, result))
727 }
728 PackedType::RefDelta => {
729 if pos + 20 > pack_bytes.len() {
730 return Err(Error::CorruptObject(
731 "truncated ref-delta base OID".to_owned(),
732 ));
733 }
734 let base_oid = ObjectId::from_bytes(&pack_bytes[pos..pos + 20])?;
735 pos += 20;
736 let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
737 let base_entry = idx
739 .entries
740 .iter()
741 .find(|e| e.oid == base_oid)
742 .ok_or_else(|| {
743 Error::CorruptObject(format!("ref-delta base {} not found in pack", base_oid))
744 })?;
745 let (base_kind, base_data) =
746 read_pack_object_at(pack_bytes, base_entry.offset, idx, depth + 1)?;
747 let result = apply_delta(&base_data, &delta_data)?;
748 Ok((base_kind, result))
749 }
750 }
751}
752
753pub fn read_object_from_pack(idx: &PackIndex, oid: &ObjectId) -> Result<Object> {
762 let entry = idx
763 .entries
764 .iter()
765 .find(|e| e.oid == *oid)
766 .ok_or_else(|| Error::ObjectNotFound(oid.to_hex()))?;
767
768 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
769 let (kind, data) = read_pack_object_at(&pack_bytes, entry.offset, idx, 0)?;
770 Ok(Object::new(kind, data))
771}
772
773pub fn read_object_from_packs(objects_dir: &Path, oid: &ObjectId) -> Result<Object> {
779 let indexes = read_local_pack_indexes(objects_dir)?;
780 for idx in &indexes {
781 if idx.entries.iter().any(|e| e.oid == *oid) {
782 return read_object_from_pack(idx, oid);
783 }
784 }
785 Err(Error::ObjectNotFound(oid.to_hex()))
786}
787
788pub fn packed_ref_delta_reuse_slice(
799 objects_dir: &Path,
800 oid: &ObjectId,
801 packed_set: &HashSet<ObjectId>,
802) -> Result<Option<(ObjectId, Vec<u8>)>> {
803 let mut indexes = read_local_pack_indexes(objects_dir)?;
804 sort_pack_indexes_oldest_first(&mut indexes);
805 for idx in indexes {
806 let Some(entry) = idx.entries.iter().find(|e| e.oid == *oid) else {
807 continue;
808 };
809 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
810 let mut p = entry.offset as usize;
811 let (packed_type, _size) = parse_pack_object_header(&pack_bytes, &mut p)?;
812 let base = match packed_type {
813 PackedType::RefDelta => {
814 if p + 20 > pack_bytes.len() {
815 return Err(Error::CorruptObject(
816 "truncated ref-delta base oid while scanning for reuse".to_owned(),
817 ));
818 }
819 let oid = ObjectId::from_bytes(&pack_bytes[p..p + 20])?;
820 p += 20;
821 oid
822 }
823 PackedType::OfsDelta => {
824 let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
825 let Some(base_entry) = idx.entries.iter().find(|e| e.offset == base_off) else {
826 continue;
827 };
828 base_entry.oid
829 }
830 _ => {
831 continue;
834 }
835 };
836 if !packed_set.contains(&base) {
837 continue;
838 }
839 let zlib_start = p;
840 let mut end_pos = zlib_start;
841 if skip_one_pack_object(&pack_bytes, &mut end_pos, entry.offset).is_err() {
842 continue;
843 }
844 let compressed = &pack_bytes[zlib_start..end_pos];
845 let mut dec = ZlibDecoder::new(compressed);
846 let mut delta = Vec::new();
847 if dec.read_to_end(&mut delta).is_err() {
848 continue;
849 }
850 return Ok(Some((base, delta)));
851 }
852 Ok(None)
853}
854
855fn sort_pack_indexes_oldest_first(indexes: &mut [PackIndex]) {
858 indexes.sort_by(|a, b| {
859 let ta = fs::metadata(&a.pack_path)
860 .and_then(|m| m.modified())
861 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
862 let tb = fs::metadata(&b.pack_path)
863 .and_then(|m| m.modified())
864 .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
865 ta.cmp(&tb).then_with(|| a.pack_path.cmp(&b.pack_path))
866 });
867}
868
869pub fn packed_delta_base_oid(objects_dir: &Path, oid: &ObjectId) -> Result<Option<ObjectId>> {
870 let mut indexes = read_local_pack_indexes(objects_dir)?;
871 sort_pack_indexes_oldest_first(&mut indexes);
872 for idx in &indexes {
873 let Some(entry) = idx.entries.iter().find(|e| e.oid == *oid) else {
874 continue;
875 };
876 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
877 let mut p = entry.offset as usize;
878 let (packed_type, _) = parse_pack_object_header(&pack_bytes, &mut p)?;
879 match packed_type {
880 PackedType::RefDelta => {
881 if p + 20 > pack_bytes.len() {
882 return Err(Error::CorruptObject("truncated ref-delta base".to_owned()));
883 }
884 return Ok(Some(ObjectId::from_bytes(&pack_bytes[p..p + 20])?));
885 }
886 PackedType::OfsDelta => {
887 let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
888 return Ok(idx
889 .entries
890 .iter()
891 .find(|e| e.offset == base_off)
892 .map(|e| e.oid));
893 }
894 _ => continue,
895 }
896 }
897 Ok(None)
898}
899
900fn parse_pack_object_header(bytes: &[u8], pos: &mut usize) -> Result<(PackedType, u64)> {
901 let first = *bytes.get(*pos).ok_or_else(|| {
902 Error::CorruptObject("unexpected end of pack header while decoding object".to_owned())
903 })?;
904 *pos += 1;
905
906 let type_code = (first >> 4) & 0x7;
907 let mut size = (first & 0x0f) as u64;
908 let mut shift = 4u32;
909 let mut c = first;
910 while (c & 0x80) != 0 {
911 c = *bytes.get(*pos).ok_or_else(|| {
912 Error::CorruptObject("unexpected end of variable size header".to_owned())
913 })?;
914 *pos += 1;
915 size |= ((c & 0x7f) as u64) << shift;
916 shift += 7;
917 }
918
919 let packed_type = match type_code {
920 1 => PackedType::Commit,
921 2 => PackedType::Tree,
922 3 => PackedType::Blob,
923 4 => PackedType::Tag,
924 6 => PackedType::OfsDelta,
925 7 => PackedType::RefDelta,
926 _ => {
927 return Err(Error::CorruptObject(format!(
928 "unsupported packed object type {}",
929 type_code
930 )))
931 }
932 };
933 Ok((packed_type, size))
934}
935
936fn parse_ofs_delta_base(bytes: &[u8], pos: &mut usize, this_offset: u64) -> Result<u64> {
937 let mut c = *bytes
938 .get(*pos)
939 .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
940 *pos += 1;
941 let mut value = (c & 0x7f) as u64;
942 while (c & 0x80) != 0 {
943 c = *bytes
944 .get(*pos)
945 .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
946 *pos += 1;
947 value = ((value + 1) << 7) | (c & 0x7f) as u64;
948 }
949 this_offset
950 .checked_sub(value)
951 .ok_or_else(|| Error::CorruptObject("invalid ofs-delta base offset".to_owned()))
952}
953
954pub fn skip_one_pack_object(bytes: &[u8], pos: &mut usize, object_start_offset: u64) -> Result<()> {
959 let (packed_type, size) = parse_pack_object_header(bytes, pos)?;
960 match packed_type {
961 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
962 let mut dec = ZlibDecoder::new(&bytes[*pos..]);
963 let mut tmp = Vec::with_capacity(size as usize);
964 dec.read_to_end(&mut tmp)
965 .map_err(|e| Error::Zlib(e.to_string()))?;
966 *pos += dec.total_in() as usize;
967 }
968 PackedType::RefDelta => {
969 if *pos + 20 > bytes.len() {
970 return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
971 }
972 *pos += 20;
973 let mut dec = ZlibDecoder::new(&bytes[*pos..]);
974 let mut tmp = Vec::with_capacity(size as usize);
975 dec.read_to_end(&mut tmp)
976 .map_err(|e| Error::Zlib(e.to_string()))?;
977 *pos += dec.total_in() as usize;
978 }
979 PackedType::OfsDelta => {
980 let _base_off = parse_ofs_delta_base(bytes, pos, object_start_offset)?;
981 let mut dec = ZlibDecoder::new(&bytes[*pos..]);
982 let mut tmp = Vec::with_capacity(size as usize);
983 dec.read_to_end(&mut tmp)
984 .map_err(|e| Error::Zlib(e.to_string()))?;
985 *pos += dec.total_in() as usize;
986 }
987 }
988 Ok(())
989}
990
991fn read_u32_be(bytes: &[u8], pos: &mut usize) -> Result<u32> {
992 if bytes.len() < *pos + 4 {
993 return Err(Error::CorruptObject(
994 "unexpected end of idx while reading u32".to_owned(),
995 ));
996 }
997 let v = u32::from_be_bytes(
998 bytes[*pos..*pos + 4]
999 .try_into()
1000 .map_err(|_| Error::CorruptObject("failed to parse u32".to_owned()))?,
1001 );
1002 *pos += 4;
1003 Ok(v)
1004}
1005
1006fn read_u64_be(bytes: &[u8], pos: &mut usize) -> Result<u64> {
1007 if bytes.len() < *pos + 8 {
1008 return Err(Error::CorruptObject(
1009 "unexpected end of idx while reading u64".to_owned(),
1010 ));
1011 }
1012 let v = u64::from_be_bytes(
1013 bytes[*pos..*pos + 8]
1014 .try_into()
1015 .map_err(|_| Error::CorruptObject("failed to parse u64".to_owned()))?,
1016 );
1017 *pos += 8;
1018 Ok(v)
1019}
1020
1021pub fn read_idx_object_ids(idx_path: &Path) -> Result<Vec<ObjectId>> {
1023 let index = read_pack_index(idx_path)?;
1024 Ok(index.entries.into_iter().map(|e| e.oid).collect())
1025}