1use crate::error::{Error, Result};
7use crate::objects::{Object, ObjectId, ObjectKind};
8use crate::unpack_objects::apply_delta;
9use flate2::read::ZlibDecoder;
10use std::collections::{BTreeMap, HashMap, HashSet};
11use std::fs;
12use std::io;
13use std::io::Read;
14use std::path::{Path, PathBuf};
15
16#[derive(Debug, Clone)]
18pub struct PackIndexEntry {
19 pub oid: ObjectId,
21 pub offset: u64,
23}
24
25#[derive(Debug, Clone)]
27pub struct PackIndex {
28 pub idx_path: PathBuf,
30 pub pack_path: PathBuf,
32 pub entries: Vec<PackIndexEntry>,
34}
35
36#[derive(Debug, Clone)]
41pub struct ShowIndexEntry {
42 pub oid: ObjectId,
44 pub offset: u64,
46 pub crc32: Option<u32>,
48}
49
50pub fn show_index_entries(reader: &mut dyn Read, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
61 let mut buf = Vec::new();
62 reader.read_to_end(&mut buf).map_err(Error::Io)?;
63
64 if buf.len() < 8 {
65 return Err(Error::CorruptObject(
66 "unable to read header: index file too small".to_owned(),
67 ));
68 }
69
70 let mut pos = 0usize;
71 let first_u32 = read_u32_be(&buf, &mut pos)?;
72
73 const PACK_IDX_SIGNATURE: u32 = 0xff74_4f63;
74
75 if first_u32 == PACK_IDX_SIGNATURE {
76 let version = read_u32_be(&buf, &mut pos)?;
78 if version != 2 {
79 return Err(Error::CorruptObject(format!(
80 "unknown index version: {version}"
81 )));
82 }
83 show_index_v2(&buf, &mut pos, hash_size)
84 } else {
85 pos = 0;
88 show_index_v1(&buf, &mut pos, hash_size)
89 }
90}
91
92fn show_index_v1(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
94 if buf.len() < 256 * 4 {
95 return Err(Error::CorruptObject(
96 "unable to read index: v1 fanout too short".to_owned(),
97 ));
98 }
99 let mut fanout = [0u32; 256];
100 for slot in &mut fanout {
101 *slot = read_u32_be(buf, pos)?;
102 }
103 let object_count = fanout[255] as usize;
104
105 let mut entries = Vec::with_capacity(object_count);
106 for i in 0..object_count {
107 if *pos + 4 + hash_size > buf.len() {
109 return Err(Error::CorruptObject(format!(
110 "unable to read entry {i}/{object_count}: truncated"
111 )));
112 }
113 let offset = read_u32_be(buf, pos)? as u64;
114 let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
115 *pos += hash_size;
116 entries.push(ShowIndexEntry {
117 oid,
118 offset,
119 crc32: None,
120 });
121 }
122 Ok(entries)
123}
124
125fn show_index_v2(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
128 if buf.len() < *pos + 256 * 4 {
129 return Err(Error::CorruptObject(
130 "unable to read index: v2 fanout too short".to_owned(),
131 ));
132 }
133 let mut fanout = [0u32; 256];
134 for slot in &mut fanout {
135 *slot = read_u32_be(buf, pos)?;
136 }
137 let object_count = fanout[255] as usize;
138
139 let mut oids = Vec::with_capacity(object_count);
141 for i in 0..object_count {
142 if *pos + hash_size > buf.len() {
143 return Err(Error::CorruptObject(format!(
144 "unable to read sha1 {i}/{object_count}: truncated"
145 )));
146 }
147 let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
148 *pos += hash_size;
149 oids.push(oid);
150 }
151
152 let mut crcs = Vec::with_capacity(object_count);
154 for i in 0..object_count {
155 if *pos + 4 > buf.len() {
156 return Err(Error::CorruptObject(format!(
157 "unable to read crc {i}/{object_count}: truncated"
158 )));
159 }
160 crcs.push(read_u32_be(buf, pos)?);
161 }
162
163 let mut offsets32 = Vec::with_capacity(object_count);
165 let mut large_count = 0usize;
166 for i in 0..object_count {
167 if *pos + 4 > buf.len() {
168 return Err(Error::CorruptObject(format!(
169 "unable to read 32b offset {i}/{object_count}: truncated"
170 )));
171 }
172 let v = read_u32_be(buf, pos)?;
173 if (v & 0x8000_0000) != 0 {
174 large_count += 1;
175 }
176 offsets32.push(v);
177 }
178
179 let mut large_offsets = Vec::with_capacity(large_count);
181 for i in 0..large_count {
182 if *pos + 8 > buf.len() {
183 return Err(Error::CorruptObject(format!(
184 "unable to read 64b offset {i}: truncated"
185 )));
186 }
187 large_offsets.push(read_u64_be(buf, pos)?);
188 }
189
190 let mut next_large = 0usize;
191 let mut entries = Vec::with_capacity(object_count);
192 for (i, oid) in oids.into_iter().enumerate() {
193 let raw = offsets32[i];
194 let offset = if (raw & 0x8000_0000) == 0 {
195 raw as u64
196 } else {
197 let idx = (raw & 0x7fff_ffff) as usize;
198 if idx != next_large {
199 return Err(Error::CorruptObject(format!(
200 "inconsistent 64b offset index at entry {i}"
201 )));
202 }
203 let off = large_offsets.get(next_large).copied().ok_or_else(|| {
204 Error::CorruptObject(format!("missing large offset entry {next_large}"))
205 })?;
206 next_large += 1;
207 off
208 };
209 entries.push(ShowIndexEntry {
210 oid,
211 offset,
212 crc32: Some(crcs[i]),
213 });
214 }
215 Ok(entries)
216}
217
218#[derive(Debug, Clone, Default)]
220pub struct LocalPackInfo {
221 pub pack_count: usize,
223 pub object_count: usize,
225 pub size_bytes: u64,
227 pub object_ids: HashSet<ObjectId>,
229}
230
231pub fn read_local_pack_indexes(objects_dir: &Path) -> Result<Vec<PackIndex>> {
238 let pack_dir = objects_dir.join("pack");
239 let rd = match fs::read_dir(&pack_dir) {
240 Ok(rd) => rd,
241 Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
242 Err(err) => return Err(Error::Io(err)),
243 };
244
245 let mut out = Vec::new();
246 for entry in rd {
247 let entry = entry.map_err(Error::Io)?;
248 let path = entry.path();
249 if path.extension().and_then(|s| s.to_str()) != Some("idx") {
250 continue;
251 }
252 if let Ok(idx) = read_pack_index(&path) {
253 out.push(idx);
254 }
255 }
256 Ok(out)
257}
258
259pub fn collect_local_pack_info(objects_dir: &Path) -> Result<LocalPackInfo> {
265 let indexes = read_local_pack_indexes(objects_dir)?;
266 let mut info = LocalPackInfo::default();
267 for idx in indexes {
268 let pack_meta = fs::metadata(&idx.pack_path).map_err(Error::Io)?;
269 let idx_meta = fs::metadata(&idx.idx_path).map_err(Error::Io)?;
270 info.pack_count += 1;
271 info.object_count += idx.entries.len();
272 info.size_bytes += pack_meta.len() + idx_meta.len();
273 for entry in idx.entries {
274 info.object_ids.insert(entry.oid);
275 }
276 }
277 Ok(info)
278}
279
280pub fn read_pack_index(idx_path: &Path) -> Result<PackIndex> {
286 let bytes = fs::read(idx_path).map_err(Error::Io)?;
287 if bytes.len() < 8 + 256 * 4 + 40 {
288 return Err(Error::CorruptObject(format!(
289 "index file {} is too small",
290 idx_path.display()
291 )));
292 }
293
294 let mut pos = 0usize;
295 let magic = &bytes[pos..pos + 4];
296 pos += 4;
297 if magic != [0xff, b't', b'O', b'c'] {
298 return Err(Error::CorruptObject(format!(
299 "unsupported idx signature in {}",
300 idx_path.display()
301 )));
302 }
303 let version = read_u32_be(&bytes, &mut pos)?;
304 if version != 2 {
305 return Err(Error::CorruptObject(format!(
306 "unsupported idx version {} in {}",
307 version,
308 idx_path.display()
309 )));
310 }
311
312 let mut fanout = [0u32; 256];
313 for slot in &mut fanout {
314 *slot = read_u32_be(&bytes, &mut pos)?;
315 }
316 let object_count = fanout[255] as usize;
317
318 let need = pos
319 .saturating_add(object_count * 20)
320 .saturating_add(object_count * 4)
321 .saturating_add(object_count * 4)
322 .saturating_add(40);
323 if bytes.len() < need {
324 return Err(Error::CorruptObject(format!(
325 "truncated idx file {}",
326 idx_path.display()
327 )));
328 }
329
330 let mut oids = Vec::with_capacity(object_count);
331 for _ in 0..object_count {
332 let oid = ObjectId::from_bytes(&bytes[pos..pos + 20])?;
333 pos += 20;
334 oids.push(oid);
335 }
336
337 pos += object_count * 4;
339
340 let mut offsets32 = Vec::with_capacity(object_count);
341 let mut large_count = 0usize;
342 for _ in 0..object_count {
343 let v = read_u32_be(&bytes, &mut pos)?;
344 if (v & 0x8000_0000) != 0 {
345 large_count += 1;
346 }
347 offsets32.push(v);
348 }
349
350 if bytes.len() < pos + large_count * 8 + 40 {
351 return Err(Error::CorruptObject(format!(
352 "truncated large offset table in {}",
353 idx_path.display()
354 )));
355 }
356 let mut large_offsets = Vec::with_capacity(large_count);
357 for _ in 0..large_count {
358 large_offsets.push(read_u64_be(&bytes, &mut pos)?);
359 }
360
361 let mut next_large = 0usize;
362 let mut entries = Vec::with_capacity(object_count);
363 for (i, oid) in oids.into_iter().enumerate() {
364 let raw = offsets32[i];
365 let offset = if (raw & 0x8000_0000) == 0 {
366 raw as u64
367 } else {
368 let off = large_offsets.get(next_large).copied().ok_or_else(|| {
369 Error::CorruptObject(format!("bad large offset index in {}", idx_path.display()))
370 })?;
371 next_large += 1;
372 off
373 };
374 entries.push(PackIndexEntry { oid, offset });
375 }
376
377 let mut pack_path = idx_path.to_path_buf();
378 pack_path.set_extension("pack");
379 Ok(PackIndex {
380 idx_path: idx_path.to_path_buf(),
381 pack_path,
382 entries,
383 })
384}
385
/// The kind of entry stored at a given position in a pack file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedType {
    /// A whole commit object.
    Commit,
    /// A whole tree object.
    Tree,
    /// A whole blob object.
    Blob,
    /// A whole tag object.
    Tag,
    /// A delta whose base is named by a backwards offset within the same pack.
    OfsDelta,
    /// A delta whose base is named by object id.
    RefDelta,
}

impl PackedType {
    /// Returns the lowercase textual name of this entry type.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            PackedType::Commit => "commit",
            PackedType::Tree => "tree",
            PackedType::Blob => "blob",
            PackedType::Tag => "tag",
            PackedType::OfsDelta => "ofs-delta",
            PackedType::RefDelta => "ref-delta",
        }
    }
}
417
418#[derive(Debug, Clone)]
420pub struct VerifyObjectRecord {
421 pub oid: ObjectId,
423 pub packed_type: PackedType,
425 pub size: u64,
427 pub size_in_pack: u64,
429 pub offset: u64,
431 pub depth: Option<u64>,
433 pub base_oid: Option<ObjectId>,
435}
436
437pub fn verify_pack_and_collect(idx_path: &Path) -> Result<Vec<VerifyObjectRecord>> {
443 let idx = read_pack_index(idx_path)?;
444 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
445 if pack_bytes.len() < 12 + 20 {
446 return Err(Error::CorruptObject(format!(
447 "pack file {} is too small",
448 idx.pack_path.display()
449 )));
450 }
451 if &pack_bytes[0..4] != b"PACK" {
452 return Err(Error::CorruptObject(format!(
453 "pack file {} has invalid signature",
454 idx.pack_path.display()
455 )));
456 }
457 let version = u32::from_be_bytes(pack_bytes[4..8].try_into().unwrap_or([0, 0, 0, 0]));
458 if version != 2 && version != 3 {
459 return Err(Error::CorruptObject(format!(
460 "unsupported pack version {} in {}",
461 version,
462 idx.pack_path.display()
463 )));
464 }
465 let count = u32::from_be_bytes(pack_bytes[8..12].try_into().unwrap_or([0, 0, 0, 0])) as usize;
466 if count != idx.entries.len() {
467 return Err(Error::CorruptObject(format!(
468 "pack/index object count mismatch for {}",
469 idx.pack_path.display()
470 )));
471 }
472
473 let mut by_offset: BTreeMap<u64, ObjectId> = BTreeMap::new();
474 for entry in &idx.entries {
475 by_offset.insert(entry.offset, entry.oid);
476 }
477 let offsets: Vec<u64> = by_offset.keys().copied().collect();
478 if offsets.is_empty() {
479 return Ok(Vec::new());
480 }
481
482 let mut by_oid: HashMap<ObjectId, usize> = HashMap::new();
483 let mut records: Vec<VerifyObjectRecord> = Vec::with_capacity(offsets.len());
484 for (i, offset) in offsets.iter().copied().enumerate() {
485 let oid = by_offset.get(&offset).copied().ok_or_else(|| {
486 Error::CorruptObject(format!("missing object id for offset {}", offset))
487 })?;
488 let next_off = offsets
489 .get(i + 1)
490 .copied()
491 .unwrap_or((pack_bytes.len() - 20) as u64);
492 if next_off <= offset || next_off > (pack_bytes.len() - 20) as u64 {
493 return Err(Error::CorruptObject(format!(
494 "invalid object boundaries at offset {} in {}",
495 offset,
496 idx.pack_path.display()
497 )));
498 }
499 let mut p = offset as usize;
500 let (packed_type, size) = parse_pack_object_header(&pack_bytes, &mut p)?;
501 let mut base_oid = None;
502 let mut depth = None;
503
504 match packed_type {
505 PackedType::RefDelta => {
506 if p + 20 > pack_bytes.len() {
507 return Err(Error::CorruptObject(format!(
508 "truncated ref-delta base at offset {}",
509 offset
510 )));
511 }
512 base_oid = Some(ObjectId::from_bytes(&pack_bytes[p..p + 20])?);
513 }
514 PackedType::OfsDelta => {
515 let base_offset = parse_ofs_delta_base(&pack_bytes, &mut p, offset)?;
516 let base_depth = records
517 .iter()
518 .find(|r| r.offset == base_offset)
519 .and_then(|r| r.depth)
520 .unwrap_or(0);
521 depth = Some(base_depth + 1);
522 }
523 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {}
524 }
525
526 let size_in_pack = next_off - offset;
527 records.push(VerifyObjectRecord {
528 oid,
529 packed_type,
530 size,
531 size_in_pack,
532 offset,
533 depth,
534 base_oid,
535 });
536 by_oid.insert(oid, i);
537 }
538
539 for i in 0..records.len() {
541 if records[i].packed_type != PackedType::RefDelta {
542 continue;
543 }
544 let base = records[i]
545 .base_oid
546 .ok_or_else(|| Error::CorruptObject("ref-delta missing base oid".to_owned()))?;
547 let base_depth = by_oid
548 .get(&base)
549 .and_then(|idx| records.get(*idx))
550 .and_then(|r| r.depth)
551 .unwrap_or(0);
552 records[i].depth = Some(base_depth + 1);
553 }
554
555 Ok(records)
556}
557
558pub fn read_alternates_recursive(objects_dir: &Path) -> Result<Vec<PathBuf>> {
564 let mut visited = HashSet::new();
565 let mut out = Vec::new();
566 read_alternates_inner(objects_dir, &mut visited, &mut out)?;
567 Ok(out)
568}
569
570fn read_alternates_inner(
571 objects_dir: &Path,
572 visited: &mut HashSet<PathBuf>,
573 out: &mut Vec<PathBuf>,
574) -> Result<()> {
575 let canonical = canonical_or_self(objects_dir);
576 let alt_file = canonical.join("info").join("alternates");
577 let text = match fs::read_to_string(&alt_file) {
578 Ok(text) => text,
579 Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
580 Err(err) => return Err(Error::Io(err)),
581 };
582
583 for raw in text.lines() {
584 let line = raw.trim();
585 if line.is_empty() {
586 continue;
587 }
588 let candidate = if Path::new(line).is_absolute() {
589 PathBuf::from(line)
590 } else {
591 canonical.join(line)
592 };
593 let candidate = canonical_or_self(&candidate);
594 if visited.insert(candidate.clone()) {
595 out.push(candidate.clone());
596 read_alternates_inner(&candidate, visited, out)?;
597 }
598 }
599 Ok(())
600}
601
/// Returns the canonical form of `path`, or an unchanged copy of `path` when
/// canonicalisation fails (for example because the path does not exist).
fn canonical_or_self(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
605
606fn packed_type_to_kind(pt: PackedType) -> Result<ObjectKind> {
608 match pt {
609 PackedType::Commit => Ok(ObjectKind::Commit),
610 PackedType::Tree => Ok(ObjectKind::Tree),
611 PackedType::Blob => Ok(ObjectKind::Blob),
612 PackedType::Tag => Ok(ObjectKind::Tag),
613 PackedType::OfsDelta | PackedType::RefDelta => Err(Error::CorruptObject(
614 "cannot convert delta type to object kind directly".to_owned(),
615 )),
616 }
617}
618
619fn decompress_pack_data(bytes: &[u8], pos: &mut usize, expected_size: u64) -> Result<Vec<u8>> {
624 let slice = &bytes[*pos..];
625 let mut decoder = ZlibDecoder::new(slice);
626 let mut out = Vec::with_capacity(expected_size as usize);
627 decoder
628 .read_to_end(&mut out)
629 .map_err(|e| Error::Zlib(e.to_string()))?;
630 *pos += decoder.total_in() as usize;
631 Ok(out)
632}
633
634fn read_pack_object_at(
639 pack_bytes: &[u8],
640 offset: u64,
641 idx: &PackIndex,
642 depth: usize,
643) -> Result<(ObjectKind, Vec<u8>)> {
644 if depth > 50 {
645 return Err(Error::CorruptObject(
646 "delta chain too deep (>50)".to_owned(),
647 ));
648 }
649 let mut pos = offset as usize;
650 let (packed_type, size) = parse_pack_object_header(pack_bytes, &mut pos)?;
651
652 match packed_type {
653 PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
654 let data = decompress_pack_data(pack_bytes, &mut pos, size)?;
655 let kind = packed_type_to_kind(packed_type)?;
656 Ok((kind, data))
657 }
658 PackedType::OfsDelta => {
659 let base_offset = parse_ofs_delta_base(pack_bytes, &mut pos, offset)?;
660 let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
661 let (base_kind, base_data) =
662 read_pack_object_at(pack_bytes, base_offset, idx, depth + 1)?;
663 let result = apply_delta(&base_data, &delta_data)?;
664 Ok((base_kind, result))
665 }
666 PackedType::RefDelta => {
667 if pos + 20 > pack_bytes.len() {
668 return Err(Error::CorruptObject(
669 "truncated ref-delta base OID".to_owned(),
670 ));
671 }
672 let base_oid = ObjectId::from_bytes(&pack_bytes[pos..pos + 20])?;
673 pos += 20;
674 let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
675 let base_entry = idx
677 .entries
678 .iter()
679 .find(|e| e.oid == base_oid)
680 .ok_or_else(|| {
681 Error::CorruptObject(format!(
682 "ref-delta base {} not found in pack",
683 base_oid
684 ))
685 })?;
686 let (base_kind, base_data) =
687 read_pack_object_at(pack_bytes, base_entry.offset, idx, depth + 1)?;
688 let result = apply_delta(&base_data, &delta_data)?;
689 Ok((base_kind, result))
690 }
691 }
692}
693
694pub fn read_object_from_pack(idx: &PackIndex, oid: &ObjectId) -> Result<Object> {
703 let entry = idx
704 .entries
705 .iter()
706 .find(|e| e.oid == *oid)
707 .ok_or_else(|| Error::ObjectNotFound(oid.to_hex()))?;
708
709 let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
710 let (kind, data) = read_pack_object_at(&pack_bytes, entry.offset, idx, 0)?;
711 Ok(Object::new(kind, data))
712}
713
714pub fn read_object_from_packs(objects_dir: &Path, oid: &ObjectId) -> Result<Object> {
720 let indexes = read_local_pack_indexes(objects_dir)?;
721 for idx in &indexes {
722 if idx.entries.iter().any(|e| e.oid == *oid) {
723 return read_object_from_pack(idx, oid);
724 }
725 }
726 Err(Error::ObjectNotFound(oid.to_hex()))
727}
728
729fn parse_pack_object_header(bytes: &[u8], pos: &mut usize) -> Result<(PackedType, u64)> {
730 let first = *bytes.get(*pos).ok_or_else(|| {
731 Error::CorruptObject("unexpected end of pack header while decoding object".to_owned())
732 })?;
733 *pos += 1;
734
735 let type_code = (first >> 4) & 0x7;
736 let mut size = (first & 0x0f) as u64;
737 let mut shift = 4u32;
738 let mut c = first;
739 while (c & 0x80) != 0 {
740 c = *bytes.get(*pos).ok_or_else(|| {
741 Error::CorruptObject("unexpected end of variable size header".to_owned())
742 })?;
743 *pos += 1;
744 size |= ((c & 0x7f) as u64) << shift;
745 shift += 7;
746 }
747
748 let packed_type = match type_code {
749 1 => PackedType::Commit,
750 2 => PackedType::Tree,
751 3 => PackedType::Blob,
752 4 => PackedType::Tag,
753 6 => PackedType::OfsDelta,
754 7 => PackedType::RefDelta,
755 _ => {
756 return Err(Error::CorruptObject(format!(
757 "unsupported packed object type {}",
758 type_code
759 )))
760 }
761 };
762 Ok((packed_type, size))
763}
764
765fn parse_ofs_delta_base(bytes: &[u8], pos: &mut usize, this_offset: u64) -> Result<u64> {
766 let mut c = *bytes
767 .get(*pos)
768 .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
769 *pos += 1;
770 let mut value = (c & 0x7f) as u64;
771 while (c & 0x80) != 0 {
772 c = *bytes
773 .get(*pos)
774 .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
775 *pos += 1;
776 value = ((value + 1) << 7) | (c & 0x7f) as u64;
777 }
778 this_offset
779 .checked_sub(value)
780 .ok_or_else(|| Error::CorruptObject("invalid ofs-delta base offset".to_owned()))
781}
782
783fn read_u32_be(bytes: &[u8], pos: &mut usize) -> Result<u32> {
784 if bytes.len() < *pos + 4 {
785 return Err(Error::CorruptObject(
786 "unexpected end of idx while reading u32".to_owned(),
787 ));
788 }
789 let v = u32::from_be_bytes(
790 bytes[*pos..*pos + 4]
791 .try_into()
792 .map_err(|_| Error::CorruptObject("failed to parse u32".to_owned()))?,
793 );
794 *pos += 4;
795 Ok(v)
796}
797
798fn read_u64_be(bytes: &[u8], pos: &mut usize) -> Result<u64> {
799 if bytes.len() < *pos + 8 {
800 return Err(Error::CorruptObject(
801 "unexpected end of idx while reading u64".to_owned(),
802 ));
803 }
804 let v = u64::from_be_bytes(
805 bytes[*pos..*pos + 8]
806 .try_into()
807 .map_err(|_| Error::CorruptObject("failed to parse u64".to_owned()))?,
808 );
809 *pos += 8;
810 Ok(v)
811}