1use nom::{
2 IResult, Parser,
3 bytes::complete::{tag, take},
4 error::{Error, ErrorKind},
5 multi::count,
6 number::complete::{be_u32, be_u64},
7};
8use std::fmt;
9
10use crate::git::pack::PackError;
11
12#[derive(Debug, Clone)]
17pub struct PackIndex {
18 pub version: u32,
20 pub fan_out: [u32; 256],
22 pub object_names: Vec<[u8; 20]>,
24 pub crc32_checksums: Vec<u32>,
26 pub offsets: Vec<u32>,
28 pub large_offsets: Option<Vec<u64>>,
30 pub pack_checksum: [u8; 20],
32 pub index_checksum: [u8; 20],
34 pub raw_data: Vec<u8>,
36}
37
38impl PackIndex {
39 pub fn parse(input: &[u8]) -> IResult<&[u8], Self> {
41 let original_input = input;
42
43 let (input, _) = Self::parse_header(input)?;
45
46 let (input, fan_out) = Self::parse_fan_out_table(input)?;
48
49 let total_objects = fan_out[255] as usize;
51
52 let (input, object_names) = count(Self::parse_object_name, total_objects).parse(input)?;
54
55 let (input, crc32_checksums) = count(be_u32, total_objects).parse(input)?;
57
58 let (input, offsets) = count(be_u32, total_objects).parse(input)?;
60
61 let needs_large_offsets = offsets.iter().any(|&offset| offset & 0x80000000 != 0);
63 let large_offset_count = offsets
64 .iter()
65 .filter(|&&offset| offset & 0x80000000 != 0)
66 .count();
67
68 let (input, large_offsets) = if needs_large_offsets {
70 let (input, large_offsets) = count(be_u64, large_offset_count).parse(input)?;
71 (input, Some(large_offsets))
72 } else {
73 (input, None)
74 };
75
76 let (input, pack_checksum_bytes) = take(20usize)(input)?;
78 let mut pack_checksum = [0u8; 20];
79 pack_checksum.copy_from_slice(pack_checksum_bytes);
80
81 let (input, index_checksum_bytes) = take(20usize)(input)?;
83 let mut index_checksum = [0u8; 20];
84 index_checksum.copy_from_slice(index_checksum_bytes);
85
86 let consumed = original_input.len() - input.len();
88 let raw_data = original_input[..consumed].to_vec();
89
90 Ok((
91 input,
92 PackIndex {
93 version: 2, fan_out,
95 object_names,
96 crc32_checksums,
97 offsets,
98 large_offsets,
99 pack_checksum,
100 index_checksum,
101 raw_data,
102 },
103 ))
104 }
105
106 fn parse_header(input: &[u8]) -> IResult<&[u8], ()> {
108 let magic_bytes: &[u8] = &[0xff, 0x74, 0x4f, 0x63];
110 let (input, _) = tag(magic_bytes)(input)?;
111
112 let (input, version) = be_u32(input)?;
114
115 if version != 2 {
116 return Err(nom::Err::Error(Error::new(input, ErrorKind::Tag)));
117 }
118
119 Ok((input, ()))
120 }
121
122 fn parse_fan_out_table(input: &[u8]) -> IResult<&[u8], [u32; 256]> {
124 let (input, fan_out_vec) = count(be_u32, 256).parse(input)?;
125
126 let mut fan_out = [0u32; 256];
128 for (i, &value) in fan_out_vec.iter().enumerate() {
129 fan_out[i] = value;
130 }
131
132 for i in 1..256 {
134 if fan_out[i] < fan_out[i - 1] {
135 return Err(nom::Err::Error(Error::new(input, ErrorKind::Verify)));
136 }
137 }
138
139 Ok((input, fan_out))
140 }
141
142 fn parse_object_name(input: &[u8]) -> IResult<&[u8], [u8; 20]> {
144 let (input, name_bytes) = take(20usize)(input)?;
145 let mut name = [0u8; 20];
146 name.copy_from_slice(name_bytes);
147 Ok((input, name))
148 }
149
150 pub fn object_count(&self) -> usize {
152 self.object_names.len()
153 }
154
155 pub fn lookup_object(&self, sha1: &[u8; 20]) -> Option<u64> {
158 let first_byte = sha1[0] as usize;
160
161 let start_idx = if first_byte == 0 {
163 0
164 } else {
165 self.fan_out[first_byte - 1] as usize
166 };
167 let end_idx = self.fan_out[first_byte] as usize;
168
169 let search_slice = &self.object_names[start_idx..end_idx];
171
172 match search_slice.binary_search(sha1) {
173 Ok(relative_idx) => {
174 let absolute_idx = start_idx + relative_idx;
175 Some(self.get_object_offset(absolute_idx))
176 }
177 Err(_) => None,
178 }
179 }
180
181 pub fn get_object_offset(&self, index: usize) -> u64 {
183 if index >= self.offsets.len() {
184 return 0;
185 }
186
187 let offset = self.offsets[index];
188
189 if offset & 0x80000000 != 0 {
191 let large_offset_index = (offset & 0x7fffffff) as usize;
193 if let Some(ref large_offsets) = self.large_offsets
194 && large_offset_index < large_offsets.len()
195 {
196 return large_offsets[large_offset_index];
197 }
198 }
199
200 offset as u64
201 }
202
203 pub fn get_object_crc32(&self, index: usize) -> Option<u32> {
205 self.crc32_checksums.get(index).copied()
206 }
207
208 pub fn verify_checksum(&self) -> Result<(), PackError> {
210 Ok(())
214 }
215}
216
217impl fmt::Display for PackIndex {
218 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
219 writeln!(f, "Pack Index (version {})", self.version)?;
220 writeln!(f, "Total objects: {}", self.object_count())?;
221 writeln!(f, "Pack checksum: {}", hex::encode(self.pack_checksum))?;
222 writeln!(f, "Index checksum: {}", hex::encode(self.index_checksum))?;
223
224 if let Some(ref large_offsets) = self.large_offsets {
225 writeln!(f, "Large offsets: {} entries", large_offsets.len())?;
226 }
227
228 writeln!(f, "\nObject distribution by first byte:")?;
230 let mut prev_count = 0;
231 for (byte, &count) in self.fan_out.iter().enumerate() {
232 let objects_for_byte = count - prev_count;
233 if objects_for_byte > 0 {
234 writeln!(f, " 0x{:02x}: {} objects", byte, objects_for_byte)?;
235 }
236 prev_count = count;
237 }
238
239 Ok(())
240 }
241}
242
243#[cfg(test)]
244mod tests {
245 use super::*;
246
247 #[test]
248 fn test_parse_header() {
249 let header_data = [
250 0xff, 0x74, 0x4f, 0x63, 0x00, 0x00, 0x00, 0x02, ];
253
254 let (remaining, _) = PackIndex::parse_header(&header_data).unwrap();
255 assert!(remaining.is_empty());
256 }
257
258 #[test]
259 fn test_parse_object_name() {
260 let name_data = [0x01; 20]; let (remaining, name) = PackIndex::parse_object_name(&name_data).unwrap();
263 assert!(remaining.is_empty());
264 assert_eq!(name, [0x01; 20]);
265 }
266
267 #[test]
268 fn test_fan_out_validation() {
269 let mut fan_out_data = Vec::new();
271 for i in 0..256 {
272 fan_out_data.extend_from_slice(&(i as u32).to_be_bytes());
273 }
274
275 let (_, fan_out) = PackIndex::parse_fan_out_table(&fan_out_data).unwrap();
276 assert_eq!(fan_out[0], 0);
277 assert_eq!(fan_out[255], 255);
278 }
279
280 #[test]
281 fn test_lookup_object() {
282 let mut fan_out = [0u32; 256];
284 fan_out[0] = 1; for i in 1..256 {
286 fan_out[i] = 1; }
288
289 let object_names = vec![[0x00; 20]]; let crc32_checksums = vec![0x12345678];
291 let offsets = vec![100]; let index = PackIndex {
294 version: 2,
295 fan_out,
296 object_names,
297 crc32_checksums,
298 offsets,
299 large_offsets: None,
300 pack_checksum: [0; 20],
301 index_checksum: [0; 20],
302 raw_data: vec![],
303 };
304
305 assert_eq!(index.lookup_object(&[0x00; 20]), Some(100));
307
308 assert_eq!(index.lookup_object(&[0xff; 20]), None);
310 }
311
312 #[test]
313 fn test_multiple_objects_lookup() {
314 let mut fan_out = [0u32; 256];
316
317 fan_out[0] = 2; fan_out[1] = 3; for i in 2..0xaa {
321 fan_out[i] = 3; }
323 fan_out[0xaa] = 4; for i in 0xab..0xff {
325 fan_out[i] = 4; }
327 fan_out[0xff] = 5; let mut obj1 = [0u8; 20];
331 obj1[0] = 0x00;
332 let mut obj2 = [0u8; 20];
333 obj2[0] = 0x00;
334 obj2[1] = 0x01; let mut obj3 = [0u8; 20];
336 obj3[0] = 0x01;
337 let mut obj4 = [0u8; 20];
338 obj4[0] = 0xaa;
339 let mut obj5 = [0u8; 20];
340 obj5[0] = 0xff;
341
342 let object_names = vec![obj1, obj2, obj3, obj4, obj5];
343 let crc32_checksums = vec![0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555];
344 let offsets = vec![100, 200, 300, 400, 500];
345
346 let index = PackIndex {
347 version: 2,
348 fan_out,
349 object_names,
350 crc32_checksums,
351 offsets,
352 large_offsets: None,
353 pack_checksum: [0; 20],
354 index_checksum: [0; 20],
355 raw_data: vec![],
356 };
357
358 assert_eq!(index.lookup_object(&obj1), Some(100));
360 assert_eq!(index.lookup_object(&obj2), Some(200));
361 assert_eq!(index.lookup_object(&obj3), Some(300));
362 assert_eq!(index.lookup_object(&obj4), Some(400));
363 assert_eq!(index.lookup_object(&obj5), Some(500));
364
365 let mut nonexistent = [0u8; 20];
367 nonexistent[0] = 0x80; assert_eq!(index.lookup_object(&nonexistent), None);
369
370 assert_eq!(index.get_object_crc32(0), Some(0x11111111));
372 assert_eq!(index.get_object_crc32(4), Some(0x55555555));
373 assert_eq!(index.get_object_crc32(10), None); }
375
376 #[test]
377 fn test_large_offsets() {
378 let mut fan_out = [0u32; 256];
380 fan_out[0] = 1;
381 for i in 1..256 {
382 fan_out[i] = 1;
383 }
384
385 let object_names = vec![[0x00; 20]];
386 let crc32_checksums = vec![0x12345678];
387 let offsets = vec![0x80000000]; let large_offsets = Some(vec![0x123456789abcdef0]); let index = PackIndex {
391 version: 2,
392 fan_out,
393 object_names,
394 crc32_checksums,
395 offsets,
396 large_offsets,
397 pack_checksum: [0; 20],
398 index_checksum: [0; 20],
399 raw_data: vec![],
400 };
401
402 assert_eq!(index.get_object_offset(0), 0x123456789abcdef0);
404 assert_eq!(index.lookup_object(&[0x00; 20]), Some(0x123456789abcdef0));
405 }
406}