1use std::io::{Read, Seek, SeekFrom};
2use miniz_oxide::{DataFormat, MZFlush};
3use miniz_oxide::inflate::TINFLStatus;
4use miniz_oxide::inflate::stream::{InflateState, MinReset};
5use thiserror::Error;
6use sha1::Digest;
7use std::collections::BTreeMap;
8use crate::io::{token, take_sized, u32_be, u8};
9
/// Initial length, in bytes, of the buffer that compressed pack data is read into.
const INPUT_BUFFER_SIZE: usize = 8 << 10;
/// Initial length, in bytes, of the buffer that inflated data is written into.
const OUTPUT_BUFFER_SIZE: usize = 16 << 10;
12
13#[derive(Debug, Error)]
14pub enum UnpackError {
15 #[error("invalid object type")]
16 InvalidObjectType,
17 #[error("invalid TINFL status")]
18 InvalidTINFLStatus(TINFLStatus),
19 #[error("invalid hash")]
20 InvalidHash,
21 #[error(transparent)]
22 IOError(#[from] std::io::Error),
23}
24
25fn vint_from_reader<R: Read>(reader: &mut R) -> std::io::Result<(u8, usize, usize)> {
27 let mut n = u8(reader)?;
28 let object_type = (n >> 4) & 0b00000111;
29 let mut len = (n as usize) & 0b00001111;
30
31 let mut shift = 4;
32 let mut used = 1;
33 while n & 0b10000000 != 0 {
34 n = u8(reader)?;
35 len |= ((n as usize) & 0b01111111) << shift;
36 shift += 7;
37 used += 1;
38 }
39 Ok((object_type, len, used))
40}
41
42fn ofs_from_reader<R: Read>(reader: &mut R) -> std::io::Result<(usize, usize)> {
44 let mut n = u8(reader)?;
45 let mut used = 1;
46 let mut distance = n as usize & 0b01111111;
47 while n & 0b10000000 != 0 {
48 n = u8(reader)?;
49 distance += 1;
50 distance = (distance << 7) + (n & 0b01111111) as usize;
51 used += 1;
52 }
53 Ok((distance, used))
54}
55
56#[derive(Debug)]
57pub struct Pack {
58 pub version: u32,
59 pub objects: BTreeMap<usize, Object>,
60 pub sha1: Vec<u8>,
61}
62
63#[derive(Debug, PartialEq)]
64pub struct Object {
65 pub object_type: ObjectType,
66 pub data: Vec<u8>,
67 pub compressed_length: usize,
68 pub offset: usize,
69}
70
/// Kind of a pack entry, decoded from the 3-bit type field of its header.
#[derive(Debug, PartialEq)]
pub enum ObjectType {
    /// Header type 1.
    Commit,
    /// Header type 2.
    Tree,
    /// Header type 3.
    Blob,
    /// Header type 4.
    Tag,
    /// Header type 6; carries the distance decoded right after the entry header.
    OfsDelta(usize),
    /// Header type 7; carries a byte vector.
    /// NOTE(review): presumably meant to hold the base object's name — confirm
    /// against the parser that constructs this variant.
    RefDelta(Vec<u8>),
}
80
81impl Pack {
82 pub fn from_reader<R: Read + Seek>(reader: &mut R) -> std::result::Result<Self, UnpackError> {
83 token(reader, b"PACK")?;
84 let version = u32_be(reader)?;
85 let objects = u32_be(reader)?;
86
87 let mut offset = 12;
88 let mut result = BTreeMap::new();
89
90 let mut state = InflateState::new_boxed(DataFormat::Zlib);
91 let mut input_buf = vec![0u8; INPUT_BUFFER_SIZE];
92 let mut output_buf = vec![0u8; OUTPUT_BUFFER_SIZE];
93
94 for _ in 0..objects {
95 use crate::pack::ObjectType::*;
96 let (object_type, decompressed_length, mut object_size) = vint_from_reader(reader)?;
97 let object_type = match object_type {
98 1 => Commit,
99 2 => Tree,
100 3 => Blob,
101 4 => Tag,
102 6 => {
103 let (d, u) = ofs_from_reader(reader)?;
104 object_size += u;
105 OfsDelta(d)
106 }
107 7 => RefDelta(Vec::new()), _ => return Err(UnpackError::InvalidObjectType),
109 };
110
111 let mut compressed_length = 0;
112 let mut data = Vec::with_capacity(decompressed_length);
113 loop {
114 let bytes_available = reader.read(&mut input_buf)?;
115
116 let (consumed, backseek, _) = Pack::extract_from(&mut state, bytes_available, &input_buf, &mut output_buf);
117 compressed_length += consumed;
118 data.append(&mut output_buf);
119 reader.seek(SeekFrom::Current(backseek))?;
120
121 input_buf.resize(2048, 0);
122 output_buf.resize(4096, 0);
123 match state.last_status() {
124 TINFLStatus::NeedsMoreInput => {
127 continue;
128 }
129 TINFLStatus::HasMoreOutput => {
132 loop {
133 let (_, _, produced) = Pack::extract_from(&mut state, 0, &[], &mut output_buf);
134 data.append(&mut output_buf);
135 output_buf.resize(4096, 0);
136 if produced < OUTPUT_BUFFER_SIZE {
137 break;
138 }
139 }
140 continue;
141 }
142 TINFLStatus::Done => {
146 while data.len() < decompressed_length {
147 Pack::extract_from(&mut state, 0, &[], &mut output_buf);
148 data.append(&mut output_buf);
149 output_buf.resize(4096, 0);
150 }
151 assert_eq!(data.len(), decompressed_length, "data length larget than expected decompressed length");
152 state.reset_as(MinReset);
153 break;
154 }
155 s => return Err(UnpackError::InvalidTINFLStatus(s)),
156 }
157 }
158 object_size += compressed_length;
159
160 let object = Object {
161 object_type,
162 data,
163 compressed_length,
164 offset,
165 };
166 result.insert(offset, object);
167 offset += object_size;
168 }
169
170 let mut hasher = sha1::Sha1::new();
172 reader.seek(SeekFrom::Start(0))?;
173 std::io::copy(&mut reader.take(offset as u64), &mut hasher)?;
174 let hash_result = hasher.finalize();
175 let (checksum, got) = take_sized(reader, 20)?;
176 if got != 20 || hash_result[..] != checksum[..] {
177 return Err(UnpackError::InvalidHash);
178 }
179
180 Ok(Self {
184 version,
185 objects: result,
186 sha1: checksum,
187 })
188 }
189
190 fn extract_from(mut state: &mut Box<InflateState>, bytes_available: usize, input_buf: &[u8], mut output_buf: &mut Vec<u8>) -> (usize, i64, usize) {
191 let r = miniz_oxide::inflate::stream::inflate(
192 &mut state,
193 &input_buf[..bytes_available],
194 &mut output_buf,
195 MZFlush::Partial,
196 );
197 let consumed = r.bytes_consumed;
198 let backseek = (consumed as i64) - (bytes_available as i64);
199 let produced = r.bytes_written;
200 if produced != output_buf.len() {
201 output_buf.truncate(produced);
202 }
203 (consumed, backseek, produced)
204 }
205}
206
#[cfg(test)]
mod tests {
    use crate::pack::{ofs_from_reader, vint_from_reader, Object, ObjectType};
    use crate::Pack;
    use std::io::Cursor;

    /// Entry headers of one, two and four bytes decode to `(type, size, bytes_read)`.
    #[test]
    fn test_vint() {
        assert_eq!(
            vint_from_reader(&mut Cursor::new(&[0b00101111])).unwrap(),
            (0b010, 0b1111, 1)
        );
        assert_eq!(
            vint_from_reader(&mut Cursor::new(&[0b10010101, 0b00001010])).unwrap(),
            (0b001, 0b0101 + (0b1010 << 4), 2)
        );
        assert_eq!(
            vint_from_reader(&mut Cursor::new(&[0b10101111, 0b10101100, 0b10010010, 0b01110101]))
                .unwrap(),
            (0b010, 0b1111 + (0b0101100 << 4) + (0b0010010 << 11) + (0b1110101 << 18), 4),
        );
    }

    /// Offset-delta distances decode to `(distance, bytes_read)`; each
    /// continuation adds one to the running value before shifting it.
    #[test]
    fn test_ofs() {
        assert_eq!(ofs_from_reader(&mut Cursor::new(&[0x05u8][..])).unwrap(), (5, 1));
        assert_eq!(ofs_from_reader(&mut Cursor::new(&[0x80u8, 0x00][..])).unwrap(), (128, 2));
        assert_eq!(ofs_from_reader(&mut Cursor::new(&[0x81u8, 0x00][..])).unwrap(), (256, 2));
    }

    /// A three-object pack (commit, tree, blob) parses into the expected
    /// objects, offsets, compressed lengths and trailing checksum.
    #[test]
    fn test_unpack() {
        let data = [
            0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03,
            0x95, 0x0a, 0x78, 0x9c, 0x95, 0x8b, 0x3b, 0x0a, 0x42, 0x31, 0x10, 0x00,
            0xfb, 0x9c, 0x62, 0x7b, 0x41, 0x36, 0xcf, 0x7c, 0x41, 0xc4, 0xd6, 0x63,
            0x6c, 0xcc, 0x06, 0x03, 0xae, 0x81, 0xb8, 0x16, 0xef, 0xf6, 0x06, 0x3c,
            0x81, 0xc5, 0x54, 0x33, 0xa3, 0x93, 0x19, 0x32, 0xd6, 0x74, 0xaa, 0xa5,
            0x05, 0xf2, 0x39, 0xd5, 0x10, 0x1c, 0x7a, 0x2e, 0x58, 0x5c, 0x21, 0xaa,
            0xd6, 0xe5, 0xa5, 0xb1, 0x6d, 0xd1, 0x7b, 0x43, 0x1f, 0x7d, 0x8c, 0x09,
            0x3b, 0xbf, 0x95, 0x67, 0xa5, 0xdd, 0x46, 0x38, 0x8b, 0xb4, 0xeb, 0xe2,
            0x28, 0x83, 0x2f, 0x60, 0x83, 0xf5, 0x29, 0x06, 0xb7, 0x65, 0x38, 0x60,
            0x42, 0x34, 0xf7, 0x21, 0xd2, 0x75, 0xd5, 0xff, 0x4c, 0xe6, 0xf6, 0xea,
            0xda, 0xe9, 0x09, 0xbf, 0xdb, 0x7c, 0x01, 0x31, 0x47, 0x31, 0xae, 0xa5,
            0x02, 0x78, 0x9c, 0x33, 0x34, 0x30, 0x30, 0x33, 0x31, 0x51, 0x08, 0x72,
            0x75, 0x74, 0xf1, 0x75, 0xd5, 0xcb, 0x4d, 0x61, 0xe8, 0xd8, 0x59, 0x1d,
            0x76, 0x3a, 0x81, 0xb7, 0x63, 0xfb, 0xb2, 0xdd, 0x53, 0x39, 0x9e, 0x31,
            0xf0, 0x9c, 0xfb, 0xbb, 0x54, 0x1a, 0x00, 0xdd, 0x01, 0x0e, 0x01, 0x38,
            0x78, 0x9c, 0x53, 0x56, 0x08, 0x49, 0x2d, 0x2e, 0xe1, 0xe2, 0x02, 0x00,
            0x09, 0x37, 0x01, 0xf8, 0x4f, 0x10, 0xd0, 0x02, 0x25, 0x2e, 0x07, 0xc3,
            0xaf, 0xdb, 0x2d, 0xcc, 0x0a, 0xb8, 0x8d, 0x36, 0xe8, 0xab, 0x4a, 0x26,
        ];
        let pack = Pack::from_reader(&mut Cursor::new(data)).expect("parse failed");
        assert_eq!(pack.version, 2);
        assert_eq!(
            pack.objects[&12],
            Object {
                object_type: ObjectType::Commit,
                data: b"tree 90d83dbf6a598d66405eb0b4baad14990d0f2755\n\
                        author yesterday17 <mmf@mmf.moe> 1615876429 +0800\n\
                        committer yesterday17 <mmf@mmf.moe> 1615876429 +0800\n\
                        \n\
                        Initial commit\n"
                    .to_vec(),
                compressed_length: 117,
                offset: 12,
            }
        );

        let tree = &pack.objects[&131];
        assert_eq!(tree.object_type, ObjectType::Tree);
        assert!(tree.data.starts_with(b"100644 README.md"));
        assert_eq!(tree.compressed_length, 46);
        assert_eq!(tree.offset, 131);

        assert_eq!(
            pack.objects[&179],
            Object {
                object_type: ObjectType::Blob,
                data: b"# Test\n\n".to_vec(),
                compressed_length: 16,
                offset: 179,
            }
        );

        assert_eq!(
            pack.sha1,
            vec![79, 16, 208, 2, 37, 46, 7, 195, 175, 219, 45, 204, 10, 184, 141, 54, 232, 171, 74, 38]
        );
    }
}