anni_fetch/
pack.rs

use std::io::{Read, Seek, SeekFrom};
use miniz_oxide::{DataFormat, MZFlush};
use miniz_oxide::inflate::TINFLStatus;
use miniz_oxide::inflate::stream::{InflateState, MinReset};
use thiserror::Error;
use sha1::Digest;
use std::collections::BTreeMap;
use crate::io::{token, take_sized, u32_be, u8};

const INPUT_BUFFER_SIZE: usize = 8 * 1024;
const OUTPUT_BUFFER_SIZE: usize = 16 * 1024;

#[derive(Debug, Error)]
pub enum UnpackError {
    #[error("invalid object type")]
    InvalidObjectType,
    #[error("invalid TINFL status")]
    InvalidTINFLStatus(TINFLStatus),
    #[error("invalid hash")]
    InvalidHash,
    #[error(transparent)]
    IOError(#[from] std::io::Error),
}

/// Read a git object header varint and extract `(object_type, length, bytes_used)`.
///
/// The first byte carries the object type in bits 4-6 and the low 4 bits of the
/// length; while the continuation bit (MSB) is set, each following byte adds 7
/// more length bits.
fn vint_from_reader<R: Read>(reader: &mut R) -> std::io::Result<(u8, usize, usize)> {
    let mut n = u8(reader)?;
    let object_type = (n >> 4) & 0b00000111;
    let mut len = (n as usize) & 0b00001111;

    let mut shift = 4;
    let mut used = 1;
    while n & 0b10000000 != 0 {
        n = u8(reader)?;
        len |= ((n as usize) & 0b01111111) << shift;
        shift += 7;
        used += 1;
    }
    Ok((object_type, len, used))
}

/// Read an OFS_DELTA offset and extract `(distance, bytes_used)`.
///
/// Big-endian base-128 with a bias: for every continuation byte, 1 is added to
/// the accumulated value before shifting, so multi-byte encodings never overlap
/// the ranges of shorter ones.
fn ofs_from_reader<R: Read>(reader: &mut R) -> std::io::Result<(usize, usize)> {
    let mut n = u8(reader)?;
    let mut used = 1;
    let mut distance = n as usize & 0b01111111;
    while n & 0b10000000 != 0 {
        n = u8(reader)?;
        distance += 1;
        distance = (distance << 7) + (n & 0b01111111) as usize;
        used += 1;
    }
    Ok((distance, used))
}

#[derive(Debug)]
pub struct Pack {
    pub version: u32,
    pub objects: BTreeMap<usize, Object>,
    pub sha1: Vec<u8>,
}

#[derive(Debug, PartialEq)]
pub struct Object {
    pub object_type: ObjectType,
    pub data: Vec<u8>,
    pub compressed_length: usize,
    pub offset: usize,
}

#[derive(Debug, PartialEq)]
pub enum ObjectType {
    Commit,
    Tree,
    Blob,
    Tag,
    OfsDelta(usize),
    RefDelta(Vec<u8>),
}

impl Pack {
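    /// Parse an entire packfile from a seekable reader, returning every object
    /// keyed by its offset, and verify the trailing SHA-1 checksum.
    ///
    /// A minimal usage sketch (the pack path below is hypothetical and the
    /// snippet is not compiled as a doctest):
    ///
    /// ```ignore
    /// let mut file = std::fs::File::open(".git/objects/pack/pack-xxxx.pack").unwrap();
    /// let pack = Pack::from_reader(&mut file).unwrap();
    /// for (offset, object) in &pack.objects {
    ///     println!("{:?} at {}: {} bytes", object.object_type, offset, object.data.len());
    /// }
    /// ```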
    pub fn from_reader<R: Read + Seek>(reader: &mut R) -> std::result::Result<Self, UnpackError> {
        token(reader, b"PACK")?;
        let version = u32_be(reader)?;
        let objects = u32_be(reader)?;

        let mut offset = 12;
        let mut result = BTreeMap::new();

        let mut state = InflateState::new_boxed(DataFormat::Zlib);
        let mut input_buf = vec![0u8; INPUT_BUFFER_SIZE];
        let mut output_buf = vec![0u8; OUTPUT_BUFFER_SIZE];

        for _ in 0..objects {
            use crate::pack::ObjectType::*;
            let (object_type, decompressed_length, mut object_size) = vint_from_reader(reader)?;
            let object_type = match object_type {
                1 => Commit,
                2 => Tree,
                3 => Blob,
                4 => Tag,
                6 => {
                    let (d, u) = ofs_from_reader(reader)?;
                    object_size += u;
                    OfsDelta(d)
                }
                7 => {
                    // REF_DELTA: the header is followed by the 20-byte id of the base object
                    let (base, got) = take_sized(reader, 20)?;
                    if got != 20 {
                        return Err(UnpackError::InvalidHash);
                    }
                    object_size += 20;
                    RefDelta(base)
                }
                _ => return Err(UnpackError::InvalidObjectType),
            };

            let mut compressed_length = 0;
            let mut data = Vec::with_capacity(decompressed_length);
            loop {
                let bytes_available = reader.read(&mut input_buf)?;

                let (consumed, backseek, _) = Pack::extract_from(&mut state, bytes_available, &input_buf, &mut output_buf);
                compressed_length += consumed;
                data.append(&mut output_buf);
                reader.seek(SeekFrom::Current(backseek))?;

                input_buf.resize(INPUT_BUFFER_SIZE, 0);
                output_buf.resize(OUTPUT_BUFFER_SIZE, 0);
                match state.last_status() {
                    // Needs more input:
                    // provide another chunk on the next iteration
                    TINFLStatus::NeedsMoreInput => {
                        continue;
                    }
                    // Has more output:
                    // the decompressor still holds buffered output, loop to pump it out
                    TINFLStatus::HasMoreOutput => {
                        loop {
                            let (_, _, produced) = Pack::extract_from(&mut state, 0, &[], &mut output_buf);
                            data.append(&mut output_buf);
                            output_buf.resize(OUTPUT_BUFFER_SIZE, 0);
                            if produced < OUTPUT_BUFFER_SIZE {
                                break;
                            }
                        }
                        continue;
                    }
                    // Done:
                    // decoding finished, but data may still remain in the decompressor's
                    // internal buffer and needs to be pumped out first
                    TINFLStatus::Done => {
                        while data.len() < decompressed_length {
                            Pack::extract_from(&mut state, 0, &[], &mut output_buf);
                            data.append(&mut output_buf);
                            output_buf.resize(OUTPUT_BUFFER_SIZE, 0);
                        }
                        assert_eq!(data.len(), decompressed_length, "data length does not match expected decompressed length");
                        state.reset_as(MinReset);
                        break;
                    }
                    s => return Err(UnpackError::InvalidTINFLStatus(s)),
                }
            }
            object_size += compressed_length;

            let object = Object {
                object_type,
                data,
                compressed_length,
                offset,
            };
            result.insert(offset, object);
            offset += object_size;
        }

        // verify the trailing SHA-1 over everything before it
        let mut hasher = sha1::Sha1::new();
        reader.seek(SeekFrom::Start(0))?;
        std::io::copy(&mut reader.take(offset as u64), &mut hasher)?;
        let hash_result = hasher.finalize();
        let (checksum, got) = take_sized(reader, 20)?;
        if got != 20 || hash_result[..] != checksum[..] {
            return Err(UnpackError::InvalidHash);
        }

        // bypass EOF check for now
        // assert_eq!(std::io::copy(&mut reader.take(1), &mut input)?, 0);

        Ok(Self {
            version,
            objects: result,
            sha1: checksum,
        })
    }

    /// Run a single inflate step and return `(bytes_consumed, seek_correction, bytes_produced)`,
    /// where `seek_correction` is `consumed - available` (non-positive), the relative seek
    /// needed to rewind unconsumed input. `output_buf` is truncated to what was produced.
    fn extract_from(state: &mut InflateState, bytes_available: usize, input_buf: &[u8], output_buf: &mut Vec<u8>) -> (usize, i64, usize) {
        let r = miniz_oxide::inflate::stream::inflate(
            state,
            &input_buf[..bytes_available],
            output_buf,
            MZFlush::Partial,
        );
        let consumed = r.bytes_consumed;
        let backseek = (consumed as i64) - (bytes_available as i64);
        let produced = r.bytes_written;
        if produced != output_buf.len() {
            output_buf.truncate(produced);
        }
        (consumed, backseek, produced)
    }
}

#[cfg(test)]
mod tests {
    use crate::pack::{vint_from_reader, Object, ObjectType};
    use crate::Pack;
    use std::io::Cursor;

    #[test]
    fn test_vint() {
        assert_eq!(vint_from_reader(&mut Cursor::new(&[0b00101111])).unwrap(), (0b010, 0b1111, 1));
        assert_eq!(vint_from_reader(&mut Cursor::new(&[0b10010101, 0b00001010])).unwrap(),
                   (0b001, 0b0101 + (0b1010 << 4), 2)
        );
        assert_eq!(vint_from_reader(&mut Cursor::new(
            &[0b10101111, 0b10101100, 0b10010010, 0b01110101])).unwrap(),
                   (0b010, 0b1111 + (0b0101100 << 4) + (0b0010010 << 11) + (0b1110101 << 18), 4),
        );
    }
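
    // Hypothetical companion test for `ofs_from_reader` (not in the original
    // file): a sketch checking OFS_DELTA offsets worked out by hand, where each
    // continuation byte adds 1 to the accumulated value before shifting.
    #[test]
    fn test_ofs() {
        use crate::pack::ofs_from_reader;
        // single byte: a plain 7-bit value
        assert_eq!(ofs_from_reader(&mut Cursor::new(&[0b01111111])).unwrap(), (127, 1));
        // two bytes: the smallest encodable value is ((0 + 1) << 7) | 0 = 128
        assert_eq!(ofs_from_reader(&mut Cursor::new(&[0b10000000, 0b00000000])).unwrap(), (128, 2));
        // two bytes: ((1 + 1) << 7) | 2 = 258
        assert_eq!(ofs_from_reader(&mut Cursor::new(&[0b10000001, 0b00000010])).unwrap(), (258, 2));
    }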

    #[test]
    fn test_unpack() {
        let data = [
            0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03,
            0x95, 0x0a, 0x78, 0x9c, 0x95, 0x8b, 0x3b, 0x0a, 0x42, 0x31, 0x10, 0x00,
            0xfb, 0x9c, 0x62, 0x7b, 0x41, 0x36, 0xcf, 0x7c, 0x41, 0xc4, 0xd6, 0x63,
            0x6c, 0xcc, 0x06, 0x03, 0xae, 0x81, 0xb8, 0x16, 0xef, 0xf6, 0x06, 0x3c,
            0x81, 0xc5, 0x54, 0x33, 0xa3, 0x93, 0x19, 0x32, 0xd6, 0x74, 0xaa, 0xa5,
            0x05, 0xf2, 0x39, 0xd5, 0x10, 0x1c, 0x7a, 0x2e, 0x58, 0x5c, 0x21, 0xaa,
            0xd6, 0xe5, 0xa5, 0xb1, 0x6d, 0xd1, 0x7b, 0x43, 0x1f, 0x7d, 0x8c, 0x09,
            0x3b, 0xbf, 0x95, 0x67, 0xa5, 0xdd, 0x46, 0x38, 0x8b, 0xb4, 0xeb, 0xe2,
            0x28, 0x83, 0x2f, 0x60, 0x83, 0xf5, 0x29, 0x06, 0xb7, 0x65, 0x38, 0x60,
            0x42, 0x34, 0xf7, 0x21, 0xd2, 0x75, 0xd5, 0xff, 0x4c, 0xe6, 0xf6, 0xea,
            0xda, 0xe9, 0x09, 0xbf, 0xdb, 0x7c, 0x01, 0x31, 0x47, 0x31, 0xae, 0xa5,
            0x02, 0x78, 0x9c, 0x33, 0x34, 0x30, 0x30, 0x33, 0x31, 0x51, 0x08, 0x72,
            0x75, 0x74, 0xf1, 0x75, 0xd5, 0xcb, 0x4d, 0x61, 0xe8, 0xd8, 0x59, 0x1d,
            0x76, 0x3a, 0x81, 0xb7, 0x63, 0xfb, 0xb2, 0xdd, 0x53, 0x39, 0x9e, 0x31,
            0xf0, 0x9c, 0xfb, 0xbb, 0x54, 0x1a, 0x00, 0xdd, 0x01, 0x0e, 0x01, 0x38,
            0x78, 0x9c, 0x53, 0x56, 0x08, 0x49, 0x2d, 0x2e, 0xe1, 0xe2, 0x02, 0x00,
            0x09, 0x37, 0x01, 0xf8, 0x4f, 0x10, 0xd0, 0x02, 0x25, 0x2e, 0x07, 0xc3,
            0xaf, 0xdb, 0x2d, 0xcc, 0x0a, 0xb8, 0x8d, 0x36, 0xe8, 0xab, 0x4a, 0x26,
        ];
        let pack = Pack::from_reader(&mut std::io::Cursor::new(data)).expect("parse failed");
        assert_eq!(pack.version, 2);
        assert_eq!(pack.objects[&12], Object {
            object_type: ObjectType::Commit,
            data: br"tree 90d83dbf6a598d66405eb0b4baad14990d0f2755
author yesterday17 <mmf@mmf.moe> 1615876429 +0800
committer yesterday17 <mmf@mmf.moe> 1615876429 +0800

Initial commit
".to_vec(),
            compressed_length: 117,
            offset: 12,
        });

        assert_eq!(pack.objects[&131].object_type, ObjectType::Tree);
        assert!(pack.objects[&131].data.starts_with(b"100644 README.md"));
        assert_eq!(pack.objects[&131].compressed_length, 46);
        assert_eq!(pack.objects[&131].offset, 131);

        assert_eq!(pack.objects[&179], Object {
            object_type: ObjectType::Blob,
            data: br"# Test

".to_vec(),
            compressed_length: 16,
            offset: 179,
        });

        assert_eq!(pack.sha1, vec![79, 16, 208, 2, 37, 46, 7, 195, 175, 219, 45, 204, 10, 184, 141, 54, 232, 171, 74, 38]);
    }
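
    // Hypothetical extra test (not in the original file): a sketch of the error
    // path for reserved pack object type 5, which `from_reader` rejects with
    // `UnpackError::InvalidObjectType`.
    #[test]
    fn test_invalid_object_type() {
        use crate::pack::UnpackError;
        // "PACK", version 2, one object, then a header byte with type bits 0b101 (5).
        let data = b"PACK\x00\x00\x00\x02\x00\x00\x00\x01\x50";
        let err = Pack::from_reader(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(err, UnpackError::InvalidObjectType));
    }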
}