Skip to main content

gix_pack/data/entry/
decode.rs

1use std::io;
2
3use gix_features::decode::leb64_from_read;
4
5use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
6use crate::data;
7
/// The error returned by [data::Entry::from_bytes()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// The type id stored in the entry header is none of the known object or delta types.
    #[error("Object type {type_id} is unsupported")]
    UnsupportedType { type_id: u8 },
    /// The input ended before the entry header was complete, or the header used a non-canonical
    /// size encoding. `message` names what was expected or what was wrong.
    #[error("Pack entry is truncated: {message}")]
    Corrupt { message: &'static str },
    /// A header value failed the checked arithmetic used while decoding it.
    #[error("Pack entry header value overflowed while decoding")]
    Overflow,
}
19
20/// Decoding
21impl data::Entry {
22    /// Decode an entry from the given entry data `d`, providing the `pack_offset` to allow tracking the start of the entry data section.
23    ///
24    /// # Panics
25    ///
26    /// If we cannot understand the header, garbage data is likely to trigger this.
27    pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> Result<data::Entry, Error> {
28        let (type_id, size, mut consumed) = parse_header_info(d)?;
29
30        use crate::data::entry::Header::*;
31        let object = match type_id {
32            OFS_DELTA => {
33                let (distance, leb_bytes) = parse_leb64(&d[consumed..])?;
34                let delta = OfsDelta {
35                    base_distance: distance,
36                };
37                consumed += leb_bytes;
38                delta
39            }
40            REF_DELTA => {
41                let delta = RefDelta {
42                    base_id: gix_hash::ObjectId::from_bytes_or_panic(d.get(consumed..consumed + hash_len).ok_or(
43                        Error::Corrupt {
44                            message: "ref-delta base object id",
45                        },
46                    )?),
47                };
48                consumed += hash_len;
49                delta
50            }
51            BLOB => Blob,
52            TREE => Tree,
53            COMMIT => Commit,
54            TAG => Tag,
55            other => return Err(Error::UnsupportedType { type_id: other }),
56        };
57        Ok(data::Entry {
58            header: object,
59            decompressed_size: size,
60            data_offset: pack_offset + consumed as u64,
61        })
62    }
63
64    /// Instantiate an `Entry` from the reader `r`, providing the `pack_offset` to allow tracking the start of the entry data section.
65    pub fn from_read(r: &mut dyn io::Read, pack_offset: data::Offset, hash_len: usize) -> io::Result<data::Entry> {
66        let (type_id, size, mut consumed) = streaming_parse_header_info(r)?;
67
68        use crate::data::entry::Header::*;
69        let object = match type_id {
70            OFS_DELTA => {
71                let (distance, leb_bytes) = leb64_from_read(&mut *r)?;
72                let delta = OfsDelta {
73                    base_distance: distance,
74                };
75                consumed += leb_bytes;
76                delta
77            }
78            REF_DELTA => {
79                let mut buf = gix_hash::Kind::buf();
80                let hash = &mut buf[..hash_len];
81                r.read_exact(hash)?;
82                #[allow(clippy::redundant_slicing)]
83                let delta = RefDelta {
84                    base_id: gix_hash::ObjectId::from_bytes_or_panic(&hash[..]),
85                };
86                consumed += hash_len;
87                delta
88            }
89            BLOB => Blob,
90            TREE => Tree,
91            COMMIT => Commit,
92            TAG => Tag,
93            other => return Err(io::Error::other(format!("Object type {other} is unsupported"))),
94        };
95        Ok(data::Entry {
96            header: object,
97            decompressed_size: size,
98            data_offset: pack_offset + consumed as u64,
99        })
100    }
101}
102
103#[inline]
104fn streaming_parse_header_info(read: &mut dyn io::Read) -> Result<(u8, u64, usize), io::Error> {
105    let mut byte = [0u8; 1];
106    read.read_exact(&mut byte)?;
107    let mut c = byte[0];
108    let mut i = 1;
109    let type_id = (c >> 4) & 0b0000_0111;
110    let mut size = u64::from(c) & 0b0000_1111;
111    let mut shift = 4u32;
112    while c & 0b1000_0000 != 0 {
113        read.read_exact(&mut byte)?;
114        c = byte[0];
115        i += 1;
116        let component = u64::from(c & 0b0111_1111)
117            .checked_shl(shift)
118            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "pack entry header overflowed"))?;
119        size = size
120            .checked_add(component)
121            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "pack entry header overflowed"))?;
122        shift += 7;
123    }
124    if i != encoded_pack_entry_header_size(size) {
125        return Err(io::Error::new(
126            io::ErrorKind::InvalidData,
127            "pack entry header uses a non-canonical size encoding",
128        ));
129    }
130    Ok((type_id, size, i))
131}
132
133/// Parses the header of a pack-entry, yielding object type id, decompressed object size, and consumed bytes
134#[inline]
135fn parse_header_info(data: &[u8]) -> Result<(u8, u64, usize), Error> {
136    let mut c = *data.first().ok_or(Error::Corrupt {
137        message: "need a pack entry header, got empty input",
138    })?;
139    let mut i = 1;
140    let type_id = (c >> 4) & 0b0000_0111;
141    let mut size = u64::from(c) & 0b0000_1111;
142    let mut shift = 4u32;
143    while c & 0b1000_0000 != 0 {
144        c = *data.get(i).ok_or(Error::Corrupt {
145            message: "pack entry header continuation byte",
146        })?;
147        i += 1;
148        let component = u64::from(c & 0b0111_1111).checked_shl(shift).ok_or(Error::Overflow)?;
149        size = size.checked_add(component).ok_or(Error::Overflow)?;
150        shift += 7;
151    }
152    if i != encoded_pack_entry_header_size(size) {
153        return Err(Error::Corrupt {
154            message: "pack entry header uses a non-canonical size encoding",
155        });
156    }
157    Ok((type_id, size, i))
158}
159
160fn parse_leb64(data: &[u8]) -> Result<(u64, usize), Error> {
161    let mut i = 0;
162    let mut c = *data.first().ok_or(Error::Corrupt {
163        message: "an ofs-delta base distance",
164    })?;
165    i += 1;
166    let mut value = u64::from(c) & 0x7f;
167    while c & 0x80 != 0 {
168        c = *data.get(i).ok_or(Error::Corrupt {
169            message: "an ofs-delta base distance continuation byte",
170        })?;
171        i += 1;
172        value = value
173            .checked_add(1)
174            .and_then(|value| value.checked_shl(7))
175            .and_then(|value| value.checked_add(u64::from(c) & 0x7f))
176            .ok_or(Error::Overflow)?;
177    }
178    Ok((value, i))
179}
180
/// Compute how many bytes the canonical pack-entry size header for `size` occupies.
///
/// Parsing compares this against the number of bytes actually consumed to reject overlong
/// size encodings. Our delta resolution implementation depends on that, as it later reconstructs
/// an entry's pack offset from `data_offset - header_size()`. With non-canonical encodings
/// accepted, `header_size()` would yield the canonical length while `data_offset` would reflect
/// the bytes actually consumed. That would break the invariant and allow malformed delta entries
/// to point back to themselves or otherwise walk the wrong base objects.
fn encoded_pack_entry_header_size(size: u64) -> usize {
    // The first byte stores the lowest four bits, each further byte stores seven more.
    let beyond_first_byte = size >> 4;
    let significant_bits = (u64::BITS - beyond_first_byte.leading_zeros()) as usize;
    1 + (significant_bits + 6) / 7
}
198
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rejects_non_canonical_pack_entry_header_encoding() {
        // `0xed` sets the continuation bit and carries the size bits for 13, while the trailing
        // `0x00` adds nothing - two bytes are used for a size that fits into one.
        let overlong_header = [0xed, 0x00];
        let hash_len = gix_hash::Kind::Sha1.len_in_bytes();

        let result = data::Entry::from_bytes(&overlong_header, 0, hash_len);

        let is_rejected = matches!(
            result,
            Err(Error::Corrupt {
                message: "pack entry header uses a non-canonical size encoding"
            })
        );
        assert!(is_rejected);
    }
}