gix_pack/data/entry/
decode.rs1use std::io;
2
3use gix_features::decode::leb64_from_read;
4
5use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
6use crate::data;
7
/// The error returned when decoding a pack entry header from a byte slice fails.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// The type id stored in the entry header matched none of the object types the decoder handles.
    #[error("Object type {type_id} is unsupported")]
    UnsupportedType { type_id: u8 },
    /// The input ended before the header was complete, or the header was otherwise malformed.
    /// `message` names the part that was being decoded or the problem that was found.
    #[error("Pack entry is truncated: {message}")]
    Corrupt { message: &'static str },
    /// A variable-length number in the header did not fit into 64 bits.
    #[error("Pack entry header value overflowed while decoding")]
    Overflow,
}
19
20impl data::Entry {
22 pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> Result<data::Entry, Error> {
28 let (type_id, size, mut consumed) = parse_header_info(d)?;
29
30 use crate::data::entry::Header::*;
31 let object = match type_id {
32 OFS_DELTA => {
33 let (distance, leb_bytes) = parse_leb64(&d[consumed..])?;
34 let delta = OfsDelta {
35 base_distance: distance,
36 };
37 consumed += leb_bytes;
38 delta
39 }
40 REF_DELTA => {
41 let delta = RefDelta {
42 base_id: gix_hash::ObjectId::from_bytes_or_panic(d.get(consumed..consumed + hash_len).ok_or(
43 Error::Corrupt {
44 message: "ref-delta base object id",
45 },
46 )?),
47 };
48 consumed += hash_len;
49 delta
50 }
51 BLOB => Blob,
52 TREE => Tree,
53 COMMIT => Commit,
54 TAG => Tag,
55 other => return Err(Error::UnsupportedType { type_id: other }),
56 };
57 Ok(data::Entry {
58 header: object,
59 decompressed_size: size,
60 data_offset: pack_offset + consumed as u64,
61 })
62 }
63
64 pub fn from_read(r: &mut dyn io::Read, pack_offset: data::Offset, hash_len: usize) -> io::Result<data::Entry> {
66 let (type_id, size, mut consumed) = streaming_parse_header_info(r)?;
67
68 use crate::data::entry::Header::*;
69 let object = match type_id {
70 OFS_DELTA => {
71 let (distance, leb_bytes) = leb64_from_read(&mut *r)?;
72 let delta = OfsDelta {
73 base_distance: distance,
74 };
75 consumed += leb_bytes;
76 delta
77 }
78 REF_DELTA => {
79 let mut buf = gix_hash::Kind::buf();
80 let hash = &mut buf[..hash_len];
81 r.read_exact(hash)?;
82 #[allow(clippy::redundant_slicing)]
83 let delta = RefDelta {
84 base_id: gix_hash::ObjectId::from_bytes_or_panic(&hash[..]),
85 };
86 consumed += hash_len;
87 delta
88 }
89 BLOB => Blob,
90 TREE => Tree,
91 COMMIT => Commit,
92 TAG => Tag,
93 other => return Err(io::Error::other(format!("Object type {other} is unsupported"))),
94 };
95 Ok(data::Entry {
96 header: object,
97 decompressed_size: size,
98 data_offset: pack_offset + consumed as u64,
99 })
100 }
101}
102
103#[inline]
104fn streaming_parse_header_info(read: &mut dyn io::Read) -> Result<(u8, u64, usize), io::Error> {
105 let mut byte = [0u8; 1];
106 read.read_exact(&mut byte)?;
107 let mut c = byte[0];
108 let mut i = 1;
109 let type_id = (c >> 4) & 0b0000_0111;
110 let mut size = u64::from(c) & 0b0000_1111;
111 let mut shift = 4u32;
112 while c & 0b1000_0000 != 0 {
113 read.read_exact(&mut byte)?;
114 c = byte[0];
115 i += 1;
116 let component = u64::from(c & 0b0111_1111)
117 .checked_shl(shift)
118 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "pack entry header overflowed"))?;
119 size = size
120 .checked_add(component)
121 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "pack entry header overflowed"))?;
122 shift += 7;
123 }
124 if i != encoded_pack_entry_header_size(size) {
125 return Err(io::Error::new(
126 io::ErrorKind::InvalidData,
127 "pack entry header uses a non-canonical size encoding",
128 ));
129 }
130 Ok((type_id, size, i))
131}
132
133#[inline]
135fn parse_header_info(data: &[u8]) -> Result<(u8, u64, usize), Error> {
136 let mut c = *data.first().ok_or(Error::Corrupt {
137 message: "need a pack entry header, got empty input",
138 })?;
139 let mut i = 1;
140 let type_id = (c >> 4) & 0b0000_0111;
141 let mut size = u64::from(c) & 0b0000_1111;
142 let mut shift = 4u32;
143 while c & 0b1000_0000 != 0 {
144 c = *data.get(i).ok_or(Error::Corrupt {
145 message: "pack entry header continuation byte",
146 })?;
147 i += 1;
148 let component = u64::from(c & 0b0111_1111).checked_shl(shift).ok_or(Error::Overflow)?;
149 size = size.checked_add(component).ok_or(Error::Overflow)?;
150 shift += 7;
151 }
152 if i != encoded_pack_entry_header_size(size) {
153 return Err(Error::Corrupt {
154 message: "pack entry header uses a non-canonical size encoding",
155 });
156 }
157 Ok((type_id, size, i))
158}
159
160fn parse_leb64(data: &[u8]) -> Result<(u64, usize), Error> {
161 let mut i = 0;
162 let mut c = *data.first().ok_or(Error::Corrupt {
163 message: "an ofs-delta base distance",
164 })?;
165 i += 1;
166 let mut value = u64::from(c) & 0x7f;
167 while c & 0x80 != 0 {
168 c = *data.get(i).ok_or(Error::Corrupt {
169 message: "an ofs-delta base distance continuation byte",
170 })?;
171 i += 1;
172 value = value
173 .checked_add(1)
174 .and_then(|value| value.checked_shl(7))
175 .and_then(|value| value.checked_add(u64::from(c) & 0x7f))
176 .ok_or(Error::Overflow)?;
177 }
178 Ok((value, i))
179}
180
/// Return the number of bytes the shortest pack entry header encoding of `size` occupies.
///
/// The first byte holds the lowest four bits of `size`, and each additional byte holds
/// seven more bits, so only the bits remaining above the fourth one need counting.
fn encoded_pack_entry_header_size(size: u64) -> usize {
    let beyond_first_byte = size >> 4;
    // Amount of bits up to and including the highest set one, zero if none is set.
    let significant_bits = u64::BITS - beyond_first_byte.leading_zeros();
    1 + significant_bits.div_ceil(7) as usize
}
198
#[cfg(test)]
mod tests {
    use super::*;

    /// A header that spends a second byte without adding anything to the size must be rejected.
    #[test]
    fn rejects_non_canonical_pack_entry_header_encoding() {
        // `0xed` has the continuation bit set and a size of 13 in its low nibble, and the
        // following `0x00` contributes no size bits at all, so a single byte would have sufficed.
        let header = [0xed, 0x00];
        let hash_len = gix_hash::Kind::Sha1.len_in_bytes();

        let outcome = data::Entry::from_bytes(&header, 0, hash_len);

        let is_rejected = matches!(
            outcome,
            Err(Error::Corrupt {
                message: "pack entry header uses a non-canonical size encoding"
            })
        );
        assert!(is_rejected);
    }
}