dag_cbor_references/
lib.rs

1//! CBOR decoder
2use std::{
3    fmt,
4    io::{self, Read, Seek, SeekFrom},
5    result,
6};
7
8/// Type alias for a blake3 hash
9pub type Hash = [u8; 32];
10
11/// Parse error
12#[derive(Debug)]
13pub enum ParseError {
14    /// Unexpected end of file
15    UnexpectedEof,
16    /// Unexpected code
17    UnexpectedCode(u8),
18    /// Unknown cbor tag
19    UnknownTag(u8),
20    /// Invalid cid prefix
21    InvalidCidPrefix(u8),
22    /// Invalid length
23    LengthOutOfRange,
24    /// Invalid varint
25    InvalidVarint,
26    /// Invalid cid version
27    InvalidCidVersion,
28    /// Invalid hash algorithm (not blake3)
29    InvalidHashAlgorithm,
30    /// Invalid hash length (not 32)
31    InvalidHashLength,
32    /// Generic io error
33    IoError(io::Error),
34}
35
36impl std::error::Error for ParseError {
37    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
38        match self {
39            ParseError::IoError(e) => Some(e),
40            _ => None,
41        }
42    }
43}
44
45impl fmt::Display for ParseError {
46    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47        fmt::Debug::fmt(&self, f)
48    }
49}
50
51impl From<io::Error> for ParseError {
52    fn from(e: io::Error) -> Self {
53        match e.kind() {
54            io::ErrorKind::UnexpectedEof => ParseError::UnexpectedEof,
55            _ => ParseError::IoError(e),
56        }
57    }
58}
59
60/// Reads a u8 from a byte stream.
61fn read_u8<R: Read>(r: &mut R) -> io::Result<u8> {
62    let mut buf = [0; 1];
63    r.read_exact(&mut buf)?;
64    Ok(buf[0])
65}
66
67/// Reads a u16 from a byte stream.
68fn read_u16<R: Read>(r: &mut R) -> io::Result<u16> {
69    let mut buf = [0; 2];
70    r.read_exact(&mut buf)?;
71    Ok(u16::from_be_bytes(buf))
72}
73
74/// Reads a u32 from a byte stream.
75fn read_u32<R: Read>(r: &mut R) -> io::Result<u32> {
76    let mut buf = [0; 4];
77    r.read_exact(&mut buf)?;
78    Ok(u32::from_be_bytes(buf))
79}
80
81/// Reads a u64 from a byte stream.
82fn read_u64<R: Read>(r: &mut R) -> io::Result<u64> {
83    let mut buf = [0; 8];
84    r.read_exact(&mut buf)?;
85    Ok(u64::from_be_bytes(buf))
86}
87
88fn parse_u64_varint(mut input: &[u8]) -> Result<(u64, &[u8]), ParseError> {
89    let mut value: u64 = 0;
90    let mut shift: u32 = 0;
91
92    loop {
93        let byte = input[0];
94        input = &input[1..];
95
96        let bits = (byte & 0x7F) as u64;
97        value |= bits << shift;
98
99        if (byte & 0x80) == 0 {
100            break;
101        }
102
103        shift += 7;
104        if shift >= 64 {
105            return Err(ParseError::InvalidVarint);
106        }
107    }
108
109    Ok((value, input))
110}
111
112/// Reads `len` number of bytes from a byte stream.
113fn read_bytes<R: Read>(r: &mut R, len: usize) -> result::Result<Vec<u8>, ParseError> {
114    // Limit up-front allocations to 16KiB as the length is user controlled.
115    let mut buf = Vec::with_capacity(len.min(16 * 1024));
116    r.take(len as u64).read_to_end(&mut buf)?;
117    if buf.len() != len {
118        return Err(ParseError::UnexpectedEof);
119    }
120    Ok(buf)
121}
122
123/// Reads a cid from a stream of cbor encoded bytes.
124fn read_link<R: Read>(r: &mut R) -> Result<(u64, Hash), ParseError> {
125    let ty = read_u8(r)?;
126    if ty != 0x58 {
127        return Err(ParseError::UnknownTag(ty));
128    }
129    let len = read_u8(r)?;
130    if len == 0 {
131        return Err(ParseError::LengthOutOfRange);
132    }
133    let bytes = read_bytes(r, len as usize)?;
134    if bytes[0] != 0 {
135        return Err(ParseError::InvalidCidPrefix(bytes[0]));
136    }
137
138    if bytes.len() < 32 {
139        return Err(ParseError::LengthOutOfRange);
140    }
141    // check that version is 1
142    // skip the first byte per
143    // https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-cbor.md#links
144    let (version_header, rest) = bytes.split_at(2);
145    if version_header != [0, 1] {
146        return Err(ParseError::InvalidCidVersion);
147    }
148    let (codec, rest) = parse_u64_varint(rest)?;
149    // check that hash code is 0x1e (blake3) and length is 32
150    let (mh_header, rest) = rest.split_at(2);
151    if mh_header != [0x1e, 0x20] {
152        return Err(ParseError::InvalidHashAlgorithm);
153    }
154    if rest.len() != 32 {
155        return Err(ParseError::InvalidHashLength);
156    }
157    let bytes = <[u8; 32]>::try_from(rest).unwrap();
158    Ok((codec, bytes))
159}
160
161/// Reads the len given a base.
162fn read_len<R: Read + Seek>(r: &mut R, major: u8) -> Result<usize, ParseError> {
163    Ok(match major {
164        0x00..=0x17 => major as usize,
165        0x18 => read_u8(r)? as usize,
166        0x19 => read_u16(r)? as usize,
167        0x1a => read_u32(r)? as usize,
168        0x1b => {
169            let len = read_u64(r)?;
170            if len > usize::max_value() as u64 {
171                return Err(ParseError::LengthOutOfRange);
172            }
173            len as usize
174        }
175        major => return Err(ParseError::UnexpectedCode(major)),
176    })
177}
178
179/// Read a dag-cbor block and extract all the links.
180///
181/// 'r' is a reader that is expected to be at the start of a dag-cbor block.
182/// 'res' is a vector that will be populated with all the links found.
183///
184/// Will fail unless all links are blake3 hashes.
185pub fn references<R: Read + Seek>(r: &mut R, res: &mut Vec<(u64, Hash)>) -> Result<(), ParseError> {
186    let major = read_u8(r)?;
187    match major {
188        // Major type 0: an unsigned integer
189        0x00..=0x17 => {}
190        0x18 => {
191            r.seek(SeekFrom::Current(1))?;
192        }
193        0x19 => {
194            r.seek(SeekFrom::Current(2))?;
195        }
196        0x1a => {
197            r.seek(SeekFrom::Current(4))?;
198        }
199        0x1b => {
200            r.seek(SeekFrom::Current(8))?;
201        }
202
203        // Major type 1: a negative integer
204        0x20..=0x37 => {}
205        0x38 => {
206            r.seek(SeekFrom::Current(1))?;
207        }
208        0x39 => {
209            r.seek(SeekFrom::Current(2))?;
210        }
211        0x3a => {
212            r.seek(SeekFrom::Current(4))?;
213        }
214        0x3b => {
215            r.seek(SeekFrom::Current(8))?;
216        }
217
218        // Major type 2: a byte string
219        0x40..=0x5b => {
220            let len = read_len(r, major - 0x40)?;
221            r.seek(SeekFrom::Current(len as _))?;
222        }
223
224        // Major type 3: a text string
225        0x60..=0x7b => {
226            let len = read_len(r, major - 0x60)?;
227            r.seek(SeekFrom::Current(len as _))?;
228        }
229
230        // Major type 4: an array of data items
231        0x80..=0x9b => {
232            let len = read_len(r, major - 0x80)?;
233            for _ in 0..len {
234                references(r, res)?;
235            }
236        }
237
238        // Major type 4: an array of data items (indefinite length)
239        0x9f => loop {
240            let major = read_u8(r)?;
241            if major == 0xff {
242                break;
243            }
244            r.seek(SeekFrom::Current(-1))?;
245            references(r, res)?;
246        },
247
248        // Major type 5: a map of pairs of data items
249        0xa0..=0xbb => {
250            let len = read_len(r, major - 0xa0)?;
251            for _ in 0..len {
252                references(r, res)?;
253                references(r, res)?;
254            }
255        }
256
257        // Major type 5: a map of pairs of data items (indefinite length)
258        0xbf => loop {
259            let major = read_u8(r)?;
260            if major == 0xff {
261                break;
262            }
263            r.seek(SeekFrom::Current(-1))?;
264            references(r, res)?;
265            references(r, res)?;
266        },
267
268        // Major type 6: optional semantic tagging of other major types
269        0xd8 => {
270            let tag = read_u8(r)?;
271            if tag == 42 {
272                res.push(read_link(r)?);
273            } else {
274                references(r, res)?;
275            }
276        }
277
278        // Major type 7: floating-point numbers and other simple data types that need no content
279        0xf4..=0xf7 => {}
280        0xf8 => {
281            r.seek(SeekFrom::Current(1))?;
282        }
283        0xf9 => {
284            r.seek(SeekFrom::Current(2))?;
285        }
286        0xfa => {
287            r.seek(SeekFrom::Current(4))?;
288        }
289        0xfb => {
290            r.seek(SeekFrom::Current(8))?;
291        }
292        major => return Err(ParseError::UnexpectedCode(major)),
293    };
294    Ok(())
295}
296
297#[cfg(test)]
298mod tests {
299    use std::io::Cursor;
300
301    use super::references;
302
303    fn bytes(s: &str) -> Vec<u8> {
304        hex::decode(s.chars().filter(|c| !c.is_whitespace()).collect::<String>()).unwrap()
305    }
306
307    #[test]
308    fn references1() {
309        let data = vec![
310            bytes(
311                r"
312            6ffbd8e415444b5940d6fefacf64b922ad80b95debce812931745ad9b59b
313            2565ea08b46db6da5052d6878c074d4f3e705d1a8456d1ae934b38b62e43
314            6e413fbefb2284a5d628e2cf951722c04ff19ff217fcf0360fb8d27b55c0
315            abe378984e0d07beeb964f9f4016408fa0c66b9bf445b53343be521290b9
316            985e30d65c2116b852ab3414d65d6400dc4112ed278f83efc35e59a37b3e
317            b62736dee6a752c331d78f176da7f1ad9bb5ed",
318            ),
319            bytes(
320                r"
321            a564747970656c776e66732f7075622f6469726776657273696f6e65302e
322            322e30686d65746164617461a267637265617465641a643eddeb686d6f64
323            69666965641a643eddeb6870726576696f757381d82a58250001711e2045
324            c910e86e64f78a99dde9232e5978de40823eaa42732ff7a3814983d6969e
325            7368757365726c616e64a16474657374d82a58250001711e2082a8fc238c
326            9a05e2351f8ceaa4e5af2cdb39a895f6e929827a2614e61239d47c",
327            ),
328        ];
329        for data in data {
330            let mut links = Vec::new();
331            references(&mut Cursor::new(&data), &mut links).unwrap();
332            println!("{:?}", links);
333        }
334    }
335}