dag_cbor_references/
lib.rs1use std::{
3 fmt,
4 io::{self, Read, Seek, SeekFrom},
5 result,
6};
7
8pub type Hash = [u8; 32];
10
11#[derive(Debug)]
13pub enum ParseError {
14 UnexpectedEof,
16 UnexpectedCode(u8),
18 UnknownTag(u8),
20 InvalidCidPrefix(u8),
22 LengthOutOfRange,
24 InvalidVarint,
26 InvalidCidVersion,
28 InvalidHashAlgorithm,
30 InvalidHashLength,
32 IoError(io::Error),
34}
35
36impl std::error::Error for ParseError {
37 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
38 match self {
39 ParseError::IoError(e) => Some(e),
40 _ => None,
41 }
42 }
43}
44
45impl fmt::Display for ParseError {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 fmt::Debug::fmt(&self, f)
48 }
49}
50
51impl From<io::Error> for ParseError {
52 fn from(e: io::Error) -> Self {
53 match e.kind() {
54 io::ErrorKind::UnexpectedEof => ParseError::UnexpectedEof,
55 _ => ParseError::IoError(e),
56 }
57 }
58}
59
60fn read_u8<R: Read>(r: &mut R) -> io::Result<u8> {
62 let mut buf = [0; 1];
63 r.read_exact(&mut buf)?;
64 Ok(buf[0])
65}
66
67fn read_u16<R: Read>(r: &mut R) -> io::Result<u16> {
69 let mut buf = [0; 2];
70 r.read_exact(&mut buf)?;
71 Ok(u16::from_be_bytes(buf))
72}
73
74fn read_u32<R: Read>(r: &mut R) -> io::Result<u32> {
76 let mut buf = [0; 4];
77 r.read_exact(&mut buf)?;
78 Ok(u32::from_be_bytes(buf))
79}
80
81fn read_u64<R: Read>(r: &mut R) -> io::Result<u64> {
83 let mut buf = [0; 8];
84 r.read_exact(&mut buf)?;
85 Ok(u64::from_be_bytes(buf))
86}
87
88fn parse_u64_varint(mut input: &[u8]) -> Result<(u64, &[u8]), ParseError> {
89 let mut value: u64 = 0;
90 let mut shift: u32 = 0;
91
92 loop {
93 let byte = input[0];
94 input = &input[1..];
95
96 let bits = (byte & 0x7F) as u64;
97 value |= bits << shift;
98
99 if (byte & 0x80) == 0 {
100 break;
101 }
102
103 shift += 7;
104 if shift >= 64 {
105 return Err(ParseError::InvalidVarint);
106 }
107 }
108
109 Ok((value, input))
110}
111
112fn read_bytes<R: Read>(r: &mut R, len: usize) -> result::Result<Vec<u8>, ParseError> {
114 let mut buf = Vec::with_capacity(len.min(16 * 1024));
116 r.take(len as u64).read_to_end(&mut buf)?;
117 if buf.len() != len {
118 return Err(ParseError::UnexpectedEof);
119 }
120 Ok(buf)
121}
122
123fn read_link<R: Read>(r: &mut R) -> Result<(u64, Hash), ParseError> {
125 let ty = read_u8(r)?;
126 if ty != 0x58 {
127 return Err(ParseError::UnknownTag(ty));
128 }
129 let len = read_u8(r)?;
130 if len == 0 {
131 return Err(ParseError::LengthOutOfRange);
132 }
133 let bytes = read_bytes(r, len as usize)?;
134 if bytes[0] != 0 {
135 return Err(ParseError::InvalidCidPrefix(bytes[0]));
136 }
137
138 if bytes.len() < 32 {
139 return Err(ParseError::LengthOutOfRange);
140 }
141 let (version_header, rest) = bytes.split_at(2);
145 if version_header != [0, 1] {
146 return Err(ParseError::InvalidCidVersion);
147 }
148 let (codec, rest) = parse_u64_varint(rest)?;
149 let (mh_header, rest) = rest.split_at(2);
151 if mh_header != [0x1e, 0x20] {
152 return Err(ParseError::InvalidHashAlgorithm);
153 }
154 if rest.len() != 32 {
155 return Err(ParseError::InvalidHashLength);
156 }
157 let bytes = <[u8; 32]>::try_from(rest).unwrap();
158 Ok((codec, bytes))
159}
160
161fn read_len<R: Read + Seek>(r: &mut R, major: u8) -> Result<usize, ParseError> {
163 Ok(match major {
164 0x00..=0x17 => major as usize,
165 0x18 => read_u8(r)? as usize,
166 0x19 => read_u16(r)? as usize,
167 0x1a => read_u32(r)? as usize,
168 0x1b => {
169 let len = read_u64(r)?;
170 if len > usize::max_value() as u64 {
171 return Err(ParseError::LengthOutOfRange);
172 }
173 len as usize
174 }
175 major => return Err(ParseError::UnexpectedCode(major)),
176 })
177}
178
179pub fn references<R: Read + Seek>(r: &mut R, res: &mut Vec<(u64, Hash)>) -> Result<(), ParseError> {
186 let major = read_u8(r)?;
187 match major {
188 0x00..=0x17 => {}
190 0x18 => {
191 r.seek(SeekFrom::Current(1))?;
192 }
193 0x19 => {
194 r.seek(SeekFrom::Current(2))?;
195 }
196 0x1a => {
197 r.seek(SeekFrom::Current(4))?;
198 }
199 0x1b => {
200 r.seek(SeekFrom::Current(8))?;
201 }
202
203 0x20..=0x37 => {}
205 0x38 => {
206 r.seek(SeekFrom::Current(1))?;
207 }
208 0x39 => {
209 r.seek(SeekFrom::Current(2))?;
210 }
211 0x3a => {
212 r.seek(SeekFrom::Current(4))?;
213 }
214 0x3b => {
215 r.seek(SeekFrom::Current(8))?;
216 }
217
218 0x40..=0x5b => {
220 let len = read_len(r, major - 0x40)?;
221 r.seek(SeekFrom::Current(len as _))?;
222 }
223
224 0x60..=0x7b => {
226 let len = read_len(r, major - 0x60)?;
227 r.seek(SeekFrom::Current(len as _))?;
228 }
229
230 0x80..=0x9b => {
232 let len = read_len(r, major - 0x80)?;
233 for _ in 0..len {
234 references(r, res)?;
235 }
236 }
237
238 0x9f => loop {
240 let major = read_u8(r)?;
241 if major == 0xff {
242 break;
243 }
244 r.seek(SeekFrom::Current(-1))?;
245 references(r, res)?;
246 },
247
248 0xa0..=0xbb => {
250 let len = read_len(r, major - 0xa0)?;
251 for _ in 0..len {
252 references(r, res)?;
253 references(r, res)?;
254 }
255 }
256
257 0xbf => loop {
259 let major = read_u8(r)?;
260 if major == 0xff {
261 break;
262 }
263 r.seek(SeekFrom::Current(-1))?;
264 references(r, res)?;
265 references(r, res)?;
266 },
267
268 0xd8 => {
270 let tag = read_u8(r)?;
271 if tag == 42 {
272 res.push(read_link(r)?);
273 } else {
274 references(r, res)?;
275 }
276 }
277
278 0xf4..=0xf7 => {}
280 0xf8 => {
281 r.seek(SeekFrom::Current(1))?;
282 }
283 0xf9 => {
284 r.seek(SeekFrom::Current(2))?;
285 }
286 0xfa => {
287 r.seek(SeekFrom::Current(4))?;
288 }
289 0xfb => {
290 r.seek(SeekFrom::Current(8))?;
291 }
292 major => return Err(ParseError::UnexpectedCode(major)),
293 };
294 Ok(())
295}
296
297#[cfg(test)]
298mod tests {
299 use std::io::Cursor;
300
301 use super::references;
302
303 fn bytes(s: &str) -> Vec<u8> {
304 hex::decode(s.chars().filter(|c| !c.is_whitespace()).collect::<String>()).unwrap()
305 }
306
307 #[test]
308 fn references1() {
309 let data = vec![
310 bytes(
311 r"
312 6ffbd8e415444b5940d6fefacf64b922ad80b95debce812931745ad9b59b
313 2565ea08b46db6da5052d6878c074d4f3e705d1a8456d1ae934b38b62e43
314 6e413fbefb2284a5d628e2cf951722c04ff19ff217fcf0360fb8d27b55c0
315 abe378984e0d07beeb964f9f4016408fa0c66b9bf445b53343be521290b9
316 985e30d65c2116b852ab3414d65d6400dc4112ed278f83efc35e59a37b3e
317 b62736dee6a752c331d78f176da7f1ad9bb5ed",
318 ),
319 bytes(
320 r"
321 a564747970656c776e66732f7075622f6469726776657273696f6e65302e
322 322e30686d65746164617461a267637265617465641a643eddeb686d6f64
323 69666965641a643eddeb6870726576696f757381d82a58250001711e2045
324 c910e86e64f78a99dde9232e5978de40823eaa42732ff7a3814983d6969e
325 7368757365726c616e64a16474657374d82a58250001711e2082a8fc238c
326 9a05e2351f8ceaa4e5af2cdb39a895f6e929827a2614e61239d47c",
327 ),
328 ];
329 for data in data {
330 let mut links = Vec::new();
331 references(&mut Cursor::new(&data), &mut links).unwrap();
332 println!("{:?}", links);
333 }
334 }
335}