libipld_cbor/
decode.rs

1//! CBOR decoder
2use crate::cbor::{Major, MajorKind, F32, F64, FALSE, NULL, TRUE};
3use crate::error::{
4    DuplicateKey, InvalidCidPrefix, LengthOutOfRange, NumberNotMinimal, NumberOutOfRange,
5    UnexpectedCode, UnexpectedEof, UnknownTag,
6};
7use crate::DagCborCodec as DagCbor;
8use byteorder::{BigEndian, ByteOrder};
9use core::convert::TryFrom;
10use libipld_core::codec::{Decode, References};
11use libipld_core::error::Result;
12use libipld_core::ipld::Ipld;
13use libipld_core::{cid::Cid, raw_value::SkipOne};
14use std::collections::BTreeMap;
15use std::io::{Read, Seek, SeekFrom};
16use std::sync::Arc;
17
18/// Reads a u8 from a byte stream.
19pub fn read_u8<R: Read>(r: &mut R) -> Result<u8> {
20    let mut buf = [0; 1];
21    r.read_exact(&mut buf)?;
22    Ok(buf[0])
23}
24
25/// Reads a u16 from a byte stream.
26pub fn read_u16<R: Read>(r: &mut R) -> Result<u16> {
27    let mut buf = [0; 2];
28    r.read_exact(&mut buf)?;
29    Ok(BigEndian::read_u16(&buf))
30}
31
32/// Reads a u32 from a byte stream.
33pub fn read_u32<R: Read>(r: &mut R) -> Result<u32> {
34    let mut buf = [0; 4];
35    r.read_exact(&mut buf)?;
36    Ok(BigEndian::read_u32(&buf))
37}
38
39/// Reads a u64 from a byte stream.
40pub fn read_u64<R: Read>(r: &mut R) -> Result<u64> {
41    let mut buf = [0; 8];
42    r.read_exact(&mut buf)?;
43    Ok(BigEndian::read_u64(&buf))
44}
45
46/// Reads a f32 from a byte stream.
47pub fn read_f32<R: Read>(r: &mut R) -> Result<f32> {
48    let mut buf = [0; 4];
49    r.read_exact(&mut buf)?;
50    Ok(BigEndian::read_f32(&buf))
51}
52
53/// Reads a f64 from a byte stream.
54pub fn read_f64<R: Read>(r: &mut R) -> Result<f64> {
55    let mut buf = [0; 8];
56    r.read_exact(&mut buf)?;
57    Ok(BigEndian::read_f64(&buf))
58}
59
60/// Reads `len` number of bytes from a byte stream.
61pub fn read_bytes<R: Read>(r: &mut R, len: u64) -> Result<Vec<u8>> {
62    let len = usize::try_from(len).map_err(|_| LengthOutOfRange::new::<usize>())?;
63    // Limit up-front allocations to 16KiB as the length is user controlled.
64    let mut buf = Vec::with_capacity(len.min(16 * 1024));
65    r.take(len as u64).read_to_end(&mut buf)?;
66    if buf.len() != len {
67        return Err(UnexpectedEof.into());
68    }
69    Ok(buf)
70}
71
72/// Reads `len` number of bytes from a byte stream and converts them to a string.
73pub fn read_str<R: Read>(r: &mut R, len: u64) -> Result<String> {
74    let bytes = read_bytes(r, len)?;
75    Ok(String::from_utf8(bytes)?)
76}
77
78/// Reads a list of any type that implements `TryReadCbor` from a stream of cbor encoded bytes.
79pub fn read_list<R: Read + Seek, T: Decode<DagCbor>>(r: &mut R, len: u64) -> Result<Vec<T>> {
80    let len = usize::try_from(len).map_err(|_| LengthOutOfRange::new::<usize>())?;
81    // Limit up-front allocations to 16KiB as the length is user controlled.
82    //
83    // Can't make this "const" because the generic, but it _should_ be known at compile time.
84    let max_alloc = (16 * 1024) / std::mem::size_of::<T>();
85
86    let mut list: Vec<T> = Vec::with_capacity(len.min(max_alloc));
87    for _ in 0..len {
88        list.push(T::decode(DagCbor, r)?);
89    }
90    Ok(list)
91}
92
93/// Reads a map of any type that implements `TryReadCbor` from a stream of cbor encoded bytes.
94pub fn read_map<R: Read + Seek, K: Decode<DagCbor> + Ord, T: Decode<DagCbor>>(
95    r: &mut R,
96    len: u64,
97) -> Result<BTreeMap<K, T>> {
98    let len = usize::try_from(len).map_err(|_| LengthOutOfRange::new::<usize>())?;
99    let mut map: BTreeMap<K, T> = BTreeMap::new();
100    for _ in 0..len {
101        let key = K::decode(DagCbor, r)?;
102        let value = T::decode(DagCbor, r)?;
103        let prev_value = map.insert(key, value);
104        if prev_value.is_some() {
105            return Err(DuplicateKey.into());
106        }
107    }
108    Ok(map)
109}
110
111/// Reads a cid from a stream of cbor encoded bytes.
112pub fn read_link<R: Read + Seek>(r: &mut R) -> Result<Cid> {
113    let major = read_major(r)?;
114    if major.kind() != MajorKind::ByteString {
115        return Err(UnexpectedCode::new::<Cid>(major.into()).into());
116    }
117    let len = read_uint(r, major)?;
118    if len < 1 {
119        return Err(LengthOutOfRange::new::<Cid>().into());
120    }
121
122    let mut r = r.take(len);
123
124    // skip the first byte per
125    // https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-cbor.md#links
126    let prefix = read_u8(&mut r)?;
127    if prefix != 0 {
128        return Err(InvalidCidPrefix(prefix).into());
129    }
130
131    // Read the CID. No need to limit the size, the CID will do this for us.
132    let cid = Cid::read_bytes(&mut r)?;
133
134    // Make sure we've read the entire CID.
135    if r.read(&mut [0u8][..])? != 0 {
136        return Err(LengthOutOfRange::new::<Cid>().into());
137    }
138
139    Ok(cid)
140}
141
142/// Read a and validate major "byte". This includes both the major type and the additional info.
143pub fn read_major<R: Read>(r: &mut R) -> Result<Major> {
144    Ok(Major::try_from(read_u8(r)?)?)
145}
146
147/// Read the uint argument to the given major type. This function errors if:
148/// 1. The major type doesn't expect an integer argument.
149/// 2. The integer argument is not "minimally" encoded per the IPLD spec.
150pub fn read_uint<R: Read>(r: &mut R, major: Major) -> Result<u64> {
151    const MAX_SHORT: u64 = 23;
152    const MAX_1BYTE: u64 = u8::MAX as u64;
153    const MAX_2BYTE: u64 = u16::MAX as u64;
154    const MAX_4BYTE: u64 = u32::MAX as u64;
155    if major.kind() == MajorKind::Other {
156        return Err(UnexpectedCode::new::<u64>(major.into()).into());
157    }
158    match major.info() {
159        value @ 0..=23 => Ok(value as u64),
160        24 => match read_u8(r)? as u64 {
161            0..=MAX_SHORT => Err(NumberNotMinimal.into()),
162            value => Ok(value),
163        },
164        25 => match read_u16(r)? as u64 {
165            0..=MAX_1BYTE => Err(NumberNotMinimal.into()),
166            value => Ok(value),
167        },
168        26 => match read_u32(r)? as u64 {
169            0..=MAX_2BYTE => Err(NumberNotMinimal.into()),
170            value => Ok(value),
171        },
172        27 => match read_u64(r)? {
173            0..=MAX_4BYTE => Err(NumberNotMinimal.into()),
174            value => Ok(value),
175        },
176        _ => Err(UnexpectedCode::new::<u64>(major.into()).into()),
177    }
178}
179
180impl Decode<DagCbor> for bool {
181    fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
182        Ok(match read_major(r)? {
183            FALSE => false,
184            TRUE => true,
185            m => return Err(UnexpectedCode::new::<Self>(m.into()).into()),
186        })
187    }
188}
189
190macro_rules! impl_num {
191    (unsigned $($t:ty),*) => {
192        $(
193            impl Decode<DagCbor> for $t {
194                fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
195                    let major = read_major(r)?;
196                    if major.kind() != MajorKind::UnsignedInt {
197                        return Err(UnexpectedCode::new::<Self>(major.into()).into());
198                    }
199                    let value = read_uint(r, major)?;
200                    Self::try_from(value).map_err(|_| NumberOutOfRange::new::<Self>().into())
201                }
202            }
203        )*
204    };
205    (signed $($t:ty),*) => {
206        $(
207            impl Decode<DagCbor> for $t {
208                fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
209                    let major = read_major(r)?;
210                    let value = read_uint(r, major)?;
211                    match major.kind() {
212                        MajorKind::UnsignedInt | MajorKind::NegativeInt => (),
213                        _ => return Err(UnexpectedCode::new::<Self>(major.into()).into()),
214                    };
215
216                    let mut value = Self::try_from(value)
217                        .map_err(|_| NumberOutOfRange::new::<Self>())?;
218                    if major.kind() == MajorKind::NegativeInt {
219                        // This is guaranteed to not overflow.
220                        value = -1 - value;
221                    }
222                    Ok(value)
223                }
224            }
225        )*
226    };
227}
228
229impl_num!(unsigned u8, u16, u32, u64, u128);
230impl_num!(signed i8, i16, i32, i64, i128);
231
232impl Decode<DagCbor> for f32 {
233    fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
234        // TODO: We don't accept f16
235        // TODO: By IPLD spec, we shouldn't accept f32 either...
236        let num = match read_major(r)? {
237            F32 => read_f32(r)?,
238            F64 => {
239                let num = read_f64(r)?;
240                let converted = num as Self;
241                if f64::from(converted) != num {
242                    return Err(NumberOutOfRange::new::<Self>().into());
243                }
244                converted
245            }
246            m => return Err(UnexpectedCode::new::<Self>(m.into()).into()),
247        };
248        if !num.is_finite() {
249            return Err(NumberOutOfRange::new::<Self>().into());
250        }
251        Ok(num)
252    }
253}
254
255impl Decode<DagCbor> for f64 {
256    fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
257        // TODO: We don't accept f16
258        // TODO: By IPLD spec, we shouldn't accept f32 either...
259        let num = match read_major(r)? {
260            F32 => read_f32(r)?.into(),
261            F64 => read_f64(r)?,
262            m => return Err(UnexpectedCode::new::<Self>(m.into()).into()),
263        };
264        // This is by IPLD spec, but is it widely used?
265        if !num.is_finite() {
266            return Err(NumberOutOfRange::new::<Self>().into());
267        }
268        Ok(num)
269    }
270}
271
272impl Decode<DagCbor> for String {
273    fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
274        let major = read_major(r)?;
275        if major.kind() != MajorKind::TextString {
276            return Err(UnexpectedCode::new::<Self>(major.into()).into());
277        }
278        let len = read_uint(r, major)?;
279        read_str(r, len)
280    }
281}
282
283impl Decode<DagCbor> for Cid {
284    fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
285        let major = read_major(r)?;
286        if major.kind() == MajorKind::Tag {
287            match read_uint(r, major)? {
288                42 => read_link(r),
289                tag => Err(UnknownTag(tag).into()),
290            }
291        } else {
292            Err(UnexpectedCode::new::<Self>(major.into()).into())
293        }
294    }
295}
296
297impl Decode<DagCbor> for Box<[u8]> {
298    fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
299        let major = read_major(r)?;
300        if major.kind() != MajorKind::ByteString {
301            return Err(UnexpectedCode::new::<Self>(major.into()).into());
302        }
303        let len = read_uint(r, major)?;
304        Ok(read_bytes(r, len)?.into_boxed_slice())
305    }
306}
307
308impl<T: Decode<DagCbor>> Decode<DagCbor> for Option<T> {
309    fn decode<R: Read + Seek>(c: DagCbor, r: &mut R) -> Result<Self> {
310        let result = match read_major(r)? {
311            NULL => None,
312            _ => {
313                r.seek(SeekFrom::Current(-1))?;
314                Some(T::decode(c, r)?)
315            }
316        };
317        Ok(result)
318    }
319}
320
321impl<T: Decode<DagCbor>> Decode<DagCbor> for Vec<T> {
322    fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
323        let major = read_major(r)?;
324        if major.kind() != MajorKind::Array {
325            return Err(UnexpectedCode::new::<Self>(major.into()).into());
326        }
327        let len = read_uint(r, major)?;
328        read_list(r, len)
329    }
330}
331
332impl<K: Decode<DagCbor> + Ord, T: Decode<DagCbor>> Decode<DagCbor> for BTreeMap<K, T> {
333    fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
334        let major = read_major(r)?;
335        if major.kind() != MajorKind::Map {
336            return Err(UnexpectedCode::new::<Self>(major.into()).into());
337        }
338
339        let len = read_uint(r, major)?;
340        read_map(r, len)
341    }
342}
343
344impl Decode<DagCbor> for Ipld {
345    fn decode<R: Read + Seek>(_: DagCbor, r: &mut R) -> Result<Self> {
346        let major = read_major(r)?;
347        let ipld = match major.kind() {
348            MajorKind::UnsignedInt => Self::Integer(read_uint(r, major)? as i128),
349            MajorKind::NegativeInt => Self::Integer(-1 - read_uint(r, major)? as i128),
350            MajorKind::ByteString => {
351                let len = read_uint(r, major)?;
352                Self::Bytes(read_bytes(r, len)?)
353            }
354            MajorKind::TextString => {
355                let len = read_uint(r, major)?;
356                Self::String(read_str(r, len)?)
357            }
358            MajorKind::Array => {
359                let len = read_uint(r, major)?;
360                Self::List(read_list(r, len)?)
361            }
362            MajorKind::Map => {
363                let len = read_uint(r, major)?;
364                Self::Map(read_map(r, len)?)
365            }
366            MajorKind::Tag => {
367                let value = read_uint(r, major)?;
368                if value == 42 {
369                    Self::Link(read_link(r)?)
370                } else {
371                    return Err(UnknownTag(value).into());
372                }
373            }
374            MajorKind::Other => match major {
375                FALSE => Self::Bool(false),
376                TRUE => Self::Bool(true),
377                NULL => Self::Null,
378                F32 => Self::Float(read_f32(r)? as f64),
379                F64 => Self::Float(read_f64(r)?),
380                m => return Err(UnexpectedCode::new::<Self>(m.into()).into()),
381            },
382        };
383        Ok(ipld)
384    }
385}
386
387impl References<DagCbor> for Ipld {
388    fn references<R: Read + Seek, E: Extend<Cid>>(
389        _: DagCbor,
390        r: &mut R,
391        set: &mut E,
392    ) -> Result<()> {
393        let mut remaining: u64 = 1;
394        while remaining > 0 {
395            remaining -= 1;
396            let major = read_major(r)?;
397            match major.kind() {
398                MajorKind::UnsignedInt | MajorKind::NegativeInt | MajorKind::Other => {
399                    // TODO: validate ints & floats?
400                    r.seek(SeekFrom::Current(major.len() as i64))?;
401                }
402                MajorKind::ByteString | MajorKind::TextString => {
403                    // TODO: validate utf8?
404                    // We could just reject this case, but we can't just play it fast and loose and
405                    // wrap. We might as well just try to seek (and likely fail).
406                    let mut offset = read_uint(r, major)?;
407                    while offset > i64::MAX as u64 {
408                        r.seek(SeekFrom::Current(i64::MAX))?;
409                        offset -= i64::MAX as u64;
410                    }
411                    r.seek(SeekFrom::Current(offset as i64))?;
412                }
413                MajorKind::Array => {
414                    remaining = remaining
415                        .checked_add(read_uint(r, major)?)
416                        .ok_or_else(LengthOutOfRange::new::<Self>)?;
417                }
418                MajorKind::Map => {
419                    // TODO: consider using a checked "monad" type to simplify.
420                    let items = read_uint(r, major)?
421                        .checked_mul(2)
422                        .ok_or_else(LengthOutOfRange::new::<Self>)?;
423                    remaining = remaining
424                        .checked_add(items)
425                        .ok_or_else(LengthOutOfRange::new::<Self>)?;
426                }
427                MajorKind::Tag => match read_uint(r, major)? {
428                    42 => set.extend(std::iter::once(read_link(r)?)),
429                    _ => {
430                        remaining = remaining
431                            .checked_add(1)
432                            .ok_or_else(LengthOutOfRange::new::<Self>)?;
433                    }
434                },
435            };
436        }
437        Ok(())
438    }
439}
440
441impl<T: Decode<DagCbor>> Decode<DagCbor> for Arc<T> {
442    fn decode<R: Read + Seek>(c: DagCbor, r: &mut R) -> Result<Self> {
443        Ok(Arc::new(T::decode(c, r)?))
444    }
445}
446
447impl Decode<DagCbor> for () {
448    fn decode<R: Read + Seek>(_c: DagCbor, r: &mut R) -> Result<Self> {
449        let major = read_u8(r)?;
450        match major {
451            0x80 => {}
452            _ => {
453                return Err(UnexpectedCode::new::<Self>(major).into());
454            }
455        };
456        Ok(())
457    }
458}
459
460impl<A: Decode<DagCbor>> Decode<DagCbor> for (A,) {
461    fn decode<R: Read + Seek>(c: DagCbor, r: &mut R) -> Result<Self> {
462        let major = read_u8(r)?;
463        let result = match major {
464            0x81 => (A::decode(c, r)?,),
465            _ => {
466                return Err(UnexpectedCode::new::<Self>(major).into());
467            }
468        };
469        Ok(result)
470    }
471}
472
473impl<A: Decode<DagCbor>, B: Decode<DagCbor>> Decode<DagCbor> for (A, B) {
474    fn decode<R: Read + Seek>(c: DagCbor, r: &mut R) -> Result<Self> {
475        let major = read_u8(r)?;
476        let result = match major {
477            0x82 => (A::decode(c, r)?, B::decode(c, r)?),
478            _ => {
479                return Err(UnexpectedCode::new::<Self>(major).into());
480            }
481        };
482        Ok(result)
483    }
484}
485
486impl<A: Decode<DagCbor>, B: Decode<DagCbor>, C: Decode<DagCbor>> Decode<DagCbor> for (A, B, C) {
487    fn decode<R: Read + Seek>(c: DagCbor, r: &mut R) -> Result<Self> {
488        let major = read_u8(r)?;
489        let result = match major {
490            0x83 => (A::decode(c, r)?, B::decode(c, r)?, C::decode(c, r)?),
491            _ => {
492                return Err(UnexpectedCode::new::<Self>(major).into());
493            }
494        };
495        Ok(result)
496    }
497}
498
499impl<A: Decode<DagCbor>, B: Decode<DagCbor>, C: Decode<DagCbor>, D: Decode<DagCbor>> Decode<DagCbor>
500    for (A, B, C, D)
501{
502    fn decode<R: Read + Seek>(c: DagCbor, r: &mut R) -> Result<Self> {
503        let major = read_u8(r)?;
504        let result = match major {
505            0x84 => (
506                A::decode(c, r)?,
507                B::decode(c, r)?,
508                C::decode(c, r)?,
509                D::decode(c, r)?,
510            ),
511            _ => {
512                return Err(UnexpectedCode::new::<Self>(major).into());
513            }
514        };
515        Ok(result)
516    }
517}
518
519impl SkipOne for DagCbor {
520    fn skip<R: Read + Seek>(&self, r: &mut R) -> Result<()> {
521        let mut remaining: u64 = 1;
522        while remaining > 0 {
523            remaining -= 1;
524            let major = read_major(r)?;
525            match major.kind() {
526                MajorKind::UnsignedInt | MajorKind::NegativeInt | MajorKind::Other => {
527                    // TODO: validate?
528                    // minimal integer, valid float, etc?
529                    r.seek(SeekFrom::Current(major.len() as i64))?;
530                }
531                MajorKind::ByteString | MajorKind::TextString => {
532                    // We could just reject this case, but we can't just play it fast and loose and
533                    // wrap. We might as well just try to seek (and likely fail).
534                    let mut offset = read_uint(r, major)?;
535                    while offset > i64::MAX as u64 {
536                        r.seek(SeekFrom::Current(i64::MAX))?;
537                        offset -= i64::MAX as u64;
538                    }
539                    // TODO: validate utf8?
540                    r.seek(SeekFrom::Current(offset as i64))?;
541                }
542                MajorKind::Array => {
543                    remaining = remaining
544                        .checked_add(read_uint(r, major)?)
545                        .ok_or_else(LengthOutOfRange::new::<Self>)?;
546                }
547                MajorKind::Map => {
548                    // TODO: consider using a checked "monad" type to simplify.
549                    let items = read_uint(r, major)?
550                        .checked_mul(2)
551                        .ok_or_else(LengthOutOfRange::new::<Self>)?;
552                    remaining = remaining
553                        .checked_add(items)
554                        .ok_or_else(LengthOutOfRange::new::<Self>)?;
555                }
556                MajorKind::Tag => {
557                    // TODO: validate tag?
558                    r.seek(SeekFrom::Current(major.len() as i64))?;
559                    remaining = remaining
560                        .checked_add(1)
561                        .ok_or_else(LengthOutOfRange::new::<Self>)?;
562                }
563            };
564        }
565        Ok(())
566    }
567}
568
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{error::UnexpectedEof, DagCborCodec};
    use libipld_core::codec::Codec;

    /// An indefinite-length map must be rejected by the decoder.
    #[test]
    fn il_map() {
        let encoded = [
            0xBF, // Start indefinite-length map
            0x63, // First key, UTF-8 string length 3
            0x46, 0x75, 0x6e, // "Fun"
            0xF5, // First value, true
            0x63, // Second key, UTF-8 string length 3
            0x41, 0x6d, 0x74, // "Amt"
            0x21, // Second value, -2
            0xFF, // "break"
        ];
        let outcome = DagCborCodec.decode::<Ipld>(&encoded);
        outcome.expect_err("should have failed to decode indefinit length map");
    }

    /// A byte string that declares a huge length but carries a single byte must fail with
    /// `UnexpectedEof`.
    #[test]
    fn bad_list() {
        let encoded = [
            0x5b, // Byte string with an 8 byte length
            0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // very long
            0x01, // but only one byte.
        ];
        let failure = DagCborCodec
            .decode::<Ipld>(&encoded)
            .expect_err("decoding large truncated buffer should have failed");
        failure
            .downcast::<UnexpectedEof>()
            .expect("expected an unexpected eof");
    }

    /// Tuples of arity 0 through 4 survive an encode/decode round trip.
    #[test]
    #[allow(clippy::let_unit_value)]
    fn tuples() -> Result<()> {
        let unit = ();
        let encoded = DagCborCodec.encode(&unit)?;
        let _decoded: () = DagCborCodec.decode(&encoded)?;

        let single = ("hello".to_string(),);
        let encoded = DagCborCodec.encode(&single)?;
        let decoded: (String,) = DagCborCodec.decode(&encoded)?;
        assert_eq!(single, decoded);

        let pair = ("hello".to_string(), "world".to_string());
        let encoded = DagCborCodec.encode(&pair)?;
        let decoded: (String, String) = DagCborCodec.decode(&encoded)?;
        assert_eq!(pair, decoded);

        let triple = ("hello".to_string(), "world".to_string(), 42);
        let encoded = DagCborCodec.encode(&triple)?;
        let decoded: (String, String, u32) = DagCborCodec.decode(&encoded)?;
        assert_eq!(triple, decoded);

        let quad = ("hello".to_string(), "world".to_string(), 42, 64);
        let encoded = DagCborCodec.encode(&quad)?;
        let decoded: (String, String, u32, u8) = DagCborCodec.decode(&encoded)?;
        assert_eq!(quad, decoded);

        Ok(())
    }
}
635}