Skip to main content

cbor_core/
decode_options.rs

1use std::collections::BTreeMap;
2
3use crate::{
4    DataType, Error, Float, Format, IoResult, Result, SequenceDecoder, SequenceReader, SimpleValue, Value,
5    codec::{Argument, Head, Major},
6    io::{HexReader, HexSliceReader, MyReader, SliceReader},
7    limits,
8    parse::Parser,
9};
10
11/// Configuration for CBOR decoding.
12///
13/// `DecodeOptions` controls the input format ([`Binary`](Format::Binary),
14/// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic)) and the
15/// limits the decoder enforces against hostile or malformed input.
16/// Construct it with [`DecodeOptions::new`] (or `Default`), adjust
17/// settings with the builder methods, and call [`decode`](Self::decode)
18/// or [`read_from`](Self::read_from) for a single item, or
19/// [`sequence_decoder`](Self::sequence_decoder) / [`sequence_reader`](Self::sequence_reader)
20/// for a CBOR sequence.
21///
22/// The convenience methods on [`Value`] ([`decode`](Value::decode),
23/// [`decode_hex`](Value::decode_hex), [`read_from`](Value::read_from),
24/// [`read_hex_from`](Value::read_hex_from)) all forward to a default
25/// `DecodeOptions`. Use this type directly when you need to decode
26/// diagnostic notation, iterate a sequence, relax a limit for a known
27/// input, or tighten one for untrusted input.
28///
29/// # Options
30///
31/// | Option | Default | Purpose |
32/// |---|---|---|
33/// | [`format`](Self::format) | [`Binary`](Format::Binary) | Input syntax: binary, hex text, or diagnostic notation. |
34/// | [`recursion_limit`](Self::recursion_limit) | 200 | Maximum nesting depth of arrays, maps, and tags. |
35/// | [`length_limit`](Self::length_limit) | 1,000,000,000 | Maximum declared element count of a single array, map, byte string, or text string. |
36/// | [`oom_mitigation`](Self::oom_mitigation) | 100,000,000 | Byte budget for speculative pre-allocation. |
37///
38/// ## `recursion_limit`
39///
40/// Each array, map, or tag consumes one unit of recursion budget for
41/// its contents. Exceeding the limit returns [`Error::NestingTooDeep`].
42/// The limit protects against stack overflow on adversarial input and
43/// should be well below the stack a thread has available.
44///
45/// ## `length_limit`
46///
47/// Applies to the length field in the CBOR head of arrays, maps, byte
48/// strings, and text strings. It caps the declared size before any
49/// bytes are read, so a malicious header claiming a petabyte-long
50/// string is rejected immediately with [`Error::LengthTooLarge`]. The
51/// limit does not restrict total input size; a valid document may
52/// contain many items each up to the limit.
53///
54/// ## `oom_mitigation`
55///
56/// CBOR encodes lengths in the head, so a decoder is tempted to
57/// pre-allocate a `Vec` of the declared capacity. On hostile input
58/// that is a trivial amplification attack: a few bytes on the wire
59/// reserve gigabytes of memory. `oom_mitigation` is a byte budget,
60/// shared across the current decode, that caps the total amount of
61/// speculative capacity the decoder may reserve for array backing
62/// storage. Once the budget is exhausted, further arrays start empty
63/// and grow on demand. Decoding still succeeds if the input is
64/// well-formed; only the up-front reservation is bounded.
65///
66/// The budget is consumed, not refilled: a deeply nested structure
67/// with many small arrays can drain it early and decode the tail with
68/// zero pre-allocation. That is the intended behavior.
69///
70/// # Examples
71///
72/// Decode binary CBOR with default limits:
73///
74/// ```
75/// use cbor_core::DecodeOptions;
76///
77/// let v = DecodeOptions::new().decode([0x18, 42]).unwrap();
78/// assert_eq!(v.to_u32().unwrap(), 42);
79/// ```
80///
81/// Switch the input format to hex text or diagnostic notation:
82///
83/// ```
84/// use cbor_core::{DecodeOptions, Format};
85///
86/// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
87/// assert_eq!(v.to_u32().unwrap(), 42);
88///
89/// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
90/// assert_eq!(v.to_u32().unwrap(), 42);
91/// ```
92///
93/// Tighten limits for input from an untrusted source:
94///
95/// ```
96/// use cbor_core::DecodeOptions;
97///
98/// let strict = DecodeOptions::new()
99///     .recursion_limit(16)
100///     .length_limit(4096)
101///     .oom_mitigation(64 * 1024);
102///
103/// assert!(strict.decode([0x18, 42]).is_ok());
104/// ```
#[derive(Debug, Clone)]
pub struct DecodeOptions {
    /// Input syntax; set with [`format`](Self::format).
    format: Format,
    /// Nesting budget handed to the decoder at the start of each item;
    /// set with [`recursion_limit`](Self::recursion_limit).
    recursion_limit: u16,
    /// Largest length accepted in the head of an array, map, byte
    /// string, or text string; set with [`length_limit`](Self::length_limit).
    length_limit: u64,
    /// Pre-allocation budget in bytes handed to the decoder at the start
    /// of each item; set with [`oom_mitigation`](Self::oom_mitigation).
    oom_mitigation: usize,
}
112
113impl Default for DecodeOptions {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119impl DecodeOptions {
120    /// Create a new set of options with the crate defaults.
121    ///
122    /// ```
123    /// use cbor_core::DecodeOptions;
124    ///
125    /// let opts = DecodeOptions::new();
126    /// let v = opts.decode([0x18, 42]).unwrap();
127    /// assert_eq!(v.to_u32().unwrap(), 42);
128    /// ```
129    #[must_use]
130    pub const fn new() -> Self {
131        Self {
132            format: Format::Binary,
133            recursion_limit: limits::RECURSION_LIMIT,
134            length_limit: limits::LENGTH_LIMIT,
135            oom_mitigation: limits::OOM_MITIGATION,
136        }
137    }
138
139    /// Select the input format: [`Binary`](Format::Binary),
140    /// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic).
141    ///
142    /// Default: [`Format::Binary`].
143    ///
144    /// ```
145    /// use cbor_core::{DecodeOptions, Format};
146    ///
147    /// let hex = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
148    /// let bin = DecodeOptions::new().decode([0x18, 0x2a]).unwrap();
149    /// assert_eq!(hex, bin);
150    ///
151    /// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
152    /// assert_eq!(v.to_u32().unwrap(), 42);
153    /// ```
154    pub const fn format(mut self, format: Format) -> Self {
155        self.format = format;
156        self
157    }
158
159    /// Set the maximum nesting depth of arrays, maps, and tags.
160    ///
161    /// Default: 200. Input that exceeds the limit returns
162    /// [`Error::NestingTooDeep`].
163    ///
164    /// ```
165    /// use cbor_core::{DecodeOptions, Error};
166    ///
167    /// // Two nested one-element arrays: 0x81 0x81 0x00
168    /// let err = DecodeOptions::new()
169    ///     .recursion_limit(1)
170    ///     .decode([0x81, 0x81, 0x00])
171    ///     .unwrap_err();
172    /// assert_eq!(err, Error::NestingTooDeep);
173    /// ```
174    pub const fn recursion_limit(mut self, limit: u16) -> Self {
175        self.recursion_limit = limit;
176        self
177    }
178
179    /// Set the maximum declared length for byte strings, text strings,
180    /// arrays, and maps.
181    ///
182    /// Default: 1,000,000,000. Checked against the length field in the
183    /// CBOR head before any bytes are consumed; an oversized declaration
184    /// returns [`Error::LengthTooLarge`].
185    ///
186    /// ```
187    /// use cbor_core::{DecodeOptions, Error};
188    ///
189    /// // A five-byte text string: 0x65 'h' 'e' 'l' 'l' 'o'
190    /// let err = DecodeOptions::new()
191    ///     .length_limit(4)
192    ///     .decode(b"\x65hello")
193    ///     .unwrap_err();
194    /// assert_eq!(err, Error::LengthTooLarge);
195    /// ```
196    pub const fn length_limit(mut self, limit: u64) -> Self {
197        self.length_limit = limit;
198        self
199    }
200
201    /// Set the byte budget for speculative pre-allocation of array
202    /// backing storage.
203    ///
204    /// Default: 100,000,000. Lower values trade a small amount of
205    /// decoding throughput for stronger resistance to memory-amplification
206    /// attacks. Valid input decodes regardless; only the up-front
207    /// reservation is bounded.
208    ///
209    /// ```
210    /// use cbor_core::DecodeOptions;
211    ///
212    /// // A two-element array: 0x82 0x01 0x02
213    /// let v = DecodeOptions::new()
214    ///     .oom_mitigation(0)
215    ///     .decode([0x82, 0x01, 0x02])
216    ///     .unwrap();
217    /// assert_eq!(v.len(), Some(2));
218    /// ```
219    pub const fn oom_mitigation(mut self, bytes: usize) -> Self {
220        self.oom_mitigation = bytes;
221        self
222    }
223
224    /// Decode exactly one CBOR data item from an in-memory buffer.
225    ///
226    /// Accepts any `AsRef<[u8]>`: `&[u8]`, `Vec<u8>`, `&str`, `String`,
227    /// and so on. The input must contain **exactly one** value: any
228    /// bytes remaining after a successful decode cause
229    /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
230    /// trailing whitespace and comments are accepted, but nothing
231    /// else. Use [`sequence_decoder`](Self::sequence_decoder) when the input is a CBOR
232    /// sequence.
233    ///
234    /// An empty buffer (and, for diagnostic notation, one containing
235    /// only whitespace and comments) returns [`Error::UnexpectedEof`].
236    /// A partial value returns [`Error::UnexpectedEof`] too.
237    ///
238    /// ```
239    /// use cbor_core::{DecodeOptions, Format};
240    ///
241    /// let v = DecodeOptions::new().decode([0x18, 42]).unwrap();
242    /// assert_eq!(v.to_u32().unwrap(), 42);
243    ///
244    /// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
245    /// assert_eq!(v.to_u32().unwrap(), 42);
246    ///
247    /// let v = DecodeOptions::new()
248    ///     .format(Format::Diagnostic)
249    ///     .decode("42  / trailing comment is fine /")
250    ///     .unwrap();
251    /// assert_eq!(v.to_u32().unwrap(), 42);
252    /// ```
253    pub fn decode(&self, bytes: impl AsRef<[u8]>) -> Result<Value> {
254        let bytes = bytes.as_ref();
255        match self.format {
256            Format::Binary => {
257                let mut reader = SliceReader(bytes);
258                let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
259                if !reader.0.is_empty() {
260                    return Err(Error::InvalidFormat);
261                }
262                Ok(value)
263            }
264            Format::Hex => {
265                let mut reader = HexSliceReader(bytes);
266                let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
267                if !reader.0.is_empty() {
268                    return Err(Error::InvalidFormat);
269                }
270                Ok(value)
271            }
272            Format::Diagnostic => {
273                let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit);
274                parser.parse_complete()
275            }
276        }
277    }
278
279    /// Read a single CBOR data item from a stream.
280    ///
281    /// Designed to be called repeatedly to pull successive elements of
282    /// a CBOR sequence:
283    ///
284    /// * In [`Format::Binary`] and [`Format::Hex`] the reader is
285    ///   consumed only up to the end of the item; any bytes after
286    ///   remain in the stream.
287    /// * In [`Format::Diagnostic`] trailing whitespace and comments
288    ///   are consumed up to either end of stream or a top-level
289    ///   separator comma (the comma is also consumed). Anything else
290    ///   after the value fails with [`Error::InvalidFormat`].
291    ///
292    /// I/O failures are returned as [`IoError::Io`](crate::IoError::Io);
293    /// malformed or oversized input as [`IoError::Data`](crate::IoError::Data).
294    ///
295    /// ```
296    /// use cbor_core::{DecodeOptions, Format};
297    ///
298    /// let mut bytes: &[u8] = &[0x18, 42];
299    /// let v = DecodeOptions::new().read_from(&mut bytes).unwrap();
300    /// assert_eq!(v.to_u32().unwrap(), 42);
301    ///
302    /// let mut hex: &[u8] = b"182a";
303    /// let v = DecodeOptions::new().format(Format::Hex).read_from(&mut hex).unwrap();
304    /// assert_eq!(v.to_u32().unwrap(), 42);
305    ///
306    /// // Diagnostic: repeated read_from pulls successive sequence items.
307    /// let mut diag: &[u8] = b"1, 2, 3";
308    /// let opts = DecodeOptions::new().format(Format::Diagnostic);
309    /// let a = opts.read_from(&mut diag).unwrap();
310    /// let b = opts.read_from(&mut diag).unwrap();
311    /// let c = opts.read_from(&mut diag).unwrap();
312    /// assert_eq!(a.to_u32().unwrap(), 1);
313    /// assert_eq!(b.to_u32().unwrap(), 2);
314    /// assert_eq!(c.to_u32().unwrap(), 3);
315    /// ```
316    pub fn read_from(&self, reader: impl std::io::Read) -> IoResult<Value> {
317        match self.format {
318            Format::Binary => {
319                let mut reader = reader;
320                self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
321            }
322            Format::Hex => {
323                let mut reader = HexReader(reader);
324                self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
325            }
326            Format::Diagnostic => {
327                let mut parser = Parser::new(reader, self.recursion_limit);
328                parser.parse_stream_item()
329            }
330        }
331    }
332
333    /// Create an iterator over a CBOR sequence stored in memory.
334    ///
335    /// The returned [`SequenceDecoder`] yields each successive item of the
336    /// sequence as `Result<Value>`. The iterator captures a snapshot
337    /// of these options; subsequent changes to `self` do not affect
338    /// it.
339    ///
340    /// ```
341    /// use cbor_core::{DecodeOptions, Format};
342    ///
343    /// let opts = DecodeOptions::new().format(Format::Diagnostic);
344    ///
345    /// let items: Vec<_> = opts
346    ///     .sequence_decoder(b"1, 2, 3,")
347    ///     .collect::<Result<_, _>>()
348    ///     .unwrap();
349    /// assert_eq!(items.len(), 3);
350    /// ```
351    pub fn sequence_decoder<'a, B: AsRef<[u8]> + ?Sized>(&self, input: &'a B) -> SequenceDecoder<'a> {
352        SequenceDecoder::with_options(self.clone(), input.as_ref())
353    }
354
355    /// Create an iterator over a CBOR sequence read from a stream.
356    ///
357    /// The returned [`SequenceReader`] yields each successive item as
358    /// `IoResult<Value>`. `None` indicates a clean end between items;
359    /// a truncated item produces `Some(Err(_))`.
360    ///
361    /// ```
362    /// use cbor_core::DecodeOptions;
363    ///
364    /// // Binary CBOR sequence: three one-byte items 0x01 0x02 0x03.
365    /// let bytes: &[u8] = &[0x01, 0x02, 0x03];
366    /// let items: Vec<_> = DecodeOptions::new()
367    ///     .sequence_reader(bytes)
368    ///     .collect::<Result<_, _>>()
369    ///     .unwrap();
370    /// assert_eq!(items.len(), 3);
371    /// ```
372    pub fn sequence_reader<R: std::io::Read>(&self, reader: R) -> SequenceReader<R> {
373        SequenceReader::with_options(self.clone(), reader)
374    }
375
376    /// Decode exactly one CBOR data item from an arbitrary reader.
377    /// Used by the sequence iterators to share the core decoding logic.
378    pub(crate) fn decode_one<R>(&self, reader: &mut R) -> std::result::Result<Value, R::Error>
379    where
380        R: MyReader,
381        R::Error: From<Error>,
382    {
383        self.do_read(reader, self.recursion_limit, self.oom_mitigation)
384    }
385
386    /// Expose the parser's recursion limit for sequence iterators.
387    pub(crate) fn recursion_limit_value(&self) -> u16 {
388        self.recursion_limit
389    }
390
391    /// Expose the selected format for sequence iterators.
392    pub(crate) fn format_value(&self) -> Format {
393        self.format
394    }
395
396    fn do_read<R>(
397        &self,
398        reader: &mut R,
399        recursion_limit: u16,
400        oom_mitigation: usize,
401    ) -> std::result::Result<Value, R::Error>
402    where
403        R: MyReader,
404        R::Error: From<Error>,
405    {
406        let head = Head::read_from(reader)?;
407
408        let is_float = head.initial_byte.major() == Major::SimpleOrFloat
409            && matches!(head.argument, Argument::U16(_) | Argument::U32(_) | Argument::U64(_));
410
411        if !is_float && !head.argument.is_deterministic() {
412            return Err(Error::NonDeterministic.into());
413        }
414
415        let this = match head.initial_byte.major() {
416            Major::Unsigned => Value::Unsigned(head.value()),
417            Major::Negative => Value::Negative(head.value()),
418
419            Major::ByteString => {
420                let len = head.value();
421                if len > self.length_limit {
422                    return Err(Error::LengthTooLarge.into());
423                }
424                Value::ByteString(reader.read_vec(len, oom_mitigation)?)
425            }
426
427            Major::TextString => {
428                let len = head.value();
429                if len > self.length_limit {
430                    return Err(Error::LengthTooLarge.into());
431                }
432                let bytes = reader.read_vec(len, oom_mitigation)?;
433                let string = String::from_utf8(bytes).map_err(Error::from)?;
434                Value::TextString(string)
435            }
436
437            Major::Array => {
438                let value = head.value();
439
440                if value > self.length_limit {
441                    return Err(Error::LengthTooLarge.into());
442                }
443
444                let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
445                    return Err(Error::NestingTooDeep.into());
446                };
447
448                let request: usize = value.try_into().or(Err(Error::LengthTooLarge))?;
449                let granted = request.min(oom_mitigation / size_of::<Value>());
450                let oom_mitigation = oom_mitigation - granted * size_of::<Value>();
451
452                let mut vec = Vec::with_capacity(granted);
453
454                for _ in 0..value {
455                    vec.push(self.do_read(reader, recursion_limit, oom_mitigation)?);
456                }
457
458                Value::Array(vec)
459            }
460
461            Major::Map => {
462                let value = head.value();
463
464                if value > self.length_limit {
465                    return Err(Error::LengthTooLarge.into());
466                }
467
468                let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
469                    return Err(Error::NestingTooDeep.into());
470                };
471
472                let mut map = BTreeMap::new();
473                let mut prev = None;
474
475                for _ in 0..value {
476                    let key = self.do_read(reader, recursion_limit, oom_mitigation)?;
477                    let value = self.do_read(reader, recursion_limit, oom_mitigation)?;
478
479                    if let Some((prev_key, prev_value)) = prev.take() {
480                        if prev_key >= key {
481                            return Err(Error::NonDeterministic.into());
482                        }
483                        map.insert(prev_key, prev_value);
484                    }
485
486                    prev = Some((key, value));
487                }
488
489                if let Some((key, value)) = prev.take() {
490                    map.insert(key, value);
491                }
492
493                Value::Map(map)
494            }
495
496            Major::Tag => {
497                let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
498                    return Err(Error::NestingTooDeep.into());
499                };
500
501                let tag_number = head.value();
502                let tag_content = Box::new(self.do_read(reader, recursion_limit, oom_mitigation)?);
503
504                let this = Value::Tag(tag_number, tag_content);
505
506                if this.data_type() == DataType::BigInt {
507                    let bytes = this.as_bytes().unwrap();
508                    let valid = bytes.len() >= 8 && bytes[0] != 0;
509                    if !valid {
510                        return Err(Error::NonDeterministic.into());
511                    }
512                }
513
514                this
515            }
516
517            Major::SimpleOrFloat => match head.argument {
518                Argument::None => Value::SimpleValue(SimpleValue(head.initial_byte.info())),
519                Argument::U8(n) if n >= 32 => Value::SimpleValue(SimpleValue(n)),
520
521                Argument::U16(bits) => Value::Float(Float::from_bits_u16(bits)),
522                Argument::U32(bits) => Value::Float(Float::from_bits_u32(bits)?),
523                Argument::U64(bits) => Value::Float(Float::from_bits_u64(bits)?),
524
525                _ => return Err(Error::Malformed.into()),
526            },
527        };
528
529        Ok(this)
530    }
531}