cbor_core/
decode_options.rs

1use std::{borrow::Cow, collections::BTreeMap};
2
3use crate::{
4    Error, Float, Format, IoResult, Result, SequenceDecoder, SequenceReader, SimpleValue, Strictness, Value,
5    codec::{Argument, Head, HeadOrStop, Major},
6    io::{HexReader, HexSliceReader, MyReader, SliceReader},
7    limits,
8    parse::Parser,
9    tag::{NEG_BIG_INT, POS_BIG_INT},
10    util::{trim_leading_zeros, u64_from_slice},
11};
12
13/// Configuration for CBOR decoding.
14///
15/// `DecodeOptions` controls the input format ([`Binary`](Format::Binary),
16/// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic)) and the
17/// limits the decoder enforces against hostile or malformed input.
18/// Construct it with [`DecodeOptions::new`] (or `Default`), adjust
19/// settings with the builder methods, and call [`decode`](Self::decode)
20/// or [`read_from`](Self::read_from) for a single item, or
21/// [`sequence_decoder`](Self::sequence_decoder) / [`sequence_reader`](Self::sequence_reader)
22/// for a CBOR sequence.
23///
24/// The convenience methods on [`Value`] ([`decode`](Value::decode),
25/// [`decode_hex`](Value::decode_hex), [`read_from`](Value::read_from),
26/// [`read_hex_from`](Value::read_hex_from)) all forward to a default
27/// `DecodeOptions`. Use this type directly when you need to decode
28/// diagnostic notation, iterate a sequence, relax a limit for a known
29/// input, or tighten one for untrusted input.
30///
31/// # Options
32///
33/// | Option | Default | Purpose |
34/// |---|---|---|
35/// | [`format`](Self::format) | [`Binary`](Format::Binary) | Input syntax: binary, hex text, or diagnostic notation. |
36/// | [`recursion_limit`](Self::recursion_limit) | 200 | Maximum nesting depth of arrays, maps, and tags. |
37/// | [`length_limit`](Self::length_limit) | 1,000,000,000 | Maximum declared element count of a single array, map, byte string, or text string. |
38/// | [`oom_mitigation`](Self::oom_mitigation) | 100,000,000 | Byte budget for speculative pre-allocation. |
39/// | [`strictness`](Self::strictness) | [`Strictness::STRICT`] | Which non-deterministic encodings the decoder accepts and normalizes. |
40///
41/// ## `recursion_limit`
42///
43/// Each array, map, or tag consumes one unit of recursion budget for
44/// its contents. Exceeding the limit returns [`Error::NestingTooDeep`].
45/// The limit protects against stack overflow on adversarial input and
46/// should be well below the stack a thread has available.
47///
48/// ## `length_limit`
49///
50/// Applies to the length field in the CBOR head of arrays, maps, byte
51/// strings, and text strings. It caps the declared size before any
52/// bytes are read, so a malicious header claiming a petabyte-long
53/// string is rejected immediately with [`Error::LengthTooLarge`]. The
54/// limit does not restrict total input size; a valid document may
55/// contain many items each up to the limit.
56///
57/// ## `oom_mitigation`
58///
59/// CBOR encodes lengths in the head, so a decoder is tempted to
60/// pre-allocate a `Vec` of the declared capacity. On hostile input
61/// that is a trivial amplification attack: a few bytes on the wire
62/// reserve gigabytes of memory. `oom_mitigation` is a byte budget,
63/// shared across the current decode, that caps the total amount of
64/// speculative capacity the decoder may reserve for array backing
65/// storage. Once the budget is exhausted, further arrays start empty
66/// and grow on demand. Decoding still succeeds if the input is
67/// well-formed; only the up-front reservation is bounded.
68///
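/// The budget is consumed, not refilled: each array deducts its own
/// reservation from the budget it hands down to its elements, so a
/// deeply nested structure with many small arrays can drain it early
/// and decode the innermost levels with zero pre-allocation. That is
/// the intended behavior.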
72///
73/// # Examples
74///
75/// Decode binary CBOR with default limits:
76///
77/// ```
78/// use cbor_core::DecodeOptions;
79///
80/// let v = DecodeOptions::new().decode(&[0x18, 42]).unwrap();
81/// assert_eq!(v.to_u32().unwrap(), 42);
82/// ```
83///
84/// Switch the input format to hex text or diagnostic notation:
85///
86/// ```
87/// use cbor_core::{DecodeOptions, Format};
88///
89/// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
90/// assert_eq!(v.to_u32().unwrap(), 42);
91///
92/// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
93/// assert_eq!(v.to_u32().unwrap(), 42);
94/// ```
95///
96/// Tighten limits for input from an untrusted source:
97///
98/// ```
99/// use cbor_core::DecodeOptions;
100///
101/// let strict = DecodeOptions::new()
102///     .recursion_limit(16)
103///     .length_limit(4096)
104///     .oom_mitigation(64 * 1024);
105///
106/// assert!(strict.decode(&[0x18, 42]).is_ok());
107/// ```
#[derive(Debug, Clone)]
pub struct DecodeOptions {
    /// Input syntax; the decode methods pick their reader or parser
    /// based on this value.
    pub(crate) format: Format,
    /// Nesting budget: entering an array, map, or tag consumes one
    /// unit, and running out yields `Error::NestingTooDeep`.
    pub(crate) recursion_limit: u16,
    /// Upper bound on the length declared in the head of a byte
    /// string, text string, array, or map.
    pub(crate) length_limit: u64,
    /// Byte budget that bounds how much capacity is reserved up front
    /// while decoding.
    pub(crate) oom_mitigation: usize,
    /// Policy describing which non-deterministic encodings are
    /// tolerated.
    pub(crate) strictness: Strictness,
}
116
117impl Default for DecodeOptions {
118    fn default() -> Self {
119        Self::new()
120    }
121}
122
123impl DecodeOptions {
124    /// Create a new set of options with the crate defaults.
125    ///
126    /// ```
127    /// use cbor_core::DecodeOptions;
128    ///
129    /// let opts = DecodeOptions::new();
130    /// let v = opts.decode(&[0x18, 42]).unwrap();
131    /// assert_eq!(v.to_u32().unwrap(), 42);
132    /// ```
133    #[must_use]
134    pub const fn new() -> Self {
135        Self {
136            format: Format::Binary,
137            recursion_limit: limits::RECURSION_LIMIT,
138            length_limit: limits::LENGTH_LIMIT,
139            oom_mitigation: limits::OOM_MITIGATION,
140            strictness: Strictness::STRICT,
141        }
142    }
143
144    /// Select the input format: [`Binary`](Format::Binary),
145    /// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic).
146    ///
147    /// Default: [`Format::Binary`].
148    ///
149    /// ```
150    /// use cbor_core::{DecodeOptions, Format};
151    ///
152    /// let hex = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
153    /// let bin = DecodeOptions::new().decode(&[0x18, 0x2a]).unwrap();
154    /// assert_eq!(hex, bin);
155    ///
156    /// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
157    /// assert_eq!(v.to_u32().unwrap(), 42);
158    /// ```
159    #[must_use]
160    pub const fn format(mut self, format: Format) -> Self {
161        self.format = format;
162        self
163    }
164
165    /// Set the maximum nesting depth of arrays, maps, and tags.
166    ///
167    /// Default: 200. Input that exceeds the limit returns
168    /// [`Error::NestingTooDeep`].
169    ///
170    /// ```
171    /// use cbor_core::{DecodeOptions, Error};
172    ///
173    /// // Two nested one-element arrays: 0x81 0x81 0x00
174    /// let err = DecodeOptions::new()
175    ///     .recursion_limit(1)
176    ///     .decode(&[0x81, 0x81, 0x00])
177    ///     .unwrap_err();
178    /// assert_eq!(err, Error::NestingTooDeep);
179    /// ```
180    #[must_use]
181    pub const fn recursion_limit(mut self, limit: u16) -> Self {
182        self.recursion_limit = limit;
183        self
184    }
185
186    /// Set the maximum declared length for byte strings, text strings,
187    /// arrays, and maps.
188    ///
189    /// Default: 1,000,000,000. Checked against the length field in the
190    /// CBOR head before any bytes are consumed; an oversized declaration
191    /// returns [`Error::LengthTooLarge`].
192    ///
193    /// ```
194    /// use cbor_core::{DecodeOptions, Error};
195    ///
196    /// // A five-byte text string: 0x65 'h' 'e' 'l' 'l' 'o'
197    /// let err = DecodeOptions::new()
198    ///     .length_limit(4)
199    ///     .decode(b"\x65hello")
200    ///     .unwrap_err();
201    /// assert_eq!(err, Error::LengthTooLarge);
202    /// ```
203    #[must_use]
204    pub const fn length_limit(mut self, limit: u64) -> Self {
205        self.length_limit = limit;
206        self
207    }
208
209    /// Set the byte budget for speculative pre-allocation of array
210    /// backing storage.
211    ///
212    /// Default: 100,000,000. Lower values trade a small amount of
213    /// decoding throughput for stronger resistance to memory-amplification
214    /// attacks. Valid input decodes regardless; only the up-front
215    /// reservation is bounded.
216    ///
217    /// ```
218    /// use cbor_core::DecodeOptions;
219    ///
220    /// // A two-element array: 0x82 0x01 0x02
221    /// let v = DecodeOptions::new()
222    ///     .oom_mitigation(0)
223    ///     .decode(&[0x82, 0x01, 0x02])
224    ///     .unwrap();
225    /// assert_eq!(v.len(), Some(2));
226    /// ```
227    #[must_use]
228    pub const fn oom_mitigation(mut self, bytes: usize) -> Self {
229        self.oom_mitigation = bytes;
230        self
231    }
232
233    /// Configure which non-deterministic encodings the decoder will
234    /// accept. Default: [`Strictness::STRICT`], which rejects every
235    /// deviation with [`Error::NonDeterministic`].
236    ///
237    /// Pass [`Strictness::LENIENT`] to accept all known deviations, or
238    /// build a custom mix of `allow_*` fields. Tolerated input is
239    /// normalized while decoding, so the resulting [`Value`] is
240    /// canonical and re-encoding it produces CBOR::Core compliant
241    /// bytes.
242    ///
243    /// ```
244    /// use cbor_core::{DecodeOptions, Strictness, Value};
245    ///
246    /// // 255 wrongly encoded with a two byte argument; normalized on read.
247    /// let v = DecodeOptions::new()
248    ///     .strictness(Strictness::LENIENT)
249    ///     .decode(&[0x19, 0x00, 0xff])
250    ///     .unwrap();
251    /// assert_eq!(v, Value::from(255));
252    /// assert_eq!(v.encode(), vec![0x18, 0xff]);
253    /// ```
254    #[must_use]
255    pub const fn strictness(mut self, strictness: Strictness) -> Self {
256        self.strictness = strictness;
257        self
258    }
259
260    /// Decode exactly one CBOR data item from an in-memory buffer.
261    ///
262    /// Takes the input by reference: `&[u8]`, `&[u8; N]`, `&Vec<u8>`,
263    /// `&str`, `&String`, etc. all work via `T: AsRef<[u8]> + ?Sized`.
264    /// In [`Format::Binary`], decoded text and byte strings borrow
265    /// directly from the input slice and the returned [`Value`]
266    /// inherits that lifetime; in [`Format::Hex`] and
267    /// [`Format::Diagnostic`] the result is owned.
268    ///
269    /// The input must contain **exactly one** value: any bytes
270    /// remaining after a successful decode cause
271    /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
272    /// trailing whitespace and comments are accepted, but nothing
273    /// else. Use [`sequence_decoder`](Self::sequence_decoder) when the input is a CBOR
274    /// sequence.
275    ///
276    /// An empty buffer (and, for diagnostic notation, one containing
277    /// only whitespace and comments) returns [`Error::UnexpectedEof`].
278    /// A partial value returns [`Error::UnexpectedEof`] too.
279    ///
280    /// ```
281    /// use cbor_core::{DecodeOptions, Format};
282    ///
283    /// let v = DecodeOptions::new().decode(&[0x18, 42]).unwrap();
284    /// assert_eq!(v.to_u32().unwrap(), 42);
285    ///
286    /// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
287    /// assert_eq!(v.to_u32().unwrap(), 42);
288    ///
289    /// let v = DecodeOptions::new()
290    ///     .format(Format::Diagnostic)
291    ///     .decode("42  / trailing comment is fine /")
292    ///     .unwrap();
293    /// assert_eq!(v.to_u32().unwrap(), 42);
294    /// ```
295    pub fn decode<'a, T>(&self, bytes: &'a T) -> Result<Value<'a>>
296    where
297        T: AsRef<[u8]> + ?Sized,
298    {
299        let bytes = bytes.as_ref();
300        match self.format {
301            Format::Binary => {
302                let mut reader = SliceReader(bytes);
303                let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
304                if !reader.0.is_empty() {
305                    return Err(Error::InvalidFormat);
306                }
307                Ok(value)
308            }
309            Format::Hex => {
310                let mut reader = HexSliceReader(bytes);
311                let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
312                if !reader.0.is_empty() {
313                    return Err(Error::InvalidFormat);
314                }
315                Ok(value)
316            }
317            Format::Diagnostic => {
318                let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit, self.strictness);
319                parser.parse_complete()
320            }
321        }
322    }
323
324    /// Decode exactly one CBOR data item into an owned [`Value`].
325    ///
326    /// Takes the input by value: `Vec<u8>`, `&[u8]`, `&str`, and
327    /// anything else that implements `AsRef<[u8]>` all work. Unlike
328    /// [`decode`](Self::decode), the result never borrows from the
329    /// input regardless of format: text and byte strings are always
330    /// copied into owned allocations. The returned value can be held
331    /// as `Value<'static>` and stored or sent across threads without
332    /// any lifetime constraint.
333    ///
334    /// Use this when the input is short-lived (a temporary buffer, a
335    /// `Vec` returned from a function, etc.) and the decoded value
336    /// needs to outlive it. When the input already lives long enough,
337    /// [`decode`](Self::decode) avoids the copies.
338    ///
339    /// The input must contain **exactly one** value: any bytes
340    /// remaining after a successful decode cause
341    /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
342    /// trailing whitespace and comments are accepted, but nothing
343    /// else. Use [`sequence_decoder`](Self::sequence_decoder) when
344    /// the input is a CBOR sequence.
345    ///
346    /// An empty buffer (and, for diagnostic notation, one containing
347    /// only whitespace and comments) returns [`Error::UnexpectedEof`].
348    /// A partial value returns [`Error::UnexpectedEof`] too.
349    ///
350    /// ```
351    /// use cbor_core::{DecodeOptions, Format, Value};
352    ///
353    /// // Decode from a short-lived Vec without worrying about lifetimes.
354    /// let bytes: Vec<u8> = vec![0x18, 42];
355    /// let v: Value<'static> = DecodeOptions::new().decode_owned(bytes).unwrap();
356    /// assert_eq!(v.to_u32().unwrap(), 42);
357    ///
358    /// // Hex and diagnostic formats work the same way.
359    /// let v: Value<'static> = DecodeOptions::new()
360    ///     .format(Format::Hex)
361    ///     .decode_owned("182a")
362    ///     .unwrap();
363    /// assert_eq!(v.to_u32().unwrap(), 42);
364    /// ```
365    pub fn decode_owned<'a>(&self, bytes: impl AsRef<[u8]>) -> Result<Value<'a>> {
366        let mut bytes = bytes.as_ref();
367
368        match self.format {
369            Format::Binary | Format::Hex => {
370                let value = self.read_from(&mut bytes).map_err(|err| match err {
371                    crate::IoError::Io(_io_error) => unreachable!(),
372                    crate::IoError::Data(error) => error,
373                })?;
374
375                if bytes.is_empty() {
376                    Ok(value)
377                } else {
378                    Err(Error::InvalidFormat)
379                }
380            }
381
382            Format::Diagnostic => {
383                let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit, self.strictness);
384                parser.parse_complete()
385            }
386        }
387    }
388
389    /// Read a single CBOR data item from a stream.
390    ///
391    /// Designed to be called repeatedly to pull successive elements of
392    /// a CBOR sequence:
393    ///
394    /// * In [`Format::Binary`] and [`Format::Hex`] the reader is
395    ///   consumed only up to the end of the item; any bytes after
396    ///   remain in the stream.
397    /// * In [`Format::Diagnostic`] trailing whitespace and comments
398    ///   are consumed up to either end of stream or a top-level
399    ///   separator comma (the comma is also consumed). Anything else
400    ///   after the value fails with [`Error::InvalidFormat`].
401    ///
402    /// Bytes are read into an internal buffer, so the result is
403    /// always owned and can be held as `Value<'static>`. For
404    /// zero-copy decoding from a byte slice, use
405    /// [`decode`](Self::decode) instead.
406    ///
407    /// I/O failures are returned as [`IoError::Io`](crate::IoError::Io);
408    /// malformed or oversized input as [`IoError::Data`](crate::IoError::Data).
409    ///
410    /// ```
411    /// use cbor_core::{DecodeOptions, Format};
412    ///
413    /// let mut bytes: &[u8] = &[0x18, 42];
414    /// let v = DecodeOptions::new().read_from(&mut bytes).unwrap();
415    /// assert_eq!(v.to_u32().unwrap(), 42);
416    ///
417    /// let mut hex: &[u8] = b"182a";
418    /// let v = DecodeOptions::new().format(Format::Hex).read_from(&mut hex).unwrap();
419    /// assert_eq!(v.to_u32().unwrap(), 42);
420    ///
421    /// // Diagnostic: repeated read_from pulls successive sequence items.
422    /// let mut diag: &[u8] = b"1, 2, 3";
423    /// let opts = DecodeOptions::new().format(Format::Diagnostic);
424    /// let a = opts.read_from(&mut diag).unwrap();
425    /// let b = opts.read_from(&mut diag).unwrap();
426    /// let c = opts.read_from(&mut diag).unwrap();
427    /// assert_eq!(a.to_u32().unwrap(), 1);
428    /// assert_eq!(b.to_u32().unwrap(), 2);
429    /// assert_eq!(c.to_u32().unwrap(), 3);
430    /// ```
431    pub fn read_from<'a>(&self, reader: impl std::io::Read) -> IoResult<Value<'a>> {
432        match self.format {
433            Format::Binary => {
434                let mut reader = reader;
435                self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
436            }
437            Format::Hex => {
438                let mut reader = HexReader(reader);
439                self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
440            }
441            Format::Diagnostic => {
442                let mut parser = Parser::new(reader, self.recursion_limit, self.strictness);
443                parser.parse_stream_item()
444            }
445        }
446    }
447
448    /// Create an iterator over a CBOR sequence stored in memory.
449    ///
450    /// The returned [`SequenceDecoder`] yields each successive item of the
451    /// sequence as `Result<Value<'a>>`, where `'a` is the lifetime of
452    /// the input slice. In binary format, items borrow text and byte
453    /// strings from the input; in hex and diagnostic format the items
454    /// are owned. The iterator captures a snapshot of these options;
455    /// subsequent changes to `self` do not affect it.
456    ///
457    /// ```
458    /// use cbor_core::{DecodeOptions, Format};
459    ///
460    /// let opts = DecodeOptions::new().format(Format::Diagnostic);
461    ///
462    /// let items: Vec<_> = opts
463    ///     .sequence_decoder(b"1, 2, 3,")
464    ///     .collect::<Result<_, _>>()
465    ///     .unwrap();
466    /// assert_eq!(items.len(), 3);
467    /// ```
468    pub fn sequence_decoder<'a, T>(&self, input: &'a T) -> SequenceDecoder<'a>
469    where
470        T: AsRef<[u8]> + ?Sized,
471    {
472        SequenceDecoder::with_options(self.clone(), input.as_ref())
473    }
474
475    /// Create an iterator over a CBOR sequence read from a stream.
476    ///
477    /// The returned [`SequenceReader`] yields each successive item as
478    /// `IoResult<Value<'static>>`. `None` indicates a clean end
479    /// between items; a truncated item produces `Some(Err(_))`. Items
480    /// are always owned (the bytes are read into an internal
481    /// buffer); for zero-copy iteration use
482    /// [`sequence_decoder`](Self::sequence_decoder) on a byte slice
483    /// instead.
484    ///
485    /// ```
486    /// use cbor_core::DecodeOptions;
487    ///
488    /// // Binary CBOR sequence: three one-byte items 0x01 0x02 0x03.
489    /// let bytes: &[u8] = &[0x01, 0x02, 0x03];
490    /// let items: Vec<_> = DecodeOptions::new()
491    ///     .sequence_reader(bytes)
492    ///     .collect::<Result<_, _>>()
493    ///     .unwrap();
494    /// assert_eq!(items.len(), 3);
495    /// ```
496    pub fn sequence_reader<R: std::io::Read>(&self, reader: R) -> SequenceReader<R> {
497        SequenceReader::with_options(self.clone(), reader)
498    }
499
500    /// Decode exactly one CBOR data item from an arbitrary reader.
501    /// Used by the sequence iterators to share the core decoding logic.
502    pub(crate) fn decode_one<'a, R>(&self, reader: &mut R) -> std::result::Result<Value<'a>, R::Error>
503    where
504        R: MyReader<'a>,
505        R::Error: From<Error>,
506    {
507        self.do_read(reader, self.recursion_limit, self.oom_mitigation)
508    }
509
510    fn do_read<'a, R>(
511        &self,
512        reader: &mut R,
513        recursion_limit: u16,
514        oom_mitigation: usize,
515    ) -> std::result::Result<Value<'a>, R::Error>
516    where
517        R: MyReader<'a>,
518        R::Error: From<Error>,
519    {
520        match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
521            Some(value) => Ok(value),
522            // A break code where a value was expected (top level, array
523            // item position, map key position, tag content) is malformed.
524            None => Err(Error::Malformed.into()),
525        }
526    }
527
528    /// Read the next item, returning `Ok(None)` when a break code stops
529    /// the input. Used by indefinite-length container loops, which need
530    /// to terminate on the break.
531    fn read_value_or_break<'a, R>(
532        &self,
533        reader: &mut R,
534        recursion_limit: u16,
535        oom_mitigation: usize,
536    ) -> std::result::Result<Option<Value<'a>>, R::Error>
537    where
538        R: MyReader<'a>,
539        R::Error: From<Error>,
540    {
541        match HeadOrStop::read_from(reader)? {
542            HeadOrStop::Definite(head) => self
543                .process_head(head, reader, recursion_limit, oom_mitigation)
544                .map(Some),
545
546            HeadOrStop::Indefinite(major) => {
547                if self.strictness.allow_indefinite_length {
548                    self.process_indefinite(major, reader, recursion_limit, oom_mitigation)
549                        .map(Some)
550                } else {
551                    Err(Error::NonDeterministic.into())
552                }
553            }
554
555            HeadOrStop::Break => Ok(None),
556        }
557    }
558
559    fn process_head<'a, R>(
560        &self,
561        head: Head,
562        reader: &mut R,
563        recursion_limit: u16,
564        oom_mitigation: usize,
565    ) -> std::result::Result<Value<'a>, R::Error>
566    where
567        R: MyReader<'a>,
568        R::Error: From<Error>,
569    {
570        let is_float = head.initial_byte.major() == Major::SimpleOrFloat
571            && matches!(head.argument, Argument::U16(_) | Argument::U32(_) | Argument::U64(_));
572
573        if !is_float && !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
574            return Err(Error::NonDeterministic.into());
575        }
576
577        let this = match head.initial_byte.major() {
578            Major::Unsigned => Value::Unsigned(head.value()),
579            Major::Negative => Value::Negative(head.value()),
580
581            Major::ByteString => {
582                let len = head.value();
583                if len > self.length_limit {
584                    return Err(Error::LengthTooLarge.into());
585                }
586                Value::ByteString(reader.read_cow(len, oom_mitigation)?)
587            }
588
589            Major::TextString => {
590                let len = head.value();
591                if len > self.length_limit {
592                    return Err(Error::LengthTooLarge.into());
593                }
594                let text = match reader.read_cow(len, oom_mitigation)? {
595                    Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes).map_err(Error::from)?),
596                    Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).map_err(Error::from)?),
597                };
598                Value::TextString(text)
599            }
600
601            Major::Array => {
602                let value = head.value();
603
604                if value > self.length_limit {
605                    return Err(Error::LengthTooLarge.into());
606                }
607
608                let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
609                    return Err(Error::NestingTooDeep.into());
610                };
611
612                let request: usize = value.try_into().or(Err(Error::LengthTooLarge))?;
613                let granted = request.min(oom_mitigation / size_of::<Value>());
614                let oom_mitigation = oom_mitigation - granted * size_of::<Value>();
615
616                let mut vec = Vec::with_capacity(granted);
617
618                for _ in 0..value {
619                    vec.push(self.do_read(reader, recursion_limit, oom_mitigation)?);
620                }
621
622                Value::Array(vec)
623            }
624
625            Major::Map => {
626                let value = head.value();
627
628                if value > self.length_limit {
629                    return Err(Error::LengthTooLarge.into());
630                }
631
632                let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
633                    return Err(Error::NestingTooDeep.into());
634                };
635
636                let mut map = BTreeMap::new();
637                for _ in 0..value {
638                    let key = self.do_read(reader, recursion_limit, oom_mitigation)?;
639                    let val = self.do_read(reader, recursion_limit, oom_mitigation)?;
640                    self.map_insert(&mut map, key, val)?;
641                }
642
643                Value::Map(map)
644            }
645
646            Major::Tag => {
647                let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
648                    return Err(Error::NestingTooDeep.into());
649                };
650
651                let tag_number = head.value();
652                let tag_content = self.do_read(reader, recursion_limit, oom_mitigation)?;
653
654                // Big integer canonicalization (tag 2 / tag 3): the
655                // payload must be a byte string longer than 8 bytes
656                // (otherwise the value fits in u64) with no leading
657                // zero byte.
658                match tag_content {
659                    Value::ByteString(bytes) if matches!(tag_number, POS_BIG_INT | NEG_BIG_INT) => {
660                        let canonical = bytes.len() > 8 && bytes[0] != 0;
661                        if canonical {
662                            Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
663                        } else if self.strictness.allow_oversized_bigints {
664                            normalize_bigint(tag_number, bytes)
665                        } else {
666                            return Err(Error::NonDeterministic.into());
667                        }
668                    }
669                    other => Value::Tag(tag_number, Box::new(other)),
670                }
671            }
672
673            Major::SimpleOrFloat => match head.argument {
674                Argument::None => Value::SimpleValue(SimpleValue(head.initial_byte.info())),
675                Argument::U8(n) if n >= 32 => Value::SimpleValue(SimpleValue(n)),
676
677                Argument::U16(bits) => Value::Float(Float::from_bits_u16(bits)),
678                Argument::U32(bits) => self.checked_float(Float::from_bits_u32(bits))?,
679                Argument::U64(bits) => self.checked_float(Float::from_bits_u64(bits))?,
680
681                _ => return Err(Error::Malformed.into()),
682            },
683        };
684
685        Ok(this)
686    }
687
688    fn checked_float<'a>(&self, float: Float) -> Result<Value<'a>> {
689        if float.is_deterministic() {
690            Ok(Value::Float(float))
691        } else if self.strictness.allow_non_shortest_floats {
692            Ok(Value::Float(float.shortest()))
693        } else {
694            Err(Error::NonDeterministic)
695        }
696    }
697
698    /// Insert a key/value pair into a map under the active determinism
699    /// policy. Used by both definite and indefinite-length map decoders.
700    fn map_insert<'a>(&self, map: &mut BTreeMap<Value<'a>, Value<'a>>, key: Value<'a>, val: Value<'a>) -> Result<()> {
701        if !self.strictness.allow_unsorted_map_keys
702            && let Some(last) = map.last_entry()
703            && *last.key() >= key
704        {
705            Err(Error::NonDeterministic)
706        } else if map.insert(key, val).is_some() && !self.strictness.allow_duplicate_map_keys {
707            Err(Error::NonDeterministic)
708        } else {
709            Ok(())
710        }
711    }
712
713    /// Decode an indefinite-length container of the given major type.
714    /// The break code that terminates the container is consumed.
715    fn process_indefinite<'a, R>(
716        &self,
717        major: Major,
718        reader: &mut R,
719        recursion_limit: u16,
720        oom_mitigation: usize,
721    ) -> std::result::Result<Value<'a>, R::Error>
722    where
723        R: MyReader<'a>,
724        R::Error: From<Error>,
725    {
726        match major {
727            Major::ByteString => self.read_indefinite_bytes(reader, oom_mitigation),
728            Major::TextString => self.read_indefinite_text(reader, oom_mitigation),
729            Major::Array => self.read_indefinite_array(reader, recursion_limit, oom_mitigation),
730            Major::Map => self.read_indefinite_map(reader, recursion_limit, oom_mitigation),
731            _ => unreachable!("process_indefinite: invalid major"),
732        }
733    }
734
735    /// Read a `(_ chunk*)` byte string. Each chunk is itself a
736    /// definite-length byte string; an indefinite-length chunk or a
737    /// chunk of a different major type is malformed even in lenient
738    /// mode.
739    fn read_indefinite_bytes<'a, R>(
740        &self,
741        reader: &mut R,
742        oom_mitigation: usize,
743    ) -> std::result::Result<Value<'a>, R::Error>
744    where
745        R: MyReader<'a>,
746        R::Error: From<Error>,
747    {
748        let mut buf = Vec::new();
749        let mut total: u64 = 0;
750
751        loop {
752            match HeadOrStop::read_from(reader)? {
753                HeadOrStop::Break => break,
754
755                HeadOrStop::Definite(head) if head.initial_byte.major() == Major::ByteString => {
756                    if !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
757                        return Err(Error::NonDeterministic.into());
758                    }
759
760                    let chunk_len = head.value();
761
762                    total = total.checked_add(chunk_len).ok_or(Error::LengthTooLarge)?;
763                    if total > self.length_limit {
764                        return Err(Error::LengthTooLarge.into());
765                    }
766
767                    let chunk = reader.read_cow(chunk_len, oom_mitigation)?;
768                    buf.extend_from_slice(&chunk);
769                }
770
771                _ => return Err(Error::Malformed.into()),
772            }
773        }
774
775        Ok(Value::ByteString(Cow::Owned(buf)))
776    }
777
778    /// Read a `(_ chunk*)` text string. Each chunk is independently
779    /// validated as UTF-8 (per RFC 8949 §3.2.2).
780    fn read_indefinite_text<'a, R>(
781        &self,
782        reader: &mut R,
783        oom_mitigation: usize,
784    ) -> std::result::Result<Value<'a>, R::Error>
785    where
786        R: MyReader<'a>,
787        R::Error: From<Error>,
788    {
789        let mut buf = String::new();
790        let mut total: u64 = 0;
791
792        loop {
793            match HeadOrStop::read_from(reader)? {
794                HeadOrStop::Break => break,
795
796                HeadOrStop::Definite(head) if head.initial_byte.major() == Major::TextString => {
797                    if !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
798                        return Err(Error::NonDeterministic.into());
799                    }
800
801                    let chunk_len = head.value();
802
803                    total = total.checked_add(chunk_len).ok_or(Error::LengthTooLarge)?;
804                    if total > self.length_limit {
805                        return Err(Error::LengthTooLarge.into());
806                    }
807
808                    let chunk = reader.read_cow(chunk_len, oom_mitigation)?;
809                    buf.push_str(std::str::from_utf8(&chunk).map_err(Error::from)?);
810                }
811
812                _ => return Err(Error::Malformed.into()),
813            }
814        }
815
816        Ok(Value::TextString(Cow::Owned(buf)))
817    }
818
819    fn read_indefinite_array<'a, R>(
820        &self,
821        reader: &mut R,
822        recursion_limit: u16,
823        oom_mitigation: usize,
824    ) -> std::result::Result<Value<'a>, R::Error>
825    where
826        R: MyReader<'a>,
827        R::Error: From<Error>,
828    {
829        let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
830            return Err(Error::NestingTooDeep.into());
831        };
832
833        let mut vec = Vec::new();
834
835        for _ in 0..self.length_limit {
836            match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
837                Some(item) => vec.push(item),
838                None => return Ok(Value::Array(vec)),
839            }
840        }
841
842        match HeadOrStop::read_from(reader)? {
843            HeadOrStop::Definite(_) => Err(Error::LengthTooLarge.into()),
844            HeadOrStop::Indefinite(_) => Err(Error::Malformed.into()),
845            HeadOrStop::Break => Ok(Value::Array(vec)),
846        }
847    }
848
849    fn read_indefinite_map<'a, R>(
850        &self,
851        reader: &mut R,
852        recursion_limit: u16,
853        oom_mitigation: usize,
854    ) -> std::result::Result<Value<'a>, R::Error>
855    where
856        R: MyReader<'a>,
857        R::Error: From<Error>,
858    {
859        let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
860            return Err(Error::NestingTooDeep.into());
861        };
862
863        let mut map = BTreeMap::new();
864
865        for _ in 0..self.length_limit {
866            match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
867                Some(key) => {
868                    let value = self.do_read(reader, recursion_limit, oom_mitigation)?;
869                    self.map_insert(&mut map, key, value)?;
870                }
871                None => return Ok(Value::Map(map)),
872            }
873        }
874
875        match HeadOrStop::read_from(reader)? {
876            HeadOrStop::Definite(_) => Err(Error::LengthTooLarge.into()),
877            HeadOrStop::Indefinite(_) => Err(Error::Malformed.into()),
878            HeadOrStop::Break => Ok(Value::Map(map)),
879        }
880    }
881}
882
883/// Normalize a non-canonical big integer payload.
884///
885/// Strips leading zero bytes and downcasts to
886/// [`Value::Unsigned`] / [`Value::Negative`] when the magnitude fits
887/// in a `u64`. Otherwise returns a tag 2 / tag 3 with a stripped
888/// payload, preserving the [`Cow`] borrow when the input was borrowed.
889fn normalize_bigint(tag_number: u64, bytes: Cow<'_, [u8]>) -> Value<'_> {
890    fn integer<'b>(tag_number: u64, n: u64) -> Value<'b> {
891        match tag_number {
892            POS_BIG_INT => Value::Unsigned(n),
893            NEG_BIG_INT => Value::Negative(n),
894            _other => unreachable!("normalize_bigint: invalid tag"),
895        }
896    }
897
898    match bytes {
899        Cow::Borrowed(bytes) => {
900            let trimmed = trim_leading_zeros(bytes);
901
902            if let Ok(n) = u64_from_slice(trimmed) {
903                integer(tag_number, n)
904            } else {
905                let bytes = trimmed.into();
906                Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
907            }
908        }
909        Cow::Owned(bytes) => {
910            let trimmed = trim_leading_zeros(&bytes);
911
912            if let Ok(n) = u64_from_slice(trimmed) {
913                integer(tag_number, n)
914            } else {
915                let bytes = if trimmed.len() == bytes.len() {
916                    bytes.into()
917                } else {
918                    trimmed.to_vec().into()
919                };
920                Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
921            }
922        }
923    }
924}
cbor_core/decode_options.rs

cbor_core/
decode_options.rs