Skip to main content

cbor_core/
decode_options.rs

1use std::{borrow::Cow, collections::BTreeMap};
2
3use crate::{
4    Error, Float, Format, IoResult, Result, SequenceDecoder, SequenceReader, SimpleValue, Strictness, Value,
5    codec::{Argument, Head, HeadOrStop, Major},
6    io::{HexReader, HexSliceReader, MyReader, SliceReader},
7    limits,
8    parse::Parser,
9    tag::{NEG_BIG_INT, POS_BIG_INT},
10    util::{trim_leading_zeros, u64_from_slice},
11};
12
13/// Configuration for CBOR decoding.
14///
15/// `DecodeOptions` controls the input format ([`Binary`](Format::Binary),
16/// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic)) and the
17/// limits the decoder enforces against hostile or malformed input.
18/// Construct it with [`DecodeOptions::new`] (or `Default`), adjust
19/// settings with the builder methods, and call [`decode`](Self::decode)
20/// or [`read_from`](Self::read_from) for a single item, or
21/// [`sequence_decoder`](Self::sequence_decoder) / [`sequence_reader`](Self::sequence_reader)
22/// for a CBOR sequence.
23///
24/// The convenience methods on [`Value`] ([`decode`](Value::decode),
25/// [`decode_hex`](Value::decode_hex), [`read_from`](Value::read_from),
26/// [`read_hex_from`](Value::read_hex_from)) all forward to a default
27/// `DecodeOptions`. Use this type directly when you need to decode
28/// diagnostic notation, iterate a sequence, relax a limit for a known
29/// input, or tighten one for untrusted input.
30///
31/// # Options
32///
33/// | Option | Default | Purpose |
34/// |---|---|---|
35/// | [`format`](Self::format) | [`Binary`](Format::Binary) | Input syntax: binary, hex text, or diagnostic notation. |
36/// | [`recursion_limit`](Self::recursion_limit) | 200 | Maximum nesting depth of arrays, maps, and tags. |
37/// | [`length_limit`](Self::length_limit) | 1,000,000,000 | Maximum declared element count of a single array, map, byte string, or text string. |
38/// | [`oom_mitigation`](Self::oom_mitigation) | 100,000,000 | Byte budget for speculative pre-allocation. |
39/// | [`strictness`](Self::strictness) | [`Strictness::STRICT`] | Which non-deterministic encodings the decoder accepts and normalizes. |
40///
41/// ## `recursion_limit`
42///
43/// Each array, map, or tag consumes one unit of recursion budget for
44/// its contents. Exceeding the limit returns [`Error::NestingTooDeep`].
45/// The limit protects against stack overflow on adversarial input and
46/// should be well below the stack a thread has available.
47///
48/// ## `length_limit`
49///
50/// Applies to the length field in the CBOR head of arrays, maps, byte
51/// strings, and text strings. It caps the declared size before any
52/// bytes are read, so a malicious header claiming a petabyte-long
53/// string is rejected immediately with [`Error::LengthTooLarge`]. The
54/// limit does not restrict total input size; a valid document may
55/// contain many items each up to the limit.
56///
57/// ## `oom_mitigation`
58///
59/// CBOR encodes lengths in the head, so a decoder is tempted to
60/// pre-allocate a `Vec` of the declared capacity. On hostile input
61/// that is a trivial amplification attack: a few bytes on the wire
62/// reserve gigabytes of memory. `oom_mitigation` is a byte budget,
63/// shared across the current decode, that caps the total amount of
64/// speculative capacity the decoder may reserve for array backing
65/// storage. Once the budget is exhausted, further arrays start empty
66/// and grow on demand. Decoding still succeeds if the input is
67/// well-formed; only the up-front reservation is bounded.
68///
69/// The budget is consumed, not refilled: a deeply nested structure
70/// with many small arrays can drain it early and decode the tail with
71/// zero pre-allocation. That is the intended behavior.
72///
73/// # Examples
74///
75/// Decode binary CBOR with default limits:
76///
77/// ```
78/// use cbor_core::DecodeOptions;
79///
80/// let v = DecodeOptions::new().decode(&[0x18, 42]).unwrap();
81/// assert_eq!(v.to_u32().unwrap(), 42);
82/// ```
83///
84/// Switch the input format to hex text or diagnostic notation:
85///
86/// ```
87/// use cbor_core::{DecodeOptions, Format};
88///
89/// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
90/// assert_eq!(v.to_u32().unwrap(), 42);
91///
92/// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
93/// assert_eq!(v.to_u32().unwrap(), 42);
94/// ```
95///
96/// Tighten limits for input from an untrusted source:
97///
98/// ```
99/// use cbor_core::DecodeOptions;
100///
101/// let strict = DecodeOptions::new()
102///     .recursion_limit(16)
103///     .length_limit(4096)
104///     .oom_mitigation(64 * 1024);
105///
106/// assert!(strict.decode(&[0x18, 42]).is_ok());
107/// ```
#[derive(Debug, Clone)]
pub struct DecodeOptions {
    /// Input syntax to decode; set with the `format` builder method.
    pub(crate) format: Format,
    /// Nesting budget; each array, map, or tag level consumes one unit.
    pub(crate) recursion_limit: u16,
    /// Largest length a CBOR head may declare for a string, array, or map.
    pub(crate) length_limit: u64,
    /// Byte budget for speculative pre-allocation while decoding.
    pub(crate) oom_mitigation: usize,
    /// Which non-deterministic encodings are tolerated (and normalized).
    pub(crate) strictness: Strictness,
}
116
117impl Default for DecodeOptions {
118    fn default() -> Self {
119        Self::new()
120    }
121}
122
123impl DecodeOptions {
124    /// Create a new set of options with the crate defaults.
125    ///
126    /// ```
127    /// use cbor_core::DecodeOptions;
128    ///
129    /// let opts = DecodeOptions::new();
130    /// let v = opts.decode(&[0x18, 42]).unwrap();
131    /// assert_eq!(v.to_u32().unwrap(), 42);
132    /// ```
133    #[must_use]
134    pub const fn new() -> Self {
135        Self {
136            format: Format::Binary,
137            recursion_limit: limits::RECURSION_LIMIT,
138            length_limit: limits::LENGTH_LIMIT,
139            oom_mitigation: limits::OOM_MITIGATION,
140            strictness: Strictness::STRICT,
141        }
142    }
143
144    /// Select the input format: [`Binary`](Format::Binary),
145    /// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic).
146    ///
147    /// Default: [`Format::Binary`].
148    ///
149    /// ```
150    /// use cbor_core::{DecodeOptions, Format};
151    ///
152    /// let hex = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
153    /// let bin = DecodeOptions::new().decode(&[0x18, 0x2a]).unwrap();
154    /// assert_eq!(hex, bin);
155    ///
156    /// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
157    /// assert_eq!(v.to_u32().unwrap(), 42);
158    /// ```
159    pub const fn format(mut self, format: Format) -> Self {
160        self.format = format;
161        self
162    }
163
164    /// Set the maximum nesting depth of arrays, maps, and tags.
165    ///
166    /// Default: 200. Input that exceeds the limit returns
167    /// [`Error::NestingTooDeep`].
168    ///
169    /// ```
170    /// use cbor_core::{DecodeOptions, Error};
171    ///
172    /// // Two nested one-element arrays: 0x81 0x81 0x00
173    /// let err = DecodeOptions::new()
174    ///     .recursion_limit(1)
175    ///     .decode(&[0x81, 0x81, 0x00])
176    ///     .unwrap_err();
177    /// assert_eq!(err, Error::NestingTooDeep);
178    /// ```
179    pub const fn recursion_limit(mut self, limit: u16) -> Self {
180        self.recursion_limit = limit;
181        self
182    }
183
184    /// Set the maximum declared length for byte strings, text strings,
185    /// arrays, and maps.
186    ///
187    /// Default: 1,000,000,000. Checked against the length field in the
188    /// CBOR head before any bytes are consumed; an oversized declaration
189    /// returns [`Error::LengthTooLarge`].
190    ///
191    /// ```
192    /// use cbor_core::{DecodeOptions, Error};
193    ///
194    /// // A five-byte text string: 0x65 'h' 'e' 'l' 'l' 'o'
195    /// let err = DecodeOptions::new()
196    ///     .length_limit(4)
197    ///     .decode(b"\x65hello")
198    ///     .unwrap_err();
199    /// assert_eq!(err, Error::LengthTooLarge);
200    /// ```
201    pub const fn length_limit(mut self, limit: u64) -> Self {
202        self.length_limit = limit;
203        self
204    }
205
206    /// Set the byte budget for speculative pre-allocation of array
207    /// backing storage.
208    ///
209    /// Default: 100,000,000. Lower values trade a small amount of
210    /// decoding throughput for stronger resistance to memory-amplification
211    /// attacks. Valid input decodes regardless; only the up-front
212    /// reservation is bounded.
213    ///
214    /// ```
215    /// use cbor_core::DecodeOptions;
216    ///
217    /// // A two-element array: 0x82 0x01 0x02
218    /// let v = DecodeOptions::new()
219    ///     .oom_mitigation(0)
220    ///     .decode(&[0x82, 0x01, 0x02])
221    ///     .unwrap();
222    /// assert_eq!(v.len(), Some(2));
223    /// ```
224    pub const fn oom_mitigation(mut self, bytes: usize) -> Self {
225        self.oom_mitigation = bytes;
226        self
227    }
228
229    /// Configure which non-deterministic encodings the decoder will
230    /// accept. Default: [`Strictness::STRICT`], which rejects every
231    /// deviation with [`Error::NonDeterministic`].
232    ///
233    /// Pass [`Strictness::LENIENT`] to accept all known deviations, or
234    /// build a custom mix of `allow_*` fields. Tolerated input is
235    /// normalized while decoding, so the resulting [`Value`] is
236    /// canonical and re-encoding it produces CBOR::Core compliant
237    /// bytes.
238    ///
239    /// ```
240    /// use cbor_core::{DecodeOptions, Strictness, Value};
241    ///
242    /// // 255 wrongly encoded with a two byte argument; normalized on read.
243    /// let v = DecodeOptions::new()
244    ///     .strictness(Strictness::LENIENT)
245    ///     .decode(&[0x19, 0x00, 0xff])
246    ///     .unwrap();
247    /// assert_eq!(v, Value::from(255));
248    /// assert_eq!(v.encode(), vec![0x18, 0xff]);
249    /// ```
250    pub const fn strictness(mut self, strictness: Strictness) -> Self {
251        self.strictness = strictness;
252        self
253    }
254
255    /// Decode exactly one CBOR data item from an in-memory buffer.
256    ///
257    /// Takes the input by reference: `&[u8]`, `&[u8; N]`, `&Vec<u8>`,
258    /// `&str`, `&String`, etc. all work via `T: AsRef<[u8]> + ?Sized`.
259    /// In [`Format::Binary`], decoded text and byte strings borrow
260    /// directly from the input slice and the returned [`Value`]
261    /// inherits that lifetime; in [`Format::Hex`] and
262    /// [`Format::Diagnostic`] the result is owned.
263    ///
264    /// The input must contain **exactly one** value: any bytes
265    /// remaining after a successful decode cause
266    /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
267    /// trailing whitespace and comments are accepted, but nothing
268    /// else. Use [`sequence_decoder`](Self::sequence_decoder) when the input is a CBOR
269    /// sequence.
270    ///
271    /// An empty buffer (and, for diagnostic notation, one containing
272    /// only whitespace and comments) returns [`Error::UnexpectedEof`].
273    /// A partial value returns [`Error::UnexpectedEof`] too.
274    ///
275    /// ```
276    /// use cbor_core::{DecodeOptions, Format};
277    ///
278    /// let v = DecodeOptions::new().decode(&[0x18, 42]).unwrap();
279    /// assert_eq!(v.to_u32().unwrap(), 42);
280    ///
281    /// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
282    /// assert_eq!(v.to_u32().unwrap(), 42);
283    ///
284    /// let v = DecodeOptions::new()
285    ///     .format(Format::Diagnostic)
286    ///     .decode("42  / trailing comment is fine /")
287    ///     .unwrap();
288    /// assert_eq!(v.to_u32().unwrap(), 42);
289    /// ```
290    pub fn decode<'a, T>(&self, bytes: &'a T) -> Result<Value<'a>>
291    where
292        T: AsRef<[u8]> + ?Sized,
293    {
294        let bytes = bytes.as_ref();
295        match self.format {
296            Format::Binary => {
297                let mut reader = SliceReader(bytes);
298                let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
299                if !reader.0.is_empty() {
300                    return Err(Error::InvalidFormat);
301                }
302                Ok(value)
303            }
304            Format::Hex => {
305                let mut reader = HexSliceReader(bytes);
306                let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
307                if !reader.0.is_empty() {
308                    return Err(Error::InvalidFormat);
309                }
310                Ok(value)
311            }
312            Format::Diagnostic => {
313                let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit, self.strictness);
314                parser.parse_complete()
315            }
316        }
317    }
318
319    /// Decode exactly one CBOR data item into an owned [`Value`].
320    ///
321    /// Takes the input by value: `Vec<u8>`, `&[u8]`, `&str`, and
322    /// anything else that implements `AsRef<[u8]>` all work. Unlike
323    /// [`decode`](Self::decode), the result never borrows from the
324    /// input regardless of format: text and byte strings are always
325    /// copied into owned allocations. The returned value can be held
326    /// as `Value<'static>` and stored or sent across threads without
327    /// any lifetime constraint.
328    ///
329    /// Use this when the input is short-lived (a temporary buffer, a
330    /// `Vec` returned from a function, etc.) and the decoded value
331    /// needs to outlive it. When the input already lives long enough,
332    /// [`decode`](Self::decode) avoids the copies.
333    ///
334    /// The input must contain **exactly one** value: any bytes
335    /// remaining after a successful decode cause
336    /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
337    /// trailing whitespace and comments are accepted, but nothing
338    /// else. Use [`sequence_decoder`](Self::sequence_decoder) when
339    /// the input is a CBOR sequence.
340    ///
341    /// An empty buffer (and, for diagnostic notation, one containing
342    /// only whitespace and comments) returns [`Error::UnexpectedEof`].
343    /// A partial value returns [`Error::UnexpectedEof`] too.
344    ///
345    /// ```
346    /// use cbor_core::{DecodeOptions, Format, Value};
347    ///
348    /// // Decode from a short-lived Vec without worrying about lifetimes.
349    /// let bytes: Vec<u8> = vec![0x18, 42];
350    /// let v: Value<'static> = DecodeOptions::new().decode_owned(bytes).unwrap();
351    /// assert_eq!(v.to_u32().unwrap(), 42);
352    ///
353    /// // Hex and diagnostic formats work the same way.
354    /// let v: Value<'static> = DecodeOptions::new()
355    ///     .format(Format::Hex)
356    ///     .decode_owned("182a")
357    ///     .unwrap();
358    /// assert_eq!(v.to_u32().unwrap(), 42);
359    /// ```
360    pub fn decode_owned<'a>(&self, bytes: impl AsRef<[u8]>) -> Result<Value<'a>> {
361        let mut bytes = bytes.as_ref();
362
363        match self.format {
364            Format::Binary | Format::Hex => {
365                let value = self.read_from(&mut bytes).map_err(|err| match err {
366                    crate::IoError::Io(_io_error) => unreachable!(),
367                    crate::IoError::Data(error) => error,
368                })?;
369
370                if bytes.is_empty() {
371                    Ok(value)
372                } else {
373                    Err(Error::InvalidFormat)
374                }
375            }
376
377            Format::Diagnostic => {
378                let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit, self.strictness);
379                parser.parse_complete()
380            }
381        }
382    }
383
384    /// Read a single CBOR data item from a stream.
385    ///
386    /// Designed to be called repeatedly to pull successive elements of
387    /// a CBOR sequence:
388    ///
389    /// * In [`Format::Binary`] and [`Format::Hex`] the reader is
390    ///   consumed only up to the end of the item; any bytes after
391    ///   remain in the stream.
392    /// * In [`Format::Diagnostic`] trailing whitespace and comments
393    ///   are consumed up to either end of stream or a top-level
394    ///   separator comma (the comma is also consumed). Anything else
395    ///   after the value fails with [`Error::InvalidFormat`].
396    ///
397    /// Bytes are read into an internal buffer, so the result is
398    /// always owned and can be held as `Value<'static>`. For
399    /// zero-copy decoding from a byte slice, use
400    /// [`decode`](Self::decode) instead.
401    ///
402    /// I/O failures are returned as [`IoError::Io`](crate::IoError::Io);
403    /// malformed or oversized input as [`IoError::Data`](crate::IoError::Data).
404    ///
405    /// ```
406    /// use cbor_core::{DecodeOptions, Format};
407    ///
408    /// let mut bytes: &[u8] = &[0x18, 42];
409    /// let v = DecodeOptions::new().read_from(&mut bytes).unwrap();
410    /// assert_eq!(v.to_u32().unwrap(), 42);
411    ///
412    /// let mut hex: &[u8] = b"182a";
413    /// let v = DecodeOptions::new().format(Format::Hex).read_from(&mut hex).unwrap();
414    /// assert_eq!(v.to_u32().unwrap(), 42);
415    ///
416    /// // Diagnostic: repeated read_from pulls successive sequence items.
417    /// let mut diag: &[u8] = b"1, 2, 3";
418    /// let opts = DecodeOptions::new().format(Format::Diagnostic);
419    /// let a = opts.read_from(&mut diag).unwrap();
420    /// let b = opts.read_from(&mut diag).unwrap();
421    /// let c = opts.read_from(&mut diag).unwrap();
422    /// assert_eq!(a.to_u32().unwrap(), 1);
423    /// assert_eq!(b.to_u32().unwrap(), 2);
424    /// assert_eq!(c.to_u32().unwrap(), 3);
425    /// ```
426    pub fn read_from<'a>(&self, reader: impl std::io::Read) -> IoResult<Value<'a>> {
427        match self.format {
428            Format::Binary => {
429                let mut reader = reader;
430                self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
431            }
432            Format::Hex => {
433                let mut reader = HexReader(reader);
434                self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
435            }
436            Format::Diagnostic => {
437                let mut parser = Parser::new(reader, self.recursion_limit, self.strictness);
438                parser.parse_stream_item()
439            }
440        }
441    }
442
443    /// Create an iterator over a CBOR sequence stored in memory.
444    ///
445    /// The returned [`SequenceDecoder`] yields each successive item of the
446    /// sequence as `Result<Value<'a>>`, where `'a` is the lifetime of
447    /// the input slice. In binary format, items borrow text and byte
448    /// strings from the input; in hex and diagnostic format the items
449    /// are owned. The iterator captures a snapshot of these options;
450    /// subsequent changes to `self` do not affect it.
451    ///
452    /// ```
453    /// use cbor_core::{DecodeOptions, Format};
454    ///
455    /// let opts = DecodeOptions::new().format(Format::Diagnostic);
456    ///
457    /// let items: Vec<_> = opts
458    ///     .sequence_decoder(b"1, 2, 3,")
459    ///     .collect::<Result<_, _>>()
460    ///     .unwrap();
461    /// assert_eq!(items.len(), 3);
462    /// ```
463    pub fn sequence_decoder<'a, T>(&self, input: &'a T) -> SequenceDecoder<'a>
464    where
465        T: AsRef<[u8]> + ?Sized,
466    {
467        SequenceDecoder::with_options(self.clone(), input.as_ref())
468    }
469
470    /// Create an iterator over a CBOR sequence read from a stream.
471    ///
472    /// The returned [`SequenceReader`] yields each successive item as
473    /// `IoResult<Value<'static>>`. `None` indicates a clean end
474    /// between items; a truncated item produces `Some(Err(_))`. Items
475    /// are always owned (the bytes are read into an internal
476    /// buffer); for zero-copy iteration use
477    /// [`sequence_decoder`](Self::sequence_decoder) on a byte slice
478    /// instead.
479    ///
480    /// ```
481    /// use cbor_core::DecodeOptions;
482    ///
483    /// // Binary CBOR sequence: three one-byte items 0x01 0x02 0x03.
484    /// let bytes: &[u8] = &[0x01, 0x02, 0x03];
485    /// let items: Vec<_> = DecodeOptions::new()
486    ///     .sequence_reader(bytes)
487    ///     .collect::<Result<_, _>>()
488    ///     .unwrap();
489    /// assert_eq!(items.len(), 3);
490    /// ```
491    pub fn sequence_reader<R: std::io::Read>(&self, reader: R) -> SequenceReader<R> {
492        SequenceReader::with_options(self.clone(), reader)
493    }
494
495    /// Decode exactly one CBOR data item from an arbitrary reader.
496    /// Used by the sequence iterators to share the core decoding logic.
497    pub(crate) fn decode_one<'a, R>(&self, reader: &mut R) -> std::result::Result<Value<'a>, R::Error>
498    where
499        R: MyReader<'a>,
500        R::Error: From<Error>,
501    {
502        self.do_read(reader, self.recursion_limit, self.oom_mitigation)
503    }
504
505    fn do_read<'a, R>(
506        &self,
507        reader: &mut R,
508        recursion_limit: u16,
509        oom_mitigation: usize,
510    ) -> std::result::Result<Value<'a>, R::Error>
511    where
512        R: MyReader<'a>,
513        R::Error: From<Error>,
514    {
515        match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
516            Some(value) => Ok(value),
517            // A break code where a value was expected (top level, array
518            // item position, map key position, tag content) is malformed.
519            None => Err(Error::Malformed.into()),
520        }
521    }
522
523    /// Read the next item, returning `Ok(None)` when a break code stops
524    /// the input. Used by indefinite-length container loops, which need
525    /// to terminate on the break.
526    fn read_value_or_break<'a, R>(
527        &self,
528        reader: &mut R,
529        recursion_limit: u16,
530        oom_mitigation: usize,
531    ) -> std::result::Result<Option<Value<'a>>, R::Error>
532    where
533        R: MyReader<'a>,
534        R::Error: From<Error>,
535    {
536        match HeadOrStop::read_from(reader)? {
537            HeadOrStop::Definite(head) => self
538                .process_head(head, reader, recursion_limit, oom_mitigation)
539                .map(Some),
540
541            HeadOrStop::Indefinite(major) => {
542                if self.strictness.allow_indefinite_length {
543                    self.process_indefinite(major, reader, recursion_limit, oom_mitigation)
544                        .map(Some)
545                } else {
546                    Err(Error::NonDeterministic.into())
547                }
548            }
549
550            HeadOrStop::Break => Ok(None),
551        }
552    }
553
554    fn process_head<'a, R>(
555        &self,
556        head: Head,
557        reader: &mut R,
558        recursion_limit: u16,
559        oom_mitigation: usize,
560    ) -> std::result::Result<Value<'a>, R::Error>
561    where
562        R: MyReader<'a>,
563        R::Error: From<Error>,
564    {
565        let is_float = head.initial_byte.major() == Major::SimpleOrFloat
566            && matches!(head.argument, Argument::U16(_) | Argument::U32(_) | Argument::U64(_));
567
568        if !is_float && !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
569            return Err(Error::NonDeterministic.into());
570        }
571
572        let this = match head.initial_byte.major() {
573            Major::Unsigned => Value::Unsigned(head.value()),
574            Major::Negative => Value::Negative(head.value()),
575
576            Major::ByteString => {
577                let len = head.value();
578                if len > self.length_limit {
579                    return Err(Error::LengthTooLarge.into());
580                }
581                Value::ByteString(reader.read_cow(len, oom_mitigation)?)
582            }
583
584            Major::TextString => {
585                let len = head.value();
586                if len > self.length_limit {
587                    return Err(Error::LengthTooLarge.into());
588                }
589                let text = match reader.read_cow(len, oom_mitigation)? {
590                    Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes).map_err(Error::from)?),
591                    Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).map_err(Error::from)?),
592                };
593                Value::TextString(text)
594            }
595
596            Major::Array => {
597                let value = head.value();
598
599                if value > self.length_limit {
600                    return Err(Error::LengthTooLarge.into());
601                }
602
603                let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
604                    return Err(Error::NestingTooDeep.into());
605                };
606
607                let request: usize = value.try_into().or(Err(Error::LengthTooLarge))?;
608                let granted = request.min(oom_mitigation / size_of::<Value>());
609                let oom_mitigation = oom_mitigation - granted * size_of::<Value>();
610
611                let mut vec = Vec::with_capacity(granted);
612
613                for _ in 0..value {
614                    vec.push(self.do_read(reader, recursion_limit, oom_mitigation)?);
615                }
616
617                Value::Array(vec)
618            }
619
620            Major::Map => {
621                let value = head.value();
622
623                if value > self.length_limit {
624                    return Err(Error::LengthTooLarge.into());
625                }
626
627                let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
628                    return Err(Error::NestingTooDeep.into());
629                };
630
631                let mut map = BTreeMap::new();
632                for _ in 0..value {
633                    let key = self.do_read(reader, recursion_limit, oom_mitigation)?;
634                    let val = self.do_read(reader, recursion_limit, oom_mitigation)?;
635                    self.map_insert(&mut map, key, val)?;
636                }
637
638                Value::Map(map)
639            }
640
641            Major::Tag => {
642                let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
643                    return Err(Error::NestingTooDeep.into());
644                };
645
646                let tag_number = head.value();
647                let tag_content = self.do_read(reader, recursion_limit, oom_mitigation)?;
648
649                // Big integer canonicalization (tag 2 / tag 3): the
650                // payload must be a byte string longer than 8 bytes
651                // (otherwise the value fits in u64) with no leading
652                // zero byte.
653                match tag_content {
654                    Value::ByteString(bytes) if matches!(tag_number, POS_BIG_INT | NEG_BIG_INT) => {
655                        let canonical = bytes.len() > 8 && bytes[0] != 0;
656                        if canonical {
657                            Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
658                        } else if self.strictness.allow_oversized_bigints {
659                            normalize_bigint(tag_number, bytes)
660                        } else {
661                            return Err(Error::NonDeterministic.into());
662                        }
663                    }
664                    other => Value::Tag(tag_number, Box::new(other)),
665                }
666            }
667
668            Major::SimpleOrFloat => match head.argument {
669                Argument::None => Value::SimpleValue(SimpleValue(head.initial_byte.info())),
670                Argument::U8(n) if n >= 32 => Value::SimpleValue(SimpleValue(n)),
671
672                Argument::U16(bits) => Value::Float(Float::from_bits_u16(bits)),
673                Argument::U32(bits) => self.checked_float(Float::from_bits_u32(bits))?,
674                Argument::U64(bits) => self.checked_float(Float::from_bits_u64(bits))?,
675
676                _ => return Err(Error::Malformed.into()),
677            },
678        };
679
680        Ok(this)
681    }
682
683    fn checked_float<'a>(&self, float: Float) -> Result<Value<'a>> {
684        if float.is_deterministic() {
685            Ok(Value::Float(float))
686        } else if self.strictness.allow_non_shortest_floats {
687            Ok(Value::Float(float.shortest()))
688        } else {
689            Err(Error::NonDeterministic)
690        }
691    }
692
693    /// Insert a key/value pair into a map under the active determinism
694    /// policy. Used by both definite and indefinite-length map decoders.
695    fn map_insert<'a>(&self, map: &mut BTreeMap<Value<'a>, Value<'a>>, key: Value<'a>, val: Value<'a>) -> Result<()> {
696        if !self.strictness.allow_unsorted_map_keys
697            && let Some(last) = map.last_entry()
698            && *last.key() >= key
699        {
700            Err(Error::NonDeterministic)
701        } else if map.insert(key, val).is_some() && !self.strictness.allow_duplicate_map_keys {
702            Err(Error::NonDeterministic)
703        } else {
704            Ok(())
705        }
706    }
707
708    /// Decode an indefinite-length container of the given major type.
709    /// The break code that terminates the container is consumed.
710    fn process_indefinite<'a, R>(
711        &self,
712        major: Major,
713        reader: &mut R,
714        recursion_limit: u16,
715        oom_mitigation: usize,
716    ) -> std::result::Result<Value<'a>, R::Error>
717    where
718        R: MyReader<'a>,
719        R::Error: From<Error>,
720    {
721        match major {
722            Major::ByteString => self.read_indefinite_bytes(reader, oom_mitigation),
723            Major::TextString => self.read_indefinite_text(reader, oom_mitigation),
724            Major::Array => self.read_indefinite_array(reader, recursion_limit, oom_mitigation),
725            Major::Map => self.read_indefinite_map(reader, recursion_limit, oom_mitigation),
726            _ => unreachable!("process_indefinite: invalid major"),
727        }
728    }
729
730    /// Read a `(_ chunk*)` byte string. Each chunk is itself a
731    /// definite-length byte string; an indefinite-length chunk or a
732    /// chunk of a different major type is malformed even in lenient
733    /// mode.
734    fn read_indefinite_bytes<'a, R>(
735        &self,
736        reader: &mut R,
737        oom_mitigation: usize,
738    ) -> std::result::Result<Value<'a>, R::Error>
739    where
740        R: MyReader<'a>,
741        R::Error: From<Error>,
742    {
743        let mut buf = Vec::new();
744        let mut total: u64 = 0;
745
746        loop {
747            match HeadOrStop::read_from(reader)? {
748                HeadOrStop::Break => break,
749
750                HeadOrStop::Definite(head) if head.initial_byte.major() == Major::ByteString => {
751                    if !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
752                        return Err(Error::NonDeterministic.into());
753                    }
754
755                    let chunk_len = head.value();
756
757                    total = total.checked_add(chunk_len).ok_or(Error::LengthTooLarge)?;
758                    if total > self.length_limit {
759                        return Err(Error::LengthTooLarge.into());
760                    }
761
762                    let chunk = reader.read_cow(chunk_len, oom_mitigation)?;
763                    buf.extend_from_slice(&chunk);
764                }
765
766                _ => return Err(Error::Malformed.into()),
767            }
768        }
769
770        Ok(Value::ByteString(Cow::Owned(buf)))
771    }
772
773    /// Read a `(_ chunk*)` text string. Each chunk is independently
774    /// validated as UTF-8 (per RFC 8949 ยง3.2.2).
775    fn read_indefinite_text<'a, R>(
776        &self,
777        reader: &mut R,
778        oom_mitigation: usize,
779    ) -> std::result::Result<Value<'a>, R::Error>
780    where
781        R: MyReader<'a>,
782        R::Error: From<Error>,
783    {
784        let mut buf = String::new();
785        let mut total: u64 = 0;
786
787        loop {
788            match HeadOrStop::read_from(reader)? {
789                HeadOrStop::Break => break,
790
791                HeadOrStop::Definite(head) if head.initial_byte.major() == Major::TextString => {
792                    if !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
793                        return Err(Error::NonDeterministic.into());
794                    }
795
796                    let chunk_len = head.value();
797
798                    total = total.checked_add(chunk_len).ok_or(Error::LengthTooLarge)?;
799                    if total > self.length_limit {
800                        return Err(Error::LengthTooLarge.into());
801                    }
802
803                    let chunk = reader.read_cow(chunk_len, oom_mitigation)?;
804                    buf.push_str(std::str::from_utf8(&chunk).map_err(Error::from)?);
805                }
806
807                _ => return Err(Error::Malformed.into()),
808            }
809        }
810
811        Ok(Value::TextString(Cow::Owned(buf)))
812    }
813
814    fn read_indefinite_array<'a, R>(
815        &self,
816        reader: &mut R,
817        recursion_limit: u16,
818        oom_mitigation: usize,
819    ) -> std::result::Result<Value<'a>, R::Error>
820    where
821        R: MyReader<'a>,
822        R::Error: From<Error>,
823    {
824        let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
825            return Err(Error::NestingTooDeep.into());
826        };
827
828        let mut vec = Vec::new();
829
830        for _ in 0..self.length_limit {
831            match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
832                Some(item) => vec.push(item),
833                None => return Ok(Value::Array(vec)),
834            };
835        }
836
837        match HeadOrStop::read_from(reader)? {
838            HeadOrStop::Definite(_) => Err(Error::LengthTooLarge.into()),
839            HeadOrStop::Indefinite(_) => Err(Error::Malformed.into()),
840            HeadOrStop::Break => Ok(Value::Array(vec)),
841        }
842    }
843
844    fn read_indefinite_map<'a, R>(
845        &self,
846        reader: &mut R,
847        recursion_limit: u16,
848        oom_mitigation: usize,
849    ) -> std::result::Result<Value<'a>, R::Error>
850    where
851        R: MyReader<'a>,
852        R::Error: From<Error>,
853    {
854        let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
855            return Err(Error::NestingTooDeep.into());
856        };
857
858        let mut map = BTreeMap::new();
859
860        for _ in 0..self.length_limit {
861            match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
862                Some(key) => {
863                    let value = self.do_read(reader, recursion_limit, oom_mitigation)?;
864                    self.map_insert(&mut map, key, value)?;
865                }
866                None => return Ok(Value::Map(map)),
867            };
868        }
869
870        match HeadOrStop::read_from(reader)? {
871            HeadOrStop::Definite(_) => Err(Error::LengthTooLarge.into()),
872            HeadOrStop::Indefinite(_) => Err(Error::Malformed.into()),
873            HeadOrStop::Break => Ok(Value::Map(map)),
874        }
875    }
876}
877
878/// Normalize a non-canonical big integer payload.
879///
880/// Strips leading zero bytes and downcasts to
881/// [`Value::Unsigned`] / [`Value::Negative`] when the magnitude fits
882/// in a `u64`. Otherwise returns a tag 2 / tag 3 with a stripped
883/// payload, preserving the [`Cow`] borrow when the input was borrowed.
884fn normalize_bigint<'a>(tag_number: u64, bytes: Cow<'a, [u8]>) -> Value<'a> {
885    fn integer<'b>(tag_number: u64, n: u64) -> Value<'b> {
886        match tag_number {
887            POS_BIG_INT => Value::Unsigned(n),
888            NEG_BIG_INT => Value::Negative(n),
889            _other => unreachable!("normalize_bigint: invalid tag"),
890        }
891    }
892
893    match bytes {
894        Cow::Borrowed(bytes) => {
895            let trimmed = trim_leading_zeros(bytes);
896
897            if let Ok(n) = u64_from_slice(trimmed) {
898                integer(tag_number, n)
899            } else {
900                let bytes = trimmed.into();
901                Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
902            }
903        }
904        Cow::Owned(bytes) => {
905            let trimmed = trim_leading_zeros(&bytes);
906
907            if let Ok(n) = u64_from_slice(trimmed) {
908                integer(tag_number, n)
909            } else {
910                let bytes = if trimmed.len() == bytes.len() {
911                    bytes.into()
912                } else {
913                    trimmed.to_vec().into()
914                };
915                Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
916            }
917        }
918    }
919}