cbor_core/decode_options.rs
1use std::{borrow::Cow, collections::BTreeMap};
2
3use crate::{
4 Error, Float, Format, IoResult, Result, SequenceDecoder, SequenceReader, SimpleValue, Strictness, Value,
5 codec::{Argument, Head, HeadOrStop, Major},
6 io::{HexReader, HexSliceReader, MyReader, SliceReader},
7 limits,
8 parse::Parser,
9 tag::{NEG_BIG_INT, POS_BIG_INT},
10 util::{trim_leading_zeros, u64_from_slice},
11};
12
13/// Configuration for CBOR decoding.
14///
15/// `DecodeOptions` controls the input format ([`Binary`](Format::Binary),
16/// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic)) and the
17/// limits the decoder enforces against hostile or malformed input.
18/// Construct it with [`DecodeOptions::new`] (or `Default`), adjust
19/// settings with the builder methods, and call [`decode`](Self::decode)
20/// or [`read_from`](Self::read_from) for a single item, or
21/// [`sequence_decoder`](Self::sequence_decoder) / [`sequence_reader`](Self::sequence_reader)
22/// for a CBOR sequence.
23///
24/// The convenience methods on [`Value`] ([`decode`](Value::decode),
25/// [`decode_hex`](Value::decode_hex), [`read_from`](Value::read_from),
26/// [`read_hex_from`](Value::read_hex_from)) all forward to a default
27/// `DecodeOptions`. Use this type directly when you need to decode
28/// diagnostic notation, iterate a sequence, relax a limit for a known
29/// input, or tighten one for untrusted input.
30///
31/// # Options
32///
33/// | Option | Default | Purpose |
34/// |---|---|---|
35/// | [`format`](Self::format) | [`Binary`](Format::Binary) | Input syntax: binary, hex text, or diagnostic notation. |
36/// | [`recursion_limit`](Self::recursion_limit) | 200 | Maximum nesting depth of arrays, maps, and tags. |
37/// | [`length_limit`](Self::length_limit) | 1,000,000,000 | Maximum declared element count of a single array, map, byte string, or text string. |
38/// | [`oom_mitigation`](Self::oom_mitigation) | 100,000,000 | Byte budget for speculative pre-allocation. |
39/// | [`strictness`](Self::strictness) | [`Strictness::STRICT`] | Which non-deterministic encodings the decoder accepts and normalizes. |
40///
41/// ## `recursion_limit`
42///
43/// Each array, map, or tag consumes one unit of recursion budget for
44/// its contents. Exceeding the limit returns [`Error::NestingTooDeep`].
45/// The limit protects against stack overflow on adversarial input and
46/// should be well below the stack a thread has available.
47///
48/// ## `length_limit`
49///
50/// Applies to the length field in the CBOR head of arrays, maps, byte
51/// strings, and text strings. It caps the declared size before any
52/// bytes are read, so a malicious header claiming a petabyte-long
53/// string is rejected immediately with [`Error::LengthTooLarge`]. The
54/// limit does not restrict total input size; a valid document may
55/// contain many items each up to the limit.
56///
57/// ## `oom_mitigation`
58///
59/// CBOR encodes lengths in the head, so a decoder is tempted to
60/// pre-allocate a `Vec` of the declared capacity. On hostile input
61/// that is a trivial amplification attack: a few bytes on the wire
62/// reserve gigabytes of memory. `oom_mitigation` is a byte budget,
63/// shared across the current decode, that caps the total amount of
64/// speculative capacity the decoder may reserve for array backing
65/// storage. Once the budget is exhausted, further arrays start empty
66/// and grow on demand. Decoding still succeeds if the input is
67/// well-formed; only the up-front reservation is bounded.
68///
69/// The budget is consumed, not refilled: a deeply nested structure
70/// with many small arrays can drain it early and decode the tail with
71/// zero pre-allocation. That is the intended behavior.
72///
73/// # Examples
74///
75/// Decode binary CBOR with default limits:
76///
77/// ```
78/// use cbor_core::DecodeOptions;
79///
80/// let v = DecodeOptions::new().decode(&[0x18, 42]).unwrap();
81/// assert_eq!(v.to_u32().unwrap(), 42);
82/// ```
83///
84/// Switch the input format to hex text or diagnostic notation:
85///
86/// ```
87/// use cbor_core::{DecodeOptions, Format};
88///
89/// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
90/// assert_eq!(v.to_u32().unwrap(), 42);
91///
92/// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
93/// assert_eq!(v.to_u32().unwrap(), 42);
94/// ```
95///
96/// Tighten limits for input from an untrusted source:
97///
98/// ```
99/// use cbor_core::DecodeOptions;
100///
101/// let strict = DecodeOptions::new()
102/// .recursion_limit(16)
103/// .length_limit(4096)
104/// .oom_mitigation(64 * 1024);
105///
106/// assert!(strict.decode(&[0x18, 42]).is_ok());
107/// ```
#[derive(Debug, Clone)]
pub struct DecodeOptions {
    /// Input syntax the decoder expects; set with [`format`](Self::format).
    pub(crate) format: Format,
    /// Remaining-depth budget handed to the decoder; every array, map,
    /// and tag level subtracts one before its contents are read.
    pub(crate) recursion_limit: u16,
    /// Upper bound compared against the length argument of array, map,
    /// byte string, and text string heads.
    pub(crate) length_limit: u64,
    /// Byte budget that bounds how much capacity the decoder reserves
    /// up front from a declared length.
    pub(crate) oom_mitigation: usize,
    /// Flags selecting which non-deterministic encodings are tolerated
    /// (and normalized) instead of rejected.
    pub(crate) strictness: Strictness,
}
116
117impl Default for DecodeOptions {
118 fn default() -> Self {
119 Self::new()
120 }
121}
122
123impl DecodeOptions {
124 /// Create a new set of options with the crate defaults.
125 ///
126 /// ```
127 /// use cbor_core::DecodeOptions;
128 ///
129 /// let opts = DecodeOptions::new();
130 /// let v = opts.decode(&[0x18, 42]).unwrap();
131 /// assert_eq!(v.to_u32().unwrap(), 42);
132 /// ```
133 #[must_use]
134 pub const fn new() -> Self {
135 Self {
136 format: Format::Binary,
137 recursion_limit: limits::RECURSION_LIMIT,
138 length_limit: limits::LENGTH_LIMIT,
139 oom_mitigation: limits::OOM_MITIGATION,
140 strictness: Strictness::STRICT,
141 }
142 }
143
144 /// Select the input format: [`Binary`](Format::Binary),
145 /// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic).
146 ///
147 /// Default: [`Format::Binary`].
148 ///
149 /// ```
150 /// use cbor_core::{DecodeOptions, Format};
151 ///
152 /// let hex = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
153 /// let bin = DecodeOptions::new().decode(&[0x18, 0x2a]).unwrap();
154 /// assert_eq!(hex, bin);
155 ///
156 /// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
157 /// assert_eq!(v.to_u32().unwrap(), 42);
158 /// ```
159 pub const fn format(mut self, format: Format) -> Self {
160 self.format = format;
161 self
162 }
163
164 /// Set the maximum nesting depth of arrays, maps, and tags.
165 ///
166 /// Default: 200. Input that exceeds the limit returns
167 /// [`Error::NestingTooDeep`].
168 ///
169 /// ```
170 /// use cbor_core::{DecodeOptions, Error};
171 ///
172 /// // Two nested one-element arrays: 0x81 0x81 0x00
173 /// let err = DecodeOptions::new()
174 /// .recursion_limit(1)
175 /// .decode(&[0x81, 0x81, 0x00])
176 /// .unwrap_err();
177 /// assert_eq!(err, Error::NestingTooDeep);
178 /// ```
179 pub const fn recursion_limit(mut self, limit: u16) -> Self {
180 self.recursion_limit = limit;
181 self
182 }
183
184 /// Set the maximum declared length for byte strings, text strings,
185 /// arrays, and maps.
186 ///
187 /// Default: 1,000,000,000. Checked against the length field in the
188 /// CBOR head before any bytes are consumed; an oversized declaration
189 /// returns [`Error::LengthTooLarge`].
190 ///
191 /// ```
192 /// use cbor_core::{DecodeOptions, Error};
193 ///
194 /// // A five-byte text string: 0x65 'h' 'e' 'l' 'l' 'o'
195 /// let err = DecodeOptions::new()
196 /// .length_limit(4)
197 /// .decode(b"\x65hello")
198 /// .unwrap_err();
199 /// assert_eq!(err, Error::LengthTooLarge);
200 /// ```
201 pub const fn length_limit(mut self, limit: u64) -> Self {
202 self.length_limit = limit;
203 self
204 }
205
206 /// Set the byte budget for speculative pre-allocation of array
207 /// backing storage.
208 ///
209 /// Default: 100,000,000. Lower values trade a small amount of
210 /// decoding throughput for stronger resistance to memory-amplification
211 /// attacks. Valid input decodes regardless; only the up-front
212 /// reservation is bounded.
213 ///
214 /// ```
215 /// use cbor_core::DecodeOptions;
216 ///
217 /// // A two-element array: 0x82 0x01 0x02
218 /// let v = DecodeOptions::new()
219 /// .oom_mitigation(0)
220 /// .decode(&[0x82, 0x01, 0x02])
221 /// .unwrap();
222 /// assert_eq!(v.len(), Some(2));
223 /// ```
224 pub const fn oom_mitigation(mut self, bytes: usize) -> Self {
225 self.oom_mitigation = bytes;
226 self
227 }
228
229 /// Configure which non-deterministic encodings the decoder will
230 /// accept. Default: [`Strictness::STRICT`], which rejects every
231 /// deviation with [`Error::NonDeterministic`].
232 ///
233 /// Pass [`Strictness::LENIENT`] to accept all known deviations, or
234 /// build a custom mix of `allow_*` fields. Tolerated input is
235 /// normalized while decoding, so the resulting [`Value`] is
236 /// canonical and re-encoding it produces CBOR::Core compliant
237 /// bytes.
238 ///
239 /// ```
240 /// use cbor_core::{DecodeOptions, Strictness, Value};
241 ///
242 /// // 255 wrongly encoded with a two byte argument; normalized on read.
243 /// let v = DecodeOptions::new()
244 /// .strictness(Strictness::LENIENT)
245 /// .decode(&[0x19, 0x00, 0xff])
246 /// .unwrap();
247 /// assert_eq!(v, Value::from(255));
248 /// assert_eq!(v.encode(), vec![0x18, 0xff]);
249 /// ```
250 pub const fn strictness(mut self, strictness: Strictness) -> Self {
251 self.strictness = strictness;
252 self
253 }
254
255 /// Decode exactly one CBOR data item from an in-memory buffer.
256 ///
257 /// Takes the input by reference: `&[u8]`, `&[u8; N]`, `&Vec<u8>`,
258 /// `&str`, `&String`, etc. all work via `T: AsRef<[u8]> + ?Sized`.
259 /// In [`Format::Binary`], decoded text and byte strings borrow
260 /// directly from the input slice and the returned [`Value`]
261 /// inherits that lifetime; in [`Format::Hex`] and
262 /// [`Format::Diagnostic`] the result is owned.
263 ///
264 /// The input must contain **exactly one** value: any bytes
265 /// remaining after a successful decode cause
266 /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
267 /// trailing whitespace and comments are accepted, but nothing
268 /// else. Use [`sequence_decoder`](Self::sequence_decoder) when the input is a CBOR
269 /// sequence.
270 ///
271 /// An empty buffer (and, for diagnostic notation, one containing
272 /// only whitespace and comments) returns [`Error::UnexpectedEof`].
273 /// A partial value returns [`Error::UnexpectedEof`] too.
274 ///
275 /// ```
276 /// use cbor_core::{DecodeOptions, Format};
277 ///
278 /// let v = DecodeOptions::new().decode(&[0x18, 42]).unwrap();
279 /// assert_eq!(v.to_u32().unwrap(), 42);
280 ///
281 /// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
282 /// assert_eq!(v.to_u32().unwrap(), 42);
283 ///
284 /// let v = DecodeOptions::new()
285 /// .format(Format::Diagnostic)
286 /// .decode("42 / trailing comment is fine /")
287 /// .unwrap();
288 /// assert_eq!(v.to_u32().unwrap(), 42);
289 /// ```
290 pub fn decode<'a, T>(&self, bytes: &'a T) -> Result<Value<'a>>
291 where
292 T: AsRef<[u8]> + ?Sized,
293 {
294 let bytes = bytes.as_ref();
295 match self.format {
296 Format::Binary => {
297 let mut reader = SliceReader(bytes);
298 let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
299 if !reader.0.is_empty() {
300 return Err(Error::InvalidFormat);
301 }
302 Ok(value)
303 }
304 Format::Hex => {
305 let mut reader = HexSliceReader(bytes);
306 let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
307 if !reader.0.is_empty() {
308 return Err(Error::InvalidFormat);
309 }
310 Ok(value)
311 }
312 Format::Diagnostic => {
313 let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit, self.strictness);
314 parser.parse_complete()
315 }
316 }
317 }
318
319 /// Decode exactly one CBOR data item into an owned [`Value`].
320 ///
321 /// Takes the input by value: `Vec<u8>`, `&[u8]`, `&str`, and
322 /// anything else that implements `AsRef<[u8]>` all work. Unlike
323 /// [`decode`](Self::decode), the result never borrows from the
324 /// input regardless of format: text and byte strings are always
325 /// copied into owned allocations. The returned value can be held
326 /// as `Value<'static>` and stored or sent across threads without
327 /// any lifetime constraint.
328 ///
329 /// Use this when the input is short-lived (a temporary buffer, a
330 /// `Vec` returned from a function, etc.) and the decoded value
331 /// needs to outlive it. When the input already lives long enough,
332 /// [`decode`](Self::decode) avoids the copies.
333 ///
334 /// The input must contain **exactly one** value: any bytes
335 /// remaining after a successful decode cause
336 /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
337 /// trailing whitespace and comments are accepted, but nothing
338 /// else. Use [`sequence_decoder`](Self::sequence_decoder) when
339 /// the input is a CBOR sequence.
340 ///
341 /// An empty buffer (and, for diagnostic notation, one containing
342 /// only whitespace and comments) returns [`Error::UnexpectedEof`].
343 /// A partial value returns [`Error::UnexpectedEof`] too.
344 ///
345 /// ```
346 /// use cbor_core::{DecodeOptions, Format, Value};
347 ///
348 /// // Decode from a short-lived Vec without worrying about lifetimes.
349 /// let bytes: Vec<u8> = vec![0x18, 42];
350 /// let v: Value<'static> = DecodeOptions::new().decode_owned(bytes).unwrap();
351 /// assert_eq!(v.to_u32().unwrap(), 42);
352 ///
353 /// // Hex and diagnostic formats work the same way.
354 /// let v: Value<'static> = DecodeOptions::new()
355 /// .format(Format::Hex)
356 /// .decode_owned("182a")
357 /// .unwrap();
358 /// assert_eq!(v.to_u32().unwrap(), 42);
359 /// ```
360 pub fn decode_owned<'a>(&self, bytes: impl AsRef<[u8]>) -> Result<Value<'a>> {
361 let mut bytes = bytes.as_ref();
362
363 match self.format {
364 Format::Binary | Format::Hex => {
365 let value = self.read_from(&mut bytes).map_err(|err| match err {
366 crate::IoError::Io(_io_error) => unreachable!(),
367 crate::IoError::Data(error) => error,
368 })?;
369
370 if bytes.is_empty() {
371 Ok(value)
372 } else {
373 Err(Error::InvalidFormat)
374 }
375 }
376
377 Format::Diagnostic => {
378 let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit, self.strictness);
379 parser.parse_complete()
380 }
381 }
382 }
383
384 /// Read a single CBOR data item from a stream.
385 ///
386 /// Designed to be called repeatedly to pull successive elements of
387 /// a CBOR sequence:
388 ///
389 /// * In [`Format::Binary`] and [`Format::Hex`] the reader is
390 /// consumed only up to the end of the item; any bytes after
391 /// remain in the stream.
392 /// * In [`Format::Diagnostic`] trailing whitespace and comments
393 /// are consumed up to either end of stream or a top-level
394 /// separator comma (the comma is also consumed). Anything else
395 /// after the value fails with [`Error::InvalidFormat`].
396 ///
397 /// Bytes are read into an internal buffer, so the result is
398 /// always owned and can be held as `Value<'static>`. For
399 /// zero-copy decoding from a byte slice, use
400 /// [`decode`](Self::decode) instead.
401 ///
402 /// I/O failures are returned as [`IoError::Io`](crate::IoError::Io);
403 /// malformed or oversized input as [`IoError::Data`](crate::IoError::Data).
404 ///
405 /// ```
406 /// use cbor_core::{DecodeOptions, Format};
407 ///
408 /// let mut bytes: &[u8] = &[0x18, 42];
409 /// let v = DecodeOptions::new().read_from(&mut bytes).unwrap();
410 /// assert_eq!(v.to_u32().unwrap(), 42);
411 ///
412 /// let mut hex: &[u8] = b"182a";
413 /// let v = DecodeOptions::new().format(Format::Hex).read_from(&mut hex).unwrap();
414 /// assert_eq!(v.to_u32().unwrap(), 42);
415 ///
416 /// // Diagnostic: repeated read_from pulls successive sequence items.
417 /// let mut diag: &[u8] = b"1, 2, 3";
418 /// let opts = DecodeOptions::new().format(Format::Diagnostic);
419 /// let a = opts.read_from(&mut diag).unwrap();
420 /// let b = opts.read_from(&mut diag).unwrap();
421 /// let c = opts.read_from(&mut diag).unwrap();
422 /// assert_eq!(a.to_u32().unwrap(), 1);
423 /// assert_eq!(b.to_u32().unwrap(), 2);
424 /// assert_eq!(c.to_u32().unwrap(), 3);
425 /// ```
426 pub fn read_from<'a>(&self, reader: impl std::io::Read) -> IoResult<Value<'a>> {
427 match self.format {
428 Format::Binary => {
429 let mut reader = reader;
430 self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
431 }
432 Format::Hex => {
433 let mut reader = HexReader(reader);
434 self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
435 }
436 Format::Diagnostic => {
437 let mut parser = Parser::new(reader, self.recursion_limit, self.strictness);
438 parser.parse_stream_item()
439 }
440 }
441 }
442
443 /// Create an iterator over a CBOR sequence stored in memory.
444 ///
445 /// The returned [`SequenceDecoder`] yields each successive item of the
446 /// sequence as `Result<Value<'a>>`, where `'a` is the lifetime of
447 /// the input slice. In binary format, items borrow text and byte
448 /// strings from the input; in hex and diagnostic format the items
449 /// are owned. The iterator captures a snapshot of these options;
450 /// subsequent changes to `self` do not affect it.
451 ///
452 /// ```
453 /// use cbor_core::{DecodeOptions, Format};
454 ///
455 /// let opts = DecodeOptions::new().format(Format::Diagnostic);
456 ///
457 /// let items: Vec<_> = opts
458 /// .sequence_decoder(b"1, 2, 3,")
459 /// .collect::<Result<_, _>>()
460 /// .unwrap();
461 /// assert_eq!(items.len(), 3);
462 /// ```
463 pub fn sequence_decoder<'a, T>(&self, input: &'a T) -> SequenceDecoder<'a>
464 where
465 T: AsRef<[u8]> + ?Sized,
466 {
467 SequenceDecoder::with_options(self.clone(), input.as_ref())
468 }
469
470 /// Create an iterator over a CBOR sequence read from a stream.
471 ///
472 /// The returned [`SequenceReader`] yields each successive item as
473 /// `IoResult<Value<'static>>`. `None` indicates a clean end
474 /// between items; a truncated item produces `Some(Err(_))`. Items
475 /// are always owned (the bytes are read into an internal
476 /// buffer); for zero-copy iteration use
477 /// [`sequence_decoder`](Self::sequence_decoder) on a byte slice
478 /// instead.
479 ///
480 /// ```
481 /// use cbor_core::DecodeOptions;
482 ///
483 /// // Binary CBOR sequence: three one-byte items 0x01 0x02 0x03.
484 /// let bytes: &[u8] = &[0x01, 0x02, 0x03];
485 /// let items: Vec<_> = DecodeOptions::new()
486 /// .sequence_reader(bytes)
487 /// .collect::<Result<_, _>>()
488 /// .unwrap();
489 /// assert_eq!(items.len(), 3);
490 /// ```
491 pub fn sequence_reader<R: std::io::Read>(&self, reader: R) -> SequenceReader<R> {
492 SequenceReader::with_options(self.clone(), reader)
493 }
494
495 /// Decode exactly one CBOR data item from an arbitrary reader.
496 /// Used by the sequence iterators to share the core decoding logic.
497 pub(crate) fn decode_one<'a, R>(&self, reader: &mut R) -> std::result::Result<Value<'a>, R::Error>
498 where
499 R: MyReader<'a>,
500 R::Error: From<Error>,
501 {
502 self.do_read(reader, self.recursion_limit, self.oom_mitigation)
503 }
504
505 fn do_read<'a, R>(
506 &self,
507 reader: &mut R,
508 recursion_limit: u16,
509 oom_mitigation: usize,
510 ) -> std::result::Result<Value<'a>, R::Error>
511 where
512 R: MyReader<'a>,
513 R::Error: From<Error>,
514 {
515 match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
516 Some(value) => Ok(value),
517 // A break code where a value was expected (top level, array
518 // item position, map key position, tag content) is malformed.
519 None => Err(Error::Malformed.into()),
520 }
521 }
522
523 /// Read the next item, returning `Ok(None)` when a break code stops
524 /// the input. Used by indefinite-length container loops, which need
525 /// to terminate on the break.
526 fn read_value_or_break<'a, R>(
527 &self,
528 reader: &mut R,
529 recursion_limit: u16,
530 oom_mitigation: usize,
531 ) -> std::result::Result<Option<Value<'a>>, R::Error>
532 where
533 R: MyReader<'a>,
534 R::Error: From<Error>,
535 {
536 match HeadOrStop::read_from(reader)? {
537 HeadOrStop::Definite(head) => self
538 .process_head(head, reader, recursion_limit, oom_mitigation)
539 .map(Some),
540
541 HeadOrStop::Indefinite(major) => {
542 if self.strictness.allow_indefinite_length {
543 self.process_indefinite(major, reader, recursion_limit, oom_mitigation)
544 .map(Some)
545 } else {
546 Err(Error::NonDeterministic.into())
547 }
548 }
549
550 HeadOrStop::Break => Ok(None),
551 }
552 }
553
554 fn process_head<'a, R>(
555 &self,
556 head: Head,
557 reader: &mut R,
558 recursion_limit: u16,
559 oom_mitigation: usize,
560 ) -> std::result::Result<Value<'a>, R::Error>
561 where
562 R: MyReader<'a>,
563 R::Error: From<Error>,
564 {
565 let is_float = head.initial_byte.major() == Major::SimpleOrFloat
566 && matches!(head.argument, Argument::U16(_) | Argument::U32(_) | Argument::U64(_));
567
568 if !is_float && !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
569 return Err(Error::NonDeterministic.into());
570 }
571
572 let this = match head.initial_byte.major() {
573 Major::Unsigned => Value::Unsigned(head.value()),
574 Major::Negative => Value::Negative(head.value()),
575
576 Major::ByteString => {
577 let len = head.value();
578 if len > self.length_limit {
579 return Err(Error::LengthTooLarge.into());
580 }
581 Value::ByteString(reader.read_cow(len, oom_mitigation)?)
582 }
583
584 Major::TextString => {
585 let len = head.value();
586 if len > self.length_limit {
587 return Err(Error::LengthTooLarge.into());
588 }
589 let text = match reader.read_cow(len, oom_mitigation)? {
590 Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes).map_err(Error::from)?),
591 Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).map_err(Error::from)?),
592 };
593 Value::TextString(text)
594 }
595
596 Major::Array => {
597 let value = head.value();
598
599 if value > self.length_limit {
600 return Err(Error::LengthTooLarge.into());
601 }
602
603 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
604 return Err(Error::NestingTooDeep.into());
605 };
606
607 let request: usize = value.try_into().or(Err(Error::LengthTooLarge))?;
608 let granted = request.min(oom_mitigation / size_of::<Value>());
609 let oom_mitigation = oom_mitigation - granted * size_of::<Value>();
610
611 let mut vec = Vec::with_capacity(granted);
612
613 for _ in 0..value {
614 vec.push(self.do_read(reader, recursion_limit, oom_mitigation)?);
615 }
616
617 Value::Array(vec)
618 }
619
620 Major::Map => {
621 let value = head.value();
622
623 if value > self.length_limit {
624 return Err(Error::LengthTooLarge.into());
625 }
626
627 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
628 return Err(Error::NestingTooDeep.into());
629 };
630
631 let mut map = BTreeMap::new();
632 for _ in 0..value {
633 let key = self.do_read(reader, recursion_limit, oom_mitigation)?;
634 let val = self.do_read(reader, recursion_limit, oom_mitigation)?;
635 self.map_insert(&mut map, key, val)?;
636 }
637
638 Value::Map(map)
639 }
640
641 Major::Tag => {
642 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
643 return Err(Error::NestingTooDeep.into());
644 };
645
646 let tag_number = head.value();
647 let tag_content = self.do_read(reader, recursion_limit, oom_mitigation)?;
648
649 // Big integer canonicalization (tag 2 / tag 3): the
650 // payload must be a byte string longer than 8 bytes
651 // (otherwise the value fits in u64) with no leading
652 // zero byte.
653 match tag_content {
654 Value::ByteString(bytes) if matches!(tag_number, POS_BIG_INT | NEG_BIG_INT) => {
655 let canonical = bytes.len() > 8 && bytes[0] != 0;
656 if canonical {
657 Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
658 } else if self.strictness.allow_oversized_bigints {
659 normalize_bigint(tag_number, bytes)
660 } else {
661 return Err(Error::NonDeterministic.into());
662 }
663 }
664 other => Value::Tag(tag_number, Box::new(other)),
665 }
666 }
667
668 Major::SimpleOrFloat => match head.argument {
669 Argument::None => Value::SimpleValue(SimpleValue(head.initial_byte.info())),
670 Argument::U8(n) if n >= 32 => Value::SimpleValue(SimpleValue(n)),
671
672 Argument::U16(bits) => Value::Float(Float::from_bits_u16(bits)),
673 Argument::U32(bits) => self.checked_float(Float::from_bits_u32(bits))?,
674 Argument::U64(bits) => self.checked_float(Float::from_bits_u64(bits))?,
675
676 _ => return Err(Error::Malformed.into()),
677 },
678 };
679
680 Ok(this)
681 }
682
683 fn checked_float<'a>(&self, float: Float) -> Result<Value<'a>> {
684 if float.is_deterministic() {
685 Ok(Value::Float(float))
686 } else if self.strictness.allow_non_shortest_floats {
687 Ok(Value::Float(float.shortest()))
688 } else {
689 Err(Error::NonDeterministic)
690 }
691 }
692
693 /// Insert a key/value pair into a map under the active determinism
694 /// policy. Used by both definite and indefinite-length map decoders.
695 fn map_insert<'a>(&self, map: &mut BTreeMap<Value<'a>, Value<'a>>, key: Value<'a>, val: Value<'a>) -> Result<()> {
696 if !self.strictness.allow_unsorted_map_keys
697 && let Some(last) = map.last_entry()
698 && *last.key() >= key
699 {
700 Err(Error::NonDeterministic)
701 } else if map.insert(key, val).is_some() && !self.strictness.allow_duplicate_map_keys {
702 Err(Error::NonDeterministic)
703 } else {
704 Ok(())
705 }
706 }
707
708 /// Decode an indefinite-length container of the given major type.
709 /// The break code that terminates the container is consumed.
710 fn process_indefinite<'a, R>(
711 &self,
712 major: Major,
713 reader: &mut R,
714 recursion_limit: u16,
715 oom_mitigation: usize,
716 ) -> std::result::Result<Value<'a>, R::Error>
717 where
718 R: MyReader<'a>,
719 R::Error: From<Error>,
720 {
721 match major {
722 Major::ByteString => self.read_indefinite_bytes(reader, oom_mitigation),
723 Major::TextString => self.read_indefinite_text(reader, oom_mitigation),
724 Major::Array => self.read_indefinite_array(reader, recursion_limit, oom_mitigation),
725 Major::Map => self.read_indefinite_map(reader, recursion_limit, oom_mitigation),
726 _ => unreachable!("process_indefinite: invalid major"),
727 }
728 }
729
730 /// Read a `(_ chunk*)` byte string. Each chunk is itself a
731 /// definite-length byte string; an indefinite-length chunk or a
732 /// chunk of a different major type is malformed even in lenient
733 /// mode.
734 fn read_indefinite_bytes<'a, R>(
735 &self,
736 reader: &mut R,
737 oom_mitigation: usize,
738 ) -> std::result::Result<Value<'a>, R::Error>
739 where
740 R: MyReader<'a>,
741 R::Error: From<Error>,
742 {
743 let mut buf = Vec::new();
744 let mut total: u64 = 0;
745
746 loop {
747 match HeadOrStop::read_from(reader)? {
748 HeadOrStop::Break => break,
749
750 HeadOrStop::Definite(head) if head.initial_byte.major() == Major::ByteString => {
751 if !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
752 return Err(Error::NonDeterministic.into());
753 }
754
755 let chunk_len = head.value();
756
757 total = total.checked_add(chunk_len).ok_or(Error::LengthTooLarge)?;
758 if total > self.length_limit {
759 return Err(Error::LengthTooLarge.into());
760 }
761
762 let chunk = reader.read_cow(chunk_len, oom_mitigation)?;
763 buf.extend_from_slice(&chunk);
764 }
765
766 _ => return Err(Error::Malformed.into()),
767 }
768 }
769
770 Ok(Value::ByteString(Cow::Owned(buf)))
771 }
772
773 /// Read a `(_ chunk*)` text string. Each chunk is independently
774 /// validated as UTF-8 (per RFC 8949 ยง3.2.2).
775 fn read_indefinite_text<'a, R>(
776 &self,
777 reader: &mut R,
778 oom_mitigation: usize,
779 ) -> std::result::Result<Value<'a>, R::Error>
780 where
781 R: MyReader<'a>,
782 R::Error: From<Error>,
783 {
784 let mut buf = String::new();
785 let mut total: u64 = 0;
786
787 loop {
788 match HeadOrStop::read_from(reader)? {
789 HeadOrStop::Break => break,
790
791 HeadOrStop::Definite(head) if head.initial_byte.major() == Major::TextString => {
792 if !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
793 return Err(Error::NonDeterministic.into());
794 }
795
796 let chunk_len = head.value();
797
798 total = total.checked_add(chunk_len).ok_or(Error::LengthTooLarge)?;
799 if total > self.length_limit {
800 return Err(Error::LengthTooLarge.into());
801 }
802
803 let chunk = reader.read_cow(chunk_len, oom_mitigation)?;
804 buf.push_str(std::str::from_utf8(&chunk).map_err(Error::from)?);
805 }
806
807 _ => return Err(Error::Malformed.into()),
808 }
809 }
810
811 Ok(Value::TextString(Cow::Owned(buf)))
812 }
813
814 fn read_indefinite_array<'a, R>(
815 &self,
816 reader: &mut R,
817 recursion_limit: u16,
818 oom_mitigation: usize,
819 ) -> std::result::Result<Value<'a>, R::Error>
820 where
821 R: MyReader<'a>,
822 R::Error: From<Error>,
823 {
824 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
825 return Err(Error::NestingTooDeep.into());
826 };
827
828 let mut vec = Vec::new();
829
830 for _ in 0..self.length_limit {
831 match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
832 Some(item) => vec.push(item),
833 None => return Ok(Value::Array(vec)),
834 };
835 }
836
837 match HeadOrStop::read_from(reader)? {
838 HeadOrStop::Definite(_) => Err(Error::LengthTooLarge.into()),
839 HeadOrStop::Indefinite(_) => Err(Error::Malformed.into()),
840 HeadOrStop::Break => Ok(Value::Array(vec)),
841 }
842 }
843
844 fn read_indefinite_map<'a, R>(
845 &self,
846 reader: &mut R,
847 recursion_limit: u16,
848 oom_mitigation: usize,
849 ) -> std::result::Result<Value<'a>, R::Error>
850 where
851 R: MyReader<'a>,
852 R::Error: From<Error>,
853 {
854 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
855 return Err(Error::NestingTooDeep.into());
856 };
857
858 let mut map = BTreeMap::new();
859
860 for _ in 0..self.length_limit {
861 match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
862 Some(key) => {
863 let value = self.do_read(reader, recursion_limit, oom_mitigation)?;
864 self.map_insert(&mut map, key, value)?;
865 }
866 None => return Ok(Value::Map(map)),
867 };
868 }
869
870 match HeadOrStop::read_from(reader)? {
871 HeadOrStop::Definite(_) => Err(Error::LengthTooLarge.into()),
872 HeadOrStop::Indefinite(_) => Err(Error::Malformed.into()),
873 HeadOrStop::Break => Ok(Value::Map(map)),
874 }
875 }
876}
877
878/// Normalize a non-canonical big integer payload.
879///
880/// Strips leading zero bytes and downcasts to
881/// [`Value::Unsigned`] / [`Value::Negative`] when the magnitude fits
882/// in a `u64`. Otherwise returns a tag 2 / tag 3 with a stripped
883/// payload, preserving the [`Cow`] borrow when the input was borrowed.
884fn normalize_bigint<'a>(tag_number: u64, bytes: Cow<'a, [u8]>) -> Value<'a> {
885 fn integer<'b>(tag_number: u64, n: u64) -> Value<'b> {
886 match tag_number {
887 POS_BIG_INT => Value::Unsigned(n),
888 NEG_BIG_INT => Value::Negative(n),
889 _other => unreachable!("normalize_bigint: invalid tag"),
890 }
891 }
892
893 match bytes {
894 Cow::Borrowed(bytes) => {
895 let trimmed = trim_leading_zeros(bytes);
896
897 if let Ok(n) = u64_from_slice(trimmed) {
898 integer(tag_number, n)
899 } else {
900 let bytes = trimmed.into();
901 Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
902 }
903 }
904 Cow::Owned(bytes) => {
905 let trimmed = trim_leading_zeros(&bytes);
906
907 if let Ok(n) = u64_from_slice(trimmed) {
908 integer(tag_number, n)
909 } else {
910 let bytes = if trimmed.len() == bytes.len() {
911 bytes.into()
912 } else {
913 trimmed.to_vec().into()
914 };
915 Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
916 }
917 }
918 }
919}