cbor_core/decode_options.rs
1use std::{borrow::Cow, collections::BTreeMap};
2
3use crate::{
4 Error, Float, Format, IoResult, Result, SequenceDecoder, SequenceReader, SimpleValue, Strictness, Value,
5 codec::{Argument, Head, HeadOrStop, Major},
6 io::{HexReader, HexSliceReader, MyReader, SliceReader},
7 limits,
8 parse::Parser,
9 tag::{NEG_BIG_INT, POS_BIG_INT},
10 util::{trim_leading_zeros, u64_from_slice},
11};
12
13/// Configuration for CBOR decoding.
14///
15/// `DecodeOptions` controls the input format ([`Binary`](Format::Binary),
16/// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic)) and the
17/// limits the decoder enforces against hostile or malformed input.
18/// Construct it with [`DecodeOptions::new`] (or `Default`), adjust
19/// settings with the builder methods, and call [`decode`](Self::decode)
20/// or [`read_from`](Self::read_from) for a single item, or
21/// [`sequence_decoder`](Self::sequence_decoder) / [`sequence_reader`](Self::sequence_reader)
22/// for a CBOR sequence.
23///
24/// The convenience methods on [`Value`] ([`decode`](Value::decode),
25/// [`decode_hex`](Value::decode_hex), [`read_from`](Value::read_from),
26/// [`read_hex_from`](Value::read_hex_from)) all forward to a default
27/// `DecodeOptions`. Use this type directly when you need to decode
28/// diagnostic notation, iterate a sequence, relax a limit for a known
29/// input, or tighten one for untrusted input.
30///
31/// # Options
32///
33/// | Option | Default | Purpose |
34/// |---|---|---|
35/// | [`format`](Self::format) | [`Binary`](Format::Binary) | Input syntax: binary, hex text, or diagnostic notation. |
36/// | [`recursion_limit`](Self::recursion_limit) | 200 | Maximum nesting depth of arrays, maps, and tags. |
37/// | [`length_limit`](Self::length_limit) | 1,000,000,000 | Maximum declared element count of a single array, map, byte string, or text string. |
38/// | [`oom_mitigation`](Self::oom_mitigation) | 100,000,000 | Byte budget for speculative pre-allocation. |
39/// | [`strictness`](Self::strictness) | [`Strictness::STRICT`] | Which non-deterministic encodings the decoder accepts and normalizes. |
40///
41/// ## `recursion_limit`
42///
43/// Each array, map, or tag consumes one unit of recursion budget for
44/// its contents. Exceeding the limit returns [`Error::NestingTooDeep`].
45/// The limit protects against stack overflow on adversarial input and
46/// should be well below the stack a thread has available.
47///
48/// ## `length_limit`
49///
50/// Applies to the length field in the CBOR head of arrays, maps, byte
51/// strings, and text strings. It caps the declared size before any
52/// bytes are read, so a malicious header claiming a petabyte-long
53/// string is rejected immediately with [`Error::LengthTooLarge`]. The
54/// limit does not restrict total input size; a valid document may
55/// contain many items each up to the limit.
56///
57/// ## `oom_mitigation`
58///
59/// CBOR encodes lengths in the head, so a decoder is tempted to
60/// pre-allocate a `Vec` of the declared capacity. On hostile input
61/// that is a trivial amplification attack: a few bytes on the wire
62/// reserve gigabytes of memory. `oom_mitigation` is a byte budget,
63/// shared across the current decode, that caps the total amount of
64/// speculative capacity the decoder may reserve for array backing
65/// storage. Once the budget is exhausted, further arrays start empty
66/// and grow on demand. Decoding still succeeds if the input is
67/// well-formed; only the up-front reservation is bounded.
68///
69/// The budget is consumed, not refilled: a deeply nested structure
70/// with many small arrays can drain it early and decode the tail with
71/// zero pre-allocation. That is the intended behavior.
72///
73/// # Examples
74///
75/// Decode binary CBOR with default limits:
76///
77/// ```
78/// use cbor_core::DecodeOptions;
79///
80/// let v = DecodeOptions::new().decode(&[0x18, 42]).unwrap();
81/// assert_eq!(v.to_u32().unwrap(), 42);
82/// ```
83///
84/// Switch the input format to hex text or diagnostic notation:
85///
86/// ```
87/// use cbor_core::{DecodeOptions, Format};
88///
89/// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
90/// assert_eq!(v.to_u32().unwrap(), 42);
91///
92/// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
93/// assert_eq!(v.to_u32().unwrap(), 42);
94/// ```
95///
96/// Tighten limits for input from an untrusted source:
97///
98/// ```
99/// use cbor_core::DecodeOptions;
100///
101/// let strict = DecodeOptions::new()
102/// .recursion_limit(16)
103/// .length_limit(4096)
104/// .oom_mitigation(64 * 1024);
105///
106/// assert!(strict.decode(&[0x18, 42]).is_ok());
107/// ```
108#[derive(Debug, Clone)]
109pub struct DecodeOptions {
110 pub(crate) format: Format,
111 pub(crate) recursion_limit: u16,
112 pub(crate) length_limit: u64,
113 pub(crate) oom_mitigation: usize,
114 pub(crate) strictness: Strictness,
115}
116
117impl Default for DecodeOptions {
118 fn default() -> Self {
119 Self::new()
120 }
121}
122
123impl DecodeOptions {
124 /// Create a new set of options with the crate defaults.
125 ///
126 /// ```
127 /// use cbor_core::DecodeOptions;
128 ///
129 /// let opts = DecodeOptions::new();
130 /// let v = opts.decode(&[0x18, 42]).unwrap();
131 /// assert_eq!(v.to_u32().unwrap(), 42);
132 /// ```
133 #[must_use]
134 pub const fn new() -> Self {
135 Self {
136 format: Format::Binary,
137 recursion_limit: limits::RECURSION_LIMIT,
138 length_limit: limits::LENGTH_LIMIT,
139 oom_mitigation: limits::OOM_MITIGATION,
140 strictness: Strictness::STRICT,
141 }
142 }
143
144 /// Select the input format: [`Binary`](Format::Binary),
145 /// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic).
146 ///
147 /// Default: [`Format::Binary`].
148 ///
149 /// ```
150 /// use cbor_core::{DecodeOptions, Format};
151 ///
152 /// let hex = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
153 /// let bin = DecodeOptions::new().decode(&[0x18, 0x2a]).unwrap();
154 /// assert_eq!(hex, bin);
155 ///
156 /// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
157 /// assert_eq!(v.to_u32().unwrap(), 42);
158 /// ```
159 #[must_use]
160 pub const fn format(mut self, format: Format) -> Self {
161 self.format = format;
162 self
163 }
164
165 /// Set the maximum nesting depth of arrays, maps, and tags.
166 ///
167 /// Default: 200. Input that exceeds the limit returns
168 /// [`Error::NestingTooDeep`].
169 ///
170 /// ```
171 /// use cbor_core::{DecodeOptions, Error};
172 ///
173 /// // Two nested one-element arrays: 0x81 0x81 0x00
174 /// let err = DecodeOptions::new()
175 /// .recursion_limit(1)
176 /// .decode(&[0x81, 0x81, 0x00])
177 /// .unwrap_err();
178 /// assert_eq!(err, Error::NestingTooDeep);
179 /// ```
180 #[must_use]
181 pub const fn recursion_limit(mut self, limit: u16) -> Self {
182 self.recursion_limit = limit;
183 self
184 }
185
186 /// Set the maximum declared length for byte strings, text strings,
187 /// arrays, and maps.
188 ///
189 /// Default: 1,000,000,000. Checked against the length field in the
190 /// CBOR head before any bytes are consumed; an oversized declaration
191 /// returns [`Error::LengthTooLarge`].
192 ///
193 /// ```
194 /// use cbor_core::{DecodeOptions, Error};
195 ///
196 /// // A five-byte text string: 0x65 'h' 'e' 'l' 'l' 'o'
197 /// let err = DecodeOptions::new()
198 /// .length_limit(4)
199 /// .decode(b"\x65hello")
200 /// .unwrap_err();
201 /// assert_eq!(err, Error::LengthTooLarge);
202 /// ```
203 #[must_use]
204 pub const fn length_limit(mut self, limit: u64) -> Self {
205 self.length_limit = limit;
206 self
207 }
208
209 /// Set the byte budget for speculative pre-allocation of array
210 /// backing storage.
211 ///
212 /// Default: 100,000,000. Lower values trade a small amount of
213 /// decoding throughput for stronger resistance to memory-amplification
214 /// attacks. Valid input decodes regardless; only the up-front
215 /// reservation is bounded.
216 ///
217 /// ```
218 /// use cbor_core::DecodeOptions;
219 ///
220 /// // A two-element array: 0x82 0x01 0x02
221 /// let v = DecodeOptions::new()
222 /// .oom_mitigation(0)
223 /// .decode(&[0x82, 0x01, 0x02])
224 /// .unwrap();
225 /// assert_eq!(v.len(), Some(2));
226 /// ```
227 #[must_use]
228 pub const fn oom_mitigation(mut self, bytes: usize) -> Self {
229 self.oom_mitigation = bytes;
230 self
231 }
232
233 /// Configure which non-deterministic encodings the decoder will
234 /// accept. Default: [`Strictness::STRICT`], which rejects every
235 /// deviation with [`Error::NonDeterministic`].
236 ///
237 /// Pass [`Strictness::LENIENT`] to accept all known deviations, or
238 /// build a custom mix of `allow_*` fields. Tolerated input is
239 /// normalized while decoding, so the resulting [`Value`] is
240 /// canonical and re-encoding it produces CBOR::Core compliant
241 /// bytes.
242 ///
243 /// ```
244 /// use cbor_core::{DecodeOptions, Strictness, Value};
245 ///
246 /// // 255 wrongly encoded with a two byte argument; normalized on read.
247 /// let v = DecodeOptions::new()
248 /// .strictness(Strictness::LENIENT)
249 /// .decode(&[0x19, 0x00, 0xff])
250 /// .unwrap();
251 /// assert_eq!(v, Value::from(255));
252 /// assert_eq!(v.encode(), vec![0x18, 0xff]);
253 /// ```
254 #[must_use]
255 pub const fn strictness(mut self, strictness: Strictness) -> Self {
256 self.strictness = strictness;
257 self
258 }
259
260 /// Decode exactly one CBOR data item from an in-memory buffer.
261 ///
262 /// Takes the input by reference: `&[u8]`, `&[u8; N]`, `&Vec<u8>`,
263 /// `&str`, `&String`, etc. all work via `T: AsRef<[u8]> + ?Sized`.
264 /// In [`Format::Binary`], decoded text and byte strings borrow
265 /// directly from the input slice and the returned [`Value`]
266 /// inherits that lifetime; in [`Format::Hex`] and
267 /// [`Format::Diagnostic`] the result is owned.
268 ///
269 /// The input must contain **exactly one** value: any bytes
270 /// remaining after a successful decode cause
271 /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
272 /// trailing whitespace and comments are accepted, but nothing
273 /// else. Use [`sequence_decoder`](Self::sequence_decoder) when the input is a CBOR
274 /// sequence.
275 ///
276 /// An empty buffer (and, for diagnostic notation, one containing
277 /// only whitespace and comments) returns [`Error::UnexpectedEof`].
278 /// A partial value returns [`Error::UnexpectedEof`] too.
279 ///
280 /// ```
281 /// use cbor_core::{DecodeOptions, Format};
282 ///
283 /// let v = DecodeOptions::new().decode(&[0x18, 42]).unwrap();
284 /// assert_eq!(v.to_u32().unwrap(), 42);
285 ///
286 /// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
287 /// assert_eq!(v.to_u32().unwrap(), 42);
288 ///
289 /// let v = DecodeOptions::new()
290 /// .format(Format::Diagnostic)
291 /// .decode("42 / trailing comment is fine /")
292 /// .unwrap();
293 /// assert_eq!(v.to_u32().unwrap(), 42);
294 /// ```
295 pub fn decode<'a, T>(&self, bytes: &'a T) -> Result<Value<'a>>
296 where
297 T: AsRef<[u8]> + ?Sized,
298 {
299 let bytes = bytes.as_ref();
300 match self.format {
301 Format::Binary => {
302 let mut reader = SliceReader(bytes);
303 let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
304 if !reader.0.is_empty() {
305 return Err(Error::InvalidFormat);
306 }
307 Ok(value)
308 }
309 Format::Hex => {
310 let mut reader = HexSliceReader(bytes);
311 let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
312 if !reader.0.is_empty() {
313 return Err(Error::InvalidFormat);
314 }
315 Ok(value)
316 }
317 Format::Diagnostic => {
318 let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit, self.strictness);
319 parser.parse_complete()
320 }
321 }
322 }
323
324 /// Decode exactly one CBOR data item into an owned [`Value`].
325 ///
326 /// Takes the input by value: `Vec<u8>`, `&[u8]`, `&str`, and
327 /// anything else that implements `AsRef<[u8]>` all work. Unlike
328 /// [`decode`](Self::decode), the result never borrows from the
329 /// input regardless of format: text and byte strings are always
330 /// copied into owned allocations. The returned value can be held
331 /// as `Value<'static>` and stored or sent across threads without
332 /// any lifetime constraint.
333 ///
334 /// Use this when the input is short-lived (a temporary buffer, a
335 /// `Vec` returned from a function, etc.) and the decoded value
336 /// needs to outlive it. When the input already lives long enough,
337 /// [`decode`](Self::decode) avoids the copies.
338 ///
339 /// The input must contain **exactly one** value: any bytes
340 /// remaining after a successful decode cause
341 /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
342 /// trailing whitespace and comments are accepted, but nothing
343 /// else. Use [`sequence_decoder`](Self::sequence_decoder) when
344 /// the input is a CBOR sequence.
345 ///
346 /// An empty buffer (and, for diagnostic notation, one containing
347 /// only whitespace and comments) returns [`Error::UnexpectedEof`].
348 /// A partial value returns [`Error::UnexpectedEof`] too.
349 ///
350 /// ```
351 /// use cbor_core::{DecodeOptions, Format, Value};
352 ///
353 /// // Decode from a short-lived Vec without worrying about lifetimes.
354 /// let bytes: Vec<u8> = vec![0x18, 42];
355 /// let v: Value<'static> = DecodeOptions::new().decode_owned(bytes).unwrap();
356 /// assert_eq!(v.to_u32().unwrap(), 42);
357 ///
358 /// // Hex and diagnostic formats work the same way.
359 /// let v: Value<'static> = DecodeOptions::new()
360 /// .format(Format::Hex)
361 /// .decode_owned("182a")
362 /// .unwrap();
363 /// assert_eq!(v.to_u32().unwrap(), 42);
364 /// ```
365 pub fn decode_owned<'a>(&self, bytes: impl AsRef<[u8]>) -> Result<Value<'a>> {
366 let mut bytes = bytes.as_ref();
367
368 match self.format {
369 Format::Binary | Format::Hex => {
370 let value = self.read_from(&mut bytes).map_err(|err| match err {
371 crate::IoError::Io(_io_error) => unreachable!(),
372 crate::IoError::Data(error) => error,
373 })?;
374
375 if bytes.is_empty() {
376 Ok(value)
377 } else {
378 Err(Error::InvalidFormat)
379 }
380 }
381
382 Format::Diagnostic => {
383 let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit, self.strictness);
384 parser.parse_complete()
385 }
386 }
387 }
388
389 /// Read a single CBOR data item from a stream.
390 ///
391 /// Designed to be called repeatedly to pull successive elements of
392 /// a CBOR sequence:
393 ///
394 /// * In [`Format::Binary`] and [`Format::Hex`] the reader is
395 /// consumed only up to the end of the item; any bytes after
396 /// remain in the stream.
397 /// * In [`Format::Diagnostic`] trailing whitespace and comments
398 /// are consumed up to either end of stream or a top-level
399 /// separator comma (the comma is also consumed). Anything else
400 /// after the value fails with [`Error::InvalidFormat`].
401 ///
402 /// Bytes are read into an internal buffer, so the result is
403 /// always owned and can be held as `Value<'static>`. For
404 /// zero-copy decoding from a byte slice, use
405 /// [`decode`](Self::decode) instead.
406 ///
407 /// I/O failures are returned as [`IoError::Io`](crate::IoError::Io);
408 /// malformed or oversized input as [`IoError::Data`](crate::IoError::Data).
409 ///
410 /// ```
411 /// use cbor_core::{DecodeOptions, Format};
412 ///
413 /// let mut bytes: &[u8] = &[0x18, 42];
414 /// let v = DecodeOptions::new().read_from(&mut bytes).unwrap();
415 /// assert_eq!(v.to_u32().unwrap(), 42);
416 ///
417 /// let mut hex: &[u8] = b"182a";
418 /// let v = DecodeOptions::new().format(Format::Hex).read_from(&mut hex).unwrap();
419 /// assert_eq!(v.to_u32().unwrap(), 42);
420 ///
421 /// // Diagnostic: repeated read_from pulls successive sequence items.
422 /// let mut diag: &[u8] = b"1, 2, 3";
423 /// let opts = DecodeOptions::new().format(Format::Diagnostic);
424 /// let a = opts.read_from(&mut diag).unwrap();
425 /// let b = opts.read_from(&mut diag).unwrap();
426 /// let c = opts.read_from(&mut diag).unwrap();
427 /// assert_eq!(a.to_u32().unwrap(), 1);
428 /// assert_eq!(b.to_u32().unwrap(), 2);
429 /// assert_eq!(c.to_u32().unwrap(), 3);
430 /// ```
431 pub fn read_from<'a>(&self, reader: impl std::io::Read) -> IoResult<Value<'a>> {
432 match self.format {
433 Format::Binary => {
434 let mut reader = reader;
435 self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
436 }
437 Format::Hex => {
438 let mut reader = HexReader(reader);
439 self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
440 }
441 Format::Diagnostic => {
442 let mut parser = Parser::new(reader, self.recursion_limit, self.strictness);
443 parser.parse_stream_item()
444 }
445 }
446 }
447
448 /// Create an iterator over a CBOR sequence stored in memory.
449 ///
450 /// The returned [`SequenceDecoder`] yields each successive item of the
451 /// sequence as `Result<Value<'a>>`, where `'a` is the lifetime of
452 /// the input slice. In binary format, items borrow text and byte
453 /// strings from the input; in hex and diagnostic format the items
454 /// are owned. The iterator captures a snapshot of these options;
455 /// subsequent changes to `self` do not affect it.
456 ///
457 /// ```
458 /// use cbor_core::{DecodeOptions, Format};
459 ///
460 /// let opts = DecodeOptions::new().format(Format::Diagnostic);
461 ///
462 /// let items: Vec<_> = opts
463 /// .sequence_decoder(b"1, 2, 3,")
464 /// .collect::<Result<_, _>>()
465 /// .unwrap();
466 /// assert_eq!(items.len(), 3);
467 /// ```
468 pub fn sequence_decoder<'a, T>(&self, input: &'a T) -> SequenceDecoder<'a>
469 where
470 T: AsRef<[u8]> + ?Sized,
471 {
472 SequenceDecoder::with_options(self.clone(), input.as_ref())
473 }
474
475 /// Create an iterator over a CBOR sequence read from a stream.
476 ///
477 /// The returned [`SequenceReader`] yields each successive item as
478 /// `IoResult<Value<'static>>`. `None` indicates a clean end
479 /// between items; a truncated item produces `Some(Err(_))`. Items
480 /// are always owned (the bytes are read into an internal
481 /// buffer); for zero-copy iteration use
482 /// [`sequence_decoder`](Self::sequence_decoder) on a byte slice
483 /// instead.
484 ///
485 /// ```
486 /// use cbor_core::DecodeOptions;
487 ///
488 /// // Binary CBOR sequence: three one-byte items 0x01 0x02 0x03.
489 /// let bytes: &[u8] = &[0x01, 0x02, 0x03];
490 /// let items: Vec<_> = DecodeOptions::new()
491 /// .sequence_reader(bytes)
492 /// .collect::<Result<_, _>>()
493 /// .unwrap();
494 /// assert_eq!(items.len(), 3);
495 /// ```
496 pub fn sequence_reader<R: std::io::Read>(&self, reader: R) -> SequenceReader<R> {
497 SequenceReader::with_options(self.clone(), reader)
498 }
499
500 /// Decode exactly one CBOR data item from an arbitrary reader.
501 /// Used by the sequence iterators to share the core decoding logic.
502 pub(crate) fn decode_one<'a, R>(&self, reader: &mut R) -> std::result::Result<Value<'a>, R::Error>
503 where
504 R: MyReader<'a>,
505 R::Error: From<Error>,
506 {
507 self.do_read(reader, self.recursion_limit, self.oom_mitigation)
508 }
509
510 fn do_read<'a, R>(
511 &self,
512 reader: &mut R,
513 recursion_limit: u16,
514 oom_mitigation: usize,
515 ) -> std::result::Result<Value<'a>, R::Error>
516 where
517 R: MyReader<'a>,
518 R::Error: From<Error>,
519 {
520 match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
521 Some(value) => Ok(value),
522 // A break code where a value was expected (top level, array
523 // item position, map key position, tag content) is malformed.
524 None => Err(Error::Malformed.into()),
525 }
526 }
527
528 /// Read the next item, returning `Ok(None)` when a break code stops
529 /// the input. Used by indefinite-length container loops, which need
530 /// to terminate on the break.
531 fn read_value_or_break<'a, R>(
532 &self,
533 reader: &mut R,
534 recursion_limit: u16,
535 oom_mitigation: usize,
536 ) -> std::result::Result<Option<Value<'a>>, R::Error>
537 where
538 R: MyReader<'a>,
539 R::Error: From<Error>,
540 {
541 match HeadOrStop::read_from(reader)? {
542 HeadOrStop::Definite(head) => self
543 .process_head(head, reader, recursion_limit, oom_mitigation)
544 .map(Some),
545
546 HeadOrStop::Indefinite(major) => {
547 if self.strictness.allow_indefinite_length {
548 self.process_indefinite(major, reader, recursion_limit, oom_mitigation)
549 .map(Some)
550 } else {
551 Err(Error::NonDeterministic.into())
552 }
553 }
554
555 HeadOrStop::Break => Ok(None),
556 }
557 }
558
559 fn process_head<'a, R>(
560 &self,
561 head: Head,
562 reader: &mut R,
563 recursion_limit: u16,
564 oom_mitigation: usize,
565 ) -> std::result::Result<Value<'a>, R::Error>
566 where
567 R: MyReader<'a>,
568 R::Error: From<Error>,
569 {
570 let is_float = head.initial_byte.major() == Major::SimpleOrFloat
571 && matches!(head.argument, Argument::U16(_) | Argument::U32(_) | Argument::U64(_));
572
573 if !is_float && !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
574 return Err(Error::NonDeterministic.into());
575 }
576
577 let this = match head.initial_byte.major() {
578 Major::Unsigned => Value::Unsigned(head.value()),
579 Major::Negative => Value::Negative(head.value()),
580
581 Major::ByteString => {
582 let len = head.value();
583 if len > self.length_limit {
584 return Err(Error::LengthTooLarge.into());
585 }
586 Value::ByteString(reader.read_cow(len, oom_mitigation)?)
587 }
588
589 Major::TextString => {
590 let len = head.value();
591 if len > self.length_limit {
592 return Err(Error::LengthTooLarge.into());
593 }
594 let text = match reader.read_cow(len, oom_mitigation)? {
595 Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes).map_err(Error::from)?),
596 Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).map_err(Error::from)?),
597 };
598 Value::TextString(text)
599 }
600
601 Major::Array => {
602 let value = head.value();
603
604 if value > self.length_limit {
605 return Err(Error::LengthTooLarge.into());
606 }
607
608 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
609 return Err(Error::NestingTooDeep.into());
610 };
611
612 let request: usize = value.try_into().or(Err(Error::LengthTooLarge))?;
613 let granted = request.min(oom_mitigation / size_of::<Value>());
614 let oom_mitigation = oom_mitigation - granted * size_of::<Value>();
615
616 let mut vec = Vec::with_capacity(granted);
617
618 for _ in 0..value {
619 vec.push(self.do_read(reader, recursion_limit, oom_mitigation)?);
620 }
621
622 Value::Array(vec)
623 }
624
625 Major::Map => {
626 let value = head.value();
627
628 if value > self.length_limit {
629 return Err(Error::LengthTooLarge.into());
630 }
631
632 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
633 return Err(Error::NestingTooDeep.into());
634 };
635
636 let mut map = BTreeMap::new();
637 for _ in 0..value {
638 let key = self.do_read(reader, recursion_limit, oom_mitigation)?;
639 let val = self.do_read(reader, recursion_limit, oom_mitigation)?;
640 self.map_insert(&mut map, key, val)?;
641 }
642
643 Value::Map(map)
644 }
645
646 Major::Tag => {
647 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
648 return Err(Error::NestingTooDeep.into());
649 };
650
651 let tag_number = head.value();
652 let tag_content = self.do_read(reader, recursion_limit, oom_mitigation)?;
653
654 // Big integer canonicalization (tag 2 / tag 3): the
655 // payload must be a byte string longer than 8 bytes
656 // (otherwise the value fits in u64) with no leading
657 // zero byte.
658 match tag_content {
659 Value::ByteString(bytes) if matches!(tag_number, POS_BIG_INT | NEG_BIG_INT) => {
660 let canonical = bytes.len() > 8 && bytes[0] != 0;
661 if canonical {
662 Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
663 } else if self.strictness.allow_oversized_bigints {
664 normalize_bigint(tag_number, bytes)
665 } else {
666 return Err(Error::NonDeterministic.into());
667 }
668 }
669 other => Value::Tag(tag_number, Box::new(other)),
670 }
671 }
672
673 Major::SimpleOrFloat => match head.argument {
674 Argument::None => Value::SimpleValue(SimpleValue(head.initial_byte.info())),
675 Argument::U8(n) if n >= 32 => Value::SimpleValue(SimpleValue(n)),
676
677 Argument::U16(bits) => Value::Float(Float::from_bits_u16(bits)),
678 Argument::U32(bits) => self.checked_float(Float::from_bits_u32(bits))?,
679 Argument::U64(bits) => self.checked_float(Float::from_bits_u64(bits))?,
680
681 _ => return Err(Error::Malformed.into()),
682 },
683 };
684
685 Ok(this)
686 }
687
688 fn checked_float<'a>(&self, float: Float) -> Result<Value<'a>> {
689 if float.is_deterministic() {
690 Ok(Value::Float(float))
691 } else if self.strictness.allow_non_shortest_floats {
692 Ok(Value::Float(float.shortest()))
693 } else {
694 Err(Error::NonDeterministic)
695 }
696 }
697
698 /// Insert a key/value pair into a map under the active determinism
699 /// policy. Used by both definite and indefinite-length map decoders.
700 fn map_insert<'a>(&self, map: &mut BTreeMap<Value<'a>, Value<'a>>, key: Value<'a>, val: Value<'a>) -> Result<()> {
701 if !self.strictness.allow_unsorted_map_keys
702 && let Some(last) = map.last_entry()
703 && *last.key() >= key
704 {
705 Err(Error::NonDeterministic)
706 } else if map.insert(key, val).is_some() && !self.strictness.allow_duplicate_map_keys {
707 Err(Error::NonDeterministic)
708 } else {
709 Ok(())
710 }
711 }
712
713 /// Decode an indefinite-length container of the given major type.
714 /// The break code that terminates the container is consumed.
715 fn process_indefinite<'a, R>(
716 &self,
717 major: Major,
718 reader: &mut R,
719 recursion_limit: u16,
720 oom_mitigation: usize,
721 ) -> std::result::Result<Value<'a>, R::Error>
722 where
723 R: MyReader<'a>,
724 R::Error: From<Error>,
725 {
726 match major {
727 Major::ByteString => self.read_indefinite_bytes(reader, oom_mitigation),
728 Major::TextString => self.read_indefinite_text(reader, oom_mitigation),
729 Major::Array => self.read_indefinite_array(reader, recursion_limit, oom_mitigation),
730 Major::Map => self.read_indefinite_map(reader, recursion_limit, oom_mitigation),
731 _ => unreachable!("process_indefinite: invalid major"),
732 }
733 }
734
735 /// Read a `(_ chunk*)` byte string. Each chunk is itself a
736 /// definite-length byte string; an indefinite-length chunk or a
737 /// chunk of a different major type is malformed even in lenient
738 /// mode.
739 fn read_indefinite_bytes<'a, R>(
740 &self,
741 reader: &mut R,
742 oom_mitigation: usize,
743 ) -> std::result::Result<Value<'a>, R::Error>
744 where
745 R: MyReader<'a>,
746 R::Error: From<Error>,
747 {
748 let mut buf = Vec::new();
749 let mut total: u64 = 0;
750
751 loop {
752 match HeadOrStop::read_from(reader)? {
753 HeadOrStop::Break => break,
754
755 HeadOrStop::Definite(head) if head.initial_byte.major() == Major::ByteString => {
756 if !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
757 return Err(Error::NonDeterministic.into());
758 }
759
760 let chunk_len = head.value();
761
762 total = total.checked_add(chunk_len).ok_or(Error::LengthTooLarge)?;
763 if total > self.length_limit {
764 return Err(Error::LengthTooLarge.into());
765 }
766
767 let chunk = reader.read_cow(chunk_len, oom_mitigation)?;
768 buf.extend_from_slice(&chunk);
769 }
770
771 _ => return Err(Error::Malformed.into()),
772 }
773 }
774
775 Ok(Value::ByteString(Cow::Owned(buf)))
776 }
777
778 /// Read a `(_ chunk*)` text string. Each chunk is independently
779 /// validated as UTF-8 (per RFC 8949 ยง3.2.2).
780 fn read_indefinite_text<'a, R>(
781 &self,
782 reader: &mut R,
783 oom_mitigation: usize,
784 ) -> std::result::Result<Value<'a>, R::Error>
785 where
786 R: MyReader<'a>,
787 R::Error: From<Error>,
788 {
789 let mut buf = String::new();
790 let mut total: u64 = 0;
791
792 loop {
793 match HeadOrStop::read_from(reader)? {
794 HeadOrStop::Break => break,
795
796 HeadOrStop::Definite(head) if head.initial_byte.major() == Major::TextString => {
797 if !head.argument.is_deterministic() && !self.strictness.allow_non_shortest_integers {
798 return Err(Error::NonDeterministic.into());
799 }
800
801 let chunk_len = head.value();
802
803 total = total.checked_add(chunk_len).ok_or(Error::LengthTooLarge)?;
804 if total > self.length_limit {
805 return Err(Error::LengthTooLarge.into());
806 }
807
808 let chunk = reader.read_cow(chunk_len, oom_mitigation)?;
809 buf.push_str(std::str::from_utf8(&chunk).map_err(Error::from)?);
810 }
811
812 _ => return Err(Error::Malformed.into()),
813 }
814 }
815
816 Ok(Value::TextString(Cow::Owned(buf)))
817 }
818
819 fn read_indefinite_array<'a, R>(
820 &self,
821 reader: &mut R,
822 recursion_limit: u16,
823 oom_mitigation: usize,
824 ) -> std::result::Result<Value<'a>, R::Error>
825 where
826 R: MyReader<'a>,
827 R::Error: From<Error>,
828 {
829 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
830 return Err(Error::NestingTooDeep.into());
831 };
832
833 let mut vec = Vec::new();
834
835 for _ in 0..self.length_limit {
836 match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
837 Some(item) => vec.push(item),
838 None => return Ok(Value::Array(vec)),
839 }
840 }
841
842 match HeadOrStop::read_from(reader)? {
843 HeadOrStop::Definite(_) => Err(Error::LengthTooLarge.into()),
844 HeadOrStop::Indefinite(_) => Err(Error::Malformed.into()),
845 HeadOrStop::Break => Ok(Value::Array(vec)),
846 }
847 }
848
849 fn read_indefinite_map<'a, R>(
850 &self,
851 reader: &mut R,
852 recursion_limit: u16,
853 oom_mitigation: usize,
854 ) -> std::result::Result<Value<'a>, R::Error>
855 where
856 R: MyReader<'a>,
857 R::Error: From<Error>,
858 {
859 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
860 return Err(Error::NestingTooDeep.into());
861 };
862
863 let mut map = BTreeMap::new();
864
865 for _ in 0..self.length_limit {
866 match self.read_value_or_break(reader, recursion_limit, oom_mitigation)? {
867 Some(key) => {
868 let value = self.do_read(reader, recursion_limit, oom_mitigation)?;
869 self.map_insert(&mut map, key, value)?;
870 }
871 None => return Ok(Value::Map(map)),
872 }
873 }
874
875 match HeadOrStop::read_from(reader)? {
876 HeadOrStop::Definite(_) => Err(Error::LengthTooLarge.into()),
877 HeadOrStop::Indefinite(_) => Err(Error::Malformed.into()),
878 HeadOrStop::Break => Ok(Value::Map(map)),
879 }
880 }
881}
882
883/// Normalize a non-canonical big integer payload.
884///
885/// Strips leading zero bytes and downcasts to
886/// [`Value::Unsigned`] / [`Value::Negative`] when the magnitude fits
887/// in a `u64`. Otherwise returns a tag 2 / tag 3 with a stripped
888/// payload, preserving the [`Cow`] borrow when the input was borrowed.
889fn normalize_bigint(tag_number: u64, bytes: Cow<'_, [u8]>) -> Value<'_> {
890 fn integer<'b>(tag_number: u64, n: u64) -> Value<'b> {
891 match tag_number {
892 POS_BIG_INT => Value::Unsigned(n),
893 NEG_BIG_INT => Value::Negative(n),
894 _other => unreachable!("normalize_bigint: invalid tag"),
895 }
896 }
897
898 match bytes {
899 Cow::Borrowed(bytes) => {
900 let trimmed = trim_leading_zeros(bytes);
901
902 if let Ok(n) = u64_from_slice(trimmed) {
903 integer(tag_number, n)
904 } else {
905 let bytes = trimmed.into();
906 Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
907 }
908 }
909 Cow::Owned(bytes) => {
910 let trimmed = trim_leading_zeros(&bytes);
911
912 if let Ok(n) = u64_from_slice(trimmed) {
913 integer(tag_number, n)
914 } else {
915 let bytes = if trimmed.len() == bytes.len() {
916 bytes.into()
917 } else {
918 trimmed.to_vec().into()
919 };
920 Value::Tag(tag_number, Box::new(Value::ByteString(bytes)))
921 }
922 }
923 }
924}