cbor_core/decode_options.rs
1use std::collections::BTreeMap;
2
3use crate::{
4 DataType, Error, Float, Format, IoResult, Result, SequenceDecoder, SequenceReader, SimpleValue, Value,
5 codec::{Argument, Head, Major},
6 io::{HexReader, HexSliceReader, MyReader, SliceReader},
7 limits,
8 parse::Parser,
9};
10
11/// Configuration for CBOR decoding.
12///
13/// `DecodeOptions` controls the input format ([`Binary`](Format::Binary),
14/// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic)) and the
15/// limits the decoder enforces against hostile or malformed input.
16/// Construct it with [`DecodeOptions::new`] (or `Default`), adjust
17/// settings with the builder methods, and call [`decode`](Self::decode)
18/// or [`read_from`](Self::read_from) for a single item, or
19/// [`sequence_decoder`](Self::sequence_decoder) / [`sequence_reader`](Self::sequence_reader)
20/// for a CBOR sequence.
21///
22/// The convenience methods on [`Value`] ([`decode`](Value::decode),
23/// [`decode_hex`](Value::decode_hex), [`read_from`](Value::read_from),
24/// [`read_hex_from`](Value::read_hex_from)) all forward to a default
25/// `DecodeOptions`. Use this type directly when you need to decode
26/// diagnostic notation, iterate a sequence, relax a limit for a known
27/// input, or tighten one for untrusted input.
28///
29/// # Options
30///
31/// | Option | Default | Purpose |
32/// |---|---|---|
33/// | [`format`](Self::format) | [`Binary`](Format::Binary) | Input syntax: binary, hex text, or diagnostic notation. |
34/// | [`recursion_limit`](Self::recursion_limit) | 200 | Maximum nesting depth of arrays, maps, and tags. |
35/// | [`length_limit`](Self::length_limit) | 1,000,000,000 | Maximum declared element count of a single array, map, byte string, or text string. |
36/// | [`oom_mitigation`](Self::oom_mitigation) | 100,000,000 | Byte budget for speculative pre-allocation. |
37///
38/// ## `recursion_limit`
39///
40/// Each array, map, or tag consumes one unit of recursion budget for
41/// its contents. Exceeding the limit returns [`Error::NestingTooDeep`].
42/// The limit protects against stack overflow on adversarial input and
43/// should be well below the stack a thread has available.
44///
45/// ## `length_limit`
46///
47/// Applies to the length field in the CBOR head of arrays, maps, byte
48/// strings, and text strings. It caps the declared size before any
49/// bytes are read, so a malicious header claiming a petabyte-long
50/// string is rejected immediately with [`Error::LengthTooLarge`]. The
51/// limit does not restrict total input size; a valid document may
52/// contain many items each up to the limit.
53///
54/// ## `oom_mitigation`
55///
56/// CBOR encodes lengths in the head, so a decoder is tempted to
57/// pre-allocate a `Vec` of the declared capacity. On hostile input
58/// that is a trivial amplification attack: a few bytes on the wire
59/// reserve gigabytes of memory. `oom_mitigation` is a byte budget,
60/// shared across the current decode, that caps the total amount of
61/// speculative capacity the decoder may reserve for array backing
62/// storage. Once the budget is exhausted, further arrays start empty
63/// and grow on demand. Decoding still succeeds if the input is
64/// well-formed; only the up-front reservation is bounded.
65///
66/// The budget is consumed, not refilled: a deeply nested structure
67/// with many small arrays can drain it early and decode the tail with
68/// zero pre-allocation. That is the intended behavior.
69///
70/// # Examples
71///
72/// Decode binary CBOR with default limits:
73///
74/// ```
75/// use cbor_core::DecodeOptions;
76///
77/// let v = DecodeOptions::new().decode([0x18, 42]).unwrap();
78/// assert_eq!(v.to_u32().unwrap(), 42);
79/// ```
80///
81/// Switch the input format to hex text or diagnostic notation:
82///
83/// ```
84/// use cbor_core::{DecodeOptions, Format};
85///
86/// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
87/// assert_eq!(v.to_u32().unwrap(), 42);
88///
89/// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
90/// assert_eq!(v.to_u32().unwrap(), 42);
91/// ```
92///
93/// Tighten limits for input from an untrusted source:
94///
95/// ```
96/// use cbor_core::DecodeOptions;
97///
98/// let strict = DecodeOptions::new()
99/// .recursion_limit(16)
100/// .length_limit(4096)
101/// .oom_mitigation(64 * 1024);
102///
103/// assert!(strict.decode([0x18, 42]).is_ok());
104/// ```
#[derive(Debug, Clone)]
pub struct DecodeOptions {
    // Input syntax used by `decode` / `read_from` to pick the reader or parser.
    format: Format,
    // Nesting budget handed to the decoder; arrays, maps, and tags each use one unit.
    recursion_limit: u16,
    // Upper bound compared against the declared length of strings, arrays, and maps.
    length_limit: u64,
    // Byte budget for up-front capacity reservation while decoding.
    oom_mitigation: usize,
}
112
113impl Default for DecodeOptions {
114 fn default() -> Self {
115 Self::new()
116 }
117}
118
119impl DecodeOptions {
120 /// Create a new set of options with the crate defaults.
121 ///
122 /// ```
123 /// use cbor_core::DecodeOptions;
124 ///
125 /// let opts = DecodeOptions::new();
126 /// let v = opts.decode([0x18, 42]).unwrap();
127 /// assert_eq!(v.to_u32().unwrap(), 42);
128 /// ```
129 #[must_use]
130 pub const fn new() -> Self {
131 Self {
132 format: Format::Binary,
133 recursion_limit: limits::RECURSION_LIMIT,
134 length_limit: limits::LENGTH_LIMIT,
135 oom_mitigation: limits::OOM_MITIGATION,
136 }
137 }
138
139 /// Select the input format: [`Binary`](Format::Binary),
140 /// [`Hex`](Format::Hex), or [`Diagnostic`](Format::Diagnostic).
141 ///
142 /// Default: [`Format::Binary`].
143 ///
144 /// ```
145 /// use cbor_core::{DecodeOptions, Format};
146 ///
147 /// let hex = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
148 /// let bin = DecodeOptions::new().decode([0x18, 0x2a]).unwrap();
149 /// assert_eq!(hex, bin);
150 ///
151 /// let v = DecodeOptions::new().format(Format::Diagnostic).decode("42").unwrap();
152 /// assert_eq!(v.to_u32().unwrap(), 42);
153 /// ```
154 pub const fn format(mut self, format: Format) -> Self {
155 self.format = format;
156 self
157 }
158
159 /// Set the maximum nesting depth of arrays, maps, and tags.
160 ///
161 /// Default: 200. Input that exceeds the limit returns
162 /// [`Error::NestingTooDeep`].
163 ///
164 /// ```
165 /// use cbor_core::{DecodeOptions, Error};
166 ///
167 /// // Two nested one-element arrays: 0x81 0x81 0x00
168 /// let err = DecodeOptions::new()
169 /// .recursion_limit(1)
170 /// .decode([0x81, 0x81, 0x00])
171 /// .unwrap_err();
172 /// assert_eq!(err, Error::NestingTooDeep);
173 /// ```
174 pub const fn recursion_limit(mut self, limit: u16) -> Self {
175 self.recursion_limit = limit;
176 self
177 }
178
179 /// Set the maximum declared length for byte strings, text strings,
180 /// arrays, and maps.
181 ///
182 /// Default: 1,000,000,000. Checked against the length field in the
183 /// CBOR head before any bytes are consumed; an oversized declaration
184 /// returns [`Error::LengthTooLarge`].
185 ///
186 /// ```
187 /// use cbor_core::{DecodeOptions, Error};
188 ///
189 /// // A five-byte text string: 0x65 'h' 'e' 'l' 'l' 'o'
190 /// let err = DecodeOptions::new()
191 /// .length_limit(4)
192 /// .decode(b"\x65hello")
193 /// .unwrap_err();
194 /// assert_eq!(err, Error::LengthTooLarge);
195 /// ```
196 pub const fn length_limit(mut self, limit: u64) -> Self {
197 self.length_limit = limit;
198 self
199 }
200
201 /// Set the byte budget for speculative pre-allocation of array
202 /// backing storage.
203 ///
204 /// Default: 100,000,000. Lower values trade a small amount of
205 /// decoding throughput for stronger resistance to memory-amplification
206 /// attacks. Valid input decodes regardless; only the up-front
207 /// reservation is bounded.
208 ///
209 /// ```
210 /// use cbor_core::DecodeOptions;
211 ///
212 /// // A two-element array: 0x82 0x01 0x02
213 /// let v = DecodeOptions::new()
214 /// .oom_mitigation(0)
215 /// .decode([0x82, 0x01, 0x02])
216 /// .unwrap();
217 /// assert_eq!(v.len(), Some(2));
218 /// ```
219 pub const fn oom_mitigation(mut self, bytes: usize) -> Self {
220 self.oom_mitigation = bytes;
221 self
222 }
223
224 /// Decode exactly one CBOR data item from an in-memory buffer.
225 ///
226 /// Accepts any `AsRef<[u8]>`: `&[u8]`, `Vec<u8>`, `&str`, `String`,
227 /// and so on. The input must contain **exactly one** value: any
228 /// bytes remaining after a successful decode cause
229 /// [`Error::InvalidFormat`]. In [`Format::Diagnostic`] mode
230 /// trailing whitespace and comments are accepted, but nothing
231 /// else. Use [`sequence_decoder`](Self::sequence_decoder) when the input is a CBOR
232 /// sequence.
233 ///
234 /// An empty buffer (and, for diagnostic notation, one containing
235 /// only whitespace and comments) returns [`Error::UnexpectedEof`].
236 /// A partial value returns [`Error::UnexpectedEof`] too.
237 ///
238 /// ```
239 /// use cbor_core::{DecodeOptions, Format};
240 ///
241 /// let v = DecodeOptions::new().decode([0x18, 42]).unwrap();
242 /// assert_eq!(v.to_u32().unwrap(), 42);
243 ///
244 /// let v = DecodeOptions::new().format(Format::Hex).decode("182a").unwrap();
245 /// assert_eq!(v.to_u32().unwrap(), 42);
246 ///
247 /// let v = DecodeOptions::new()
248 /// .format(Format::Diagnostic)
249 /// .decode("42 / trailing comment is fine /")
250 /// .unwrap();
251 /// assert_eq!(v.to_u32().unwrap(), 42);
252 /// ```
253 pub fn decode(&self, bytes: impl AsRef<[u8]>) -> Result<Value> {
254 let bytes = bytes.as_ref();
255 match self.format {
256 Format::Binary => {
257 let mut reader = SliceReader(bytes);
258 let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
259 if !reader.0.is_empty() {
260 return Err(Error::InvalidFormat);
261 }
262 Ok(value)
263 }
264 Format::Hex => {
265 let mut reader = HexSliceReader(bytes);
266 let value = self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)?;
267 if !reader.0.is_empty() {
268 return Err(Error::InvalidFormat);
269 }
270 Ok(value)
271 }
272 Format::Diagnostic => {
273 let mut parser = Parser::new(SliceReader(bytes), self.recursion_limit);
274 parser.parse_complete()
275 }
276 }
277 }
278
279 /// Read a single CBOR data item from a stream.
280 ///
281 /// Designed to be called repeatedly to pull successive elements of
282 /// a CBOR sequence:
283 ///
284 /// * In [`Format::Binary`] and [`Format::Hex`] the reader is
285 /// consumed only up to the end of the item; any bytes after
286 /// remain in the stream.
287 /// * In [`Format::Diagnostic`] trailing whitespace and comments
288 /// are consumed up to either end of stream or a top-level
289 /// separator comma (the comma is also consumed). Anything else
290 /// after the value fails with [`Error::InvalidFormat`].
291 ///
292 /// I/O failures are returned as [`IoError::Io`](crate::IoError::Io);
293 /// malformed or oversized input as [`IoError::Data`](crate::IoError::Data).
294 ///
295 /// ```
296 /// use cbor_core::{DecodeOptions, Format};
297 ///
298 /// let mut bytes: &[u8] = &[0x18, 42];
299 /// let v = DecodeOptions::new().read_from(&mut bytes).unwrap();
300 /// assert_eq!(v.to_u32().unwrap(), 42);
301 ///
302 /// let mut hex: &[u8] = b"182a";
303 /// let v = DecodeOptions::new().format(Format::Hex).read_from(&mut hex).unwrap();
304 /// assert_eq!(v.to_u32().unwrap(), 42);
305 ///
306 /// // Diagnostic: repeated read_from pulls successive sequence items.
307 /// let mut diag: &[u8] = b"1, 2, 3";
308 /// let opts = DecodeOptions::new().format(Format::Diagnostic);
309 /// let a = opts.read_from(&mut diag).unwrap();
310 /// let b = opts.read_from(&mut diag).unwrap();
311 /// let c = opts.read_from(&mut diag).unwrap();
312 /// assert_eq!(a.to_u32().unwrap(), 1);
313 /// assert_eq!(b.to_u32().unwrap(), 2);
314 /// assert_eq!(c.to_u32().unwrap(), 3);
315 /// ```
316 pub fn read_from(&self, reader: impl std::io::Read) -> IoResult<Value> {
317 match self.format {
318 Format::Binary => {
319 let mut reader = reader;
320 self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
321 }
322 Format::Hex => {
323 let mut reader = HexReader(reader);
324 self.do_read(&mut reader, self.recursion_limit, self.oom_mitigation)
325 }
326 Format::Diagnostic => {
327 let mut parser = Parser::new(reader, self.recursion_limit);
328 parser.parse_stream_item()
329 }
330 }
331 }
332
333 /// Create an iterator over a CBOR sequence stored in memory.
334 ///
335 /// The returned [`SequenceDecoder`] yields each successive item of the
336 /// sequence as `Result<Value>`. The iterator captures a snapshot
337 /// of these options; subsequent changes to `self` do not affect
338 /// it.
339 ///
340 /// ```
341 /// use cbor_core::{DecodeOptions, Format};
342 ///
343 /// let opts = DecodeOptions::new().format(Format::Diagnostic);
344 ///
345 /// let items: Vec<_> = opts
346 /// .sequence_decoder(b"1, 2, 3,")
347 /// .collect::<Result<_, _>>()
348 /// .unwrap();
349 /// assert_eq!(items.len(), 3);
350 /// ```
351 pub fn sequence_decoder<'a, B: AsRef<[u8]> + ?Sized>(&self, input: &'a B) -> SequenceDecoder<'a> {
352 SequenceDecoder::with_options(self.clone(), input.as_ref())
353 }
354
355 /// Create an iterator over a CBOR sequence read from a stream.
356 ///
357 /// The returned [`SequenceReader`] yields each successive item as
358 /// `IoResult<Value>`. `None` indicates a clean end between items;
359 /// a truncated item produces `Some(Err(_))`.
360 ///
361 /// ```
362 /// use cbor_core::DecodeOptions;
363 ///
364 /// // Binary CBOR sequence: three one-byte items 0x01 0x02 0x03.
365 /// let bytes: &[u8] = &[0x01, 0x02, 0x03];
366 /// let items: Vec<_> = DecodeOptions::new()
367 /// .sequence_reader(bytes)
368 /// .collect::<Result<_, _>>()
369 /// .unwrap();
370 /// assert_eq!(items.len(), 3);
371 /// ```
372 pub fn sequence_reader<R: std::io::Read>(&self, reader: R) -> SequenceReader<R> {
373 SequenceReader::with_options(self.clone(), reader)
374 }
375
376 /// Decode exactly one CBOR data item from an arbitrary reader.
377 /// Used by the sequence iterators to share the core decoding logic.
378 pub(crate) fn decode_one<R>(&self, reader: &mut R) -> std::result::Result<Value, R::Error>
379 where
380 R: MyReader,
381 R::Error: From<Error>,
382 {
383 self.do_read(reader, self.recursion_limit, self.oom_mitigation)
384 }
385
386 /// Expose the parser's recursion limit for sequence iterators.
387 pub(crate) fn recursion_limit_value(&self) -> u16 {
388 self.recursion_limit
389 }
390
391 /// Expose the selected format for sequence iterators.
392 pub(crate) fn format_value(&self) -> Format {
393 self.format
394 }
395
396 fn do_read<R>(
397 &self,
398 reader: &mut R,
399 recursion_limit: u16,
400 oom_mitigation: usize,
401 ) -> std::result::Result<Value, R::Error>
402 where
403 R: MyReader,
404 R::Error: From<Error>,
405 {
406 let head = Head::read_from(reader)?;
407
408 let is_float = head.initial_byte.major() == Major::SimpleOrFloat
409 && matches!(head.argument, Argument::U16(_) | Argument::U32(_) | Argument::U64(_));
410
411 if !is_float && !head.argument.is_deterministic() {
412 return Err(Error::NonDeterministic.into());
413 }
414
415 let this = match head.initial_byte.major() {
416 Major::Unsigned => Value::Unsigned(head.value()),
417 Major::Negative => Value::Negative(head.value()),
418
419 Major::ByteString => {
420 let len = head.value();
421 if len > self.length_limit {
422 return Err(Error::LengthTooLarge.into());
423 }
424 Value::ByteString(reader.read_vec(len, oom_mitigation)?)
425 }
426
427 Major::TextString => {
428 let len = head.value();
429 if len > self.length_limit {
430 return Err(Error::LengthTooLarge.into());
431 }
432 let bytes = reader.read_vec(len, oom_mitigation)?;
433 let string = String::from_utf8(bytes).map_err(Error::from)?;
434 Value::TextString(string)
435 }
436
437 Major::Array => {
438 let value = head.value();
439
440 if value > self.length_limit {
441 return Err(Error::LengthTooLarge.into());
442 }
443
444 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
445 return Err(Error::NestingTooDeep.into());
446 };
447
448 let request: usize = value.try_into().or(Err(Error::LengthTooLarge))?;
449 let granted = request.min(oom_mitigation / size_of::<Value>());
450 let oom_mitigation = oom_mitigation - granted * size_of::<Value>();
451
452 let mut vec = Vec::with_capacity(granted);
453
454 for _ in 0..value {
455 vec.push(self.do_read(reader, recursion_limit, oom_mitigation)?);
456 }
457
458 Value::Array(vec)
459 }
460
461 Major::Map => {
462 let value = head.value();
463
464 if value > self.length_limit {
465 return Err(Error::LengthTooLarge.into());
466 }
467
468 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
469 return Err(Error::NestingTooDeep.into());
470 };
471
472 let mut map = BTreeMap::new();
473 let mut prev = None;
474
475 for _ in 0..value {
476 let key = self.do_read(reader, recursion_limit, oom_mitigation)?;
477 let value = self.do_read(reader, recursion_limit, oom_mitigation)?;
478
479 if let Some((prev_key, prev_value)) = prev.take() {
480 if prev_key >= key {
481 return Err(Error::NonDeterministic.into());
482 }
483 map.insert(prev_key, prev_value);
484 }
485
486 prev = Some((key, value));
487 }
488
489 if let Some((key, value)) = prev.take() {
490 map.insert(key, value);
491 }
492
493 Value::Map(map)
494 }
495
496 Major::Tag => {
497 let Some(recursion_limit) = recursion_limit.checked_sub(1) else {
498 return Err(Error::NestingTooDeep.into());
499 };
500
501 let tag_number = head.value();
502 let tag_content = Box::new(self.do_read(reader, recursion_limit, oom_mitigation)?);
503
504 let this = Value::Tag(tag_number, tag_content);
505
506 if this.data_type() == DataType::BigInt {
507 let bytes = this.as_bytes().unwrap();
508 let valid = bytes.len() >= 8 && bytes[0] != 0;
509 if !valid {
510 return Err(Error::NonDeterministic.into());
511 }
512 }
513
514 this
515 }
516
517 Major::SimpleOrFloat => match head.argument {
518 Argument::None => Value::SimpleValue(SimpleValue(head.initial_byte.info())),
519 Argument::U8(n) if n >= 32 => Value::SimpleValue(SimpleValue(n)),
520
521 Argument::U16(bits) => Value::Float(Float::from_bits_u16(bits)),
522 Argument::U32(bits) => Value::Float(Float::from_bits_u32(bits)?),
523 Argument::U64(bits) => Value::Float(Float::from_bits_u64(bits)?),
524
525 _ => return Err(Error::Malformed.into()),
526 },
527 };
528
529 Ok(this)
530 }
531}