cbor_data/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::{
4    borrow::{Borrow, Cow},
5    collections::BTreeMap,
6    convert::TryFrom,
7    fmt::{Debug, Display, Write},
8    ops::Deref,
9};
10
11mod builder;
12mod canonical;
13mod check;
14pub mod codec;
15pub mod constants;
16mod error;
17mod reader;
18mod validated;
19pub mod value;
20mod visit;
21
22#[cfg(test)]
23mod tests;
24
25pub use builder::{
26    ArrayWriter, CborBuilder, CborOutput, DictWriter, Encoder, KeyBuilder, NoOutput, SingleBuilder,
27    SingleResult, WithOutput, Writer,
28};
29pub use error::{ErrorKind, ParseError, WhileParsing};
30pub use reader::Literal;
31pub use validated::{
32    indexing::{IndexStr, PathElement},
33    item::{ItemKind, ItemKindShort, TaggedItem},
34    iterators::{ArrayIter, BytesIter, DictIter, StringIter},
35    tags::{Tags, TagsShort},
36};
37pub use value::CborValue;
38pub use visit::Visitor;
39
40use canonical::canonicalise;
41use smallvec::SmallVec;
42use validated::indexing::IndexVisitor;
43use visit::visit;
44
/// Borrowed view of a byte slice that holds one valid CBOR item.
///
/// The encoding is specified in [RFC 8949](https://www.rfc-editor.org/rfc/rfc8949).
///
/// Bytes arriving from the outside world (for example from the network) are best
/// ingested through the [`CborOwned::canonical`](struct.CborOwned.html#method.canonical)
/// constructor. Bytes that were produced locally, for example with
/// [`CborBuilder`](./struct.CborBuilder.html), can simply be wrapped using the
/// [`unchecked`](#method.unchecked) constructor.
///
/// `Display` renders the item in
/// [diagnostic notation](https://datatracker.ietf.org/doc/html/rfc8949#section-8).
// Equality, ordering and hashing are all derived from the raw bytes.
#[derive(Hash, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
pub struct Cbor([u8]);
58
59impl From<&Cbor> for SmallVec<[u8; 16]> {
60    fn from(a: &Cbor) -> Self {
61        (&a.0).into()
62    }
63}
64
65impl Debug for Cbor {
66    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67        let mut groups = 0;
68        f.write_str("Cbor(")?;
69        if f.alternate() {
70            for chunk in self.0.chunks(4) {
71                let c = if groups & 15 == 0 { '\n' } else { ' ' };
72                f.write_char(c)?;
73                groups += 1;
74                for byte in chunk {
75                    write!(f, "{:02x}", byte)?;
76                }
77            }
78            f.write_char('\n')?;
79        } else {
80            for chunk in self.0.chunks(4) {
81                if groups > 0 {
82                    f.write_char(' ')?;
83                } else {
84                    groups = 1;
85                }
86                for byte in chunk {
87                    write!(f, "{:02x}", byte)?;
88                }
89            }
90        }
91        f.write_char(')')
92    }
93}
94
95impl Display for Cbor {
96    fn fmt(&self, mut f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97        visit(&mut f, self.tagged_item())
98    }
99}
100
101impl AsRef<[u8]> for Cbor {
102    fn as_ref(&self) -> &[u8] {
103        &self.0
104    }
105}
106
107impl<'a> TryFrom<&'a [u8]> for &'a Cbor {
108    type Error = error::ParseError;
109
110    fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
111        Cbor::checked(value)
112    }
113}
114
115impl ToOwned for Cbor {
116    type Owned = CborOwned;
117
118    fn to_owned(&self) -> Self::Owned {
119        CborOwned::unchecked(&self.0)
120    }
121}
122
123impl Cbor {
124    /// Unconditionally cast the given byte slice as CBOR item
125    ///
126    /// No checks on the integrity are made, indexing methods may panic if encoded
127    /// lengths are out of bound or when encountering invalid encodings.
128    /// If you want to carefully treat data obtained from unreliable sources, prefer
129    /// [`CborOwned::canonical`](struct.CborOwned.html#method.canonical).
130    ///
131    /// The results of [`CborBuilder`](struct.CborBuilder.html) can safely be fed to this method.
132    pub fn unchecked(bytes: &[u8]) -> &Self {
133        unsafe { std::mem::transmute(bytes) }
134    }
135
136    /// Unconditionally convert the given bytes as CBOR item
137    ///
138    /// The borrowed variant is converted using [`unchecked`](#method.unchecked) without
139    /// allocating. The owned variant is converted by either reusing the allocated vector
140    /// or storing the bytes inline (if they fit) and releasing the vector.
141    ///
142    /// No checks on the integrity are made, indexing methods may panic if encoded
143    /// lengths are out of bound or when encountering invalid encodings.
144    /// If you want to carefully treat data obtained from unreliable sources, prefer
145    /// [`CborOwned::canonical`](struct.CborOwned.html#method.canonical).
146    pub fn from_cow_unchecked(bytes: Cow<'_, [u8]>) -> Cow<'_, Cbor> {
147        match bytes {
148            Cow::Borrowed(b) => Cow::Borrowed(Cbor::unchecked(b)),
149            Cow::Owned(v) => Cow::Owned(CborOwned::unchecked(v)),
150        }
151    }
152
153    /// Cast the given byte slice as CBOR item if the encoding is valid
154    pub fn checked(bytes: &[u8]) -> Result<&Self, ParseError> {
155        check::validate(bytes, false).map(|(cbor, _rest)| cbor)
156    }
157
158    /// Cast the given byte slice as CBOR item if the encoding is valid, permitting more bytes to follow the item
159    pub fn checked_prefix(bytes: &[u8]) -> Result<(&Self, &[u8]), ParseError> {
160        check::validate(bytes, true)
161    }
162
163    /// Convert the given bytes to a CBOR item if the encoding is valid
164    ///
165    /// The borrowed variant is converted using [`checked`](#method.checked) without
166    /// allocating. The owned variant is converted using [`CborOwned::canonical`](struct.CborOwned.html#method.canonical).
167    pub fn from_cow_checked(bytes: Cow<'_, [u8]>) -> Result<Cow<'_, Cbor>, ParseError> {
168        match bytes {
169            Cow::Borrowed(b) => Cbor::checked(b).map(Cow::Borrowed),
170            Cow::Owned(v) => CborOwned::canonical(v).map(Cow::Owned),
171        }
172    }
173
174    /// A view onto the underlying bytes
175    pub fn as_slice(&self) -> &[u8] {
176        &self.0
177    }
178
179    /// Interpret the CBOR item at a higher level
180    ///
181    /// While [`kind`](#method.kind) gives you precise information on how the item is encoded,
182    /// this method interprets the tag-based encoding according to the standard, adding for example
183    /// big integers, decimals, and floats, or turning base64-encoded text strings into binary strings.
184    pub fn decode(&self) -> CborValue<'_> {
185        CborValue::new(self.tagged_item())
186    }
187
188    /// An iterator over the tags present on this item, from outermost to innermost
189    pub fn tags(&self) -> Tags<'_> {
190        reader::tags(self.as_slice()).unwrap().0
191    }
192
193    /// The low-level encoding of this item, without its tags
194    pub fn kind(&self) -> ItemKind<'_> {
195        ItemKind::new(self)
196    }
197
198    /// The low-level encoding of this item with its tags
199    pub fn tagged_item(&self) -> TaggedItem<'_> {
200        TaggedItem::new(self)
201    }
202
203    /// More efficient shortcut for `.decode().is_null()` with error reporting.
204    pub fn try_null(&self) -> Result<(), TypeError> {
205        let item = self.tagged_item();
206        if CborValue::new(item).is_null() {
207            Ok(())
208        } else {
209            Err(TypeError {
210                target: "null",
211                kind: item.kind().into(),
212                tags: item.tags().into(),
213            })
214        }
215    }
216
217    /// More efficient shortcut for `.decode().as_bool()` with error reporting.
218    pub fn try_bool(&self) -> Result<bool, TypeError> {
219        let item = self.tagged_item();
220        CborValue::new(item).as_bool().ok_or(TypeError {
221            target: "boolean",
222            kind: item.kind().into(),
223            tags: item.tags().into(),
224        })
225    }
226
227    /// More efficient shortcut for `.decode().to_number()` with error reporting.
228    pub fn try_number(&self) -> Result<value::Number, TypeError> {
229        let item = self.tagged_item();
230        CborValue::new(item).to_number().ok_or(TypeError {
231            target: "number",
232            kind: item.kind().into(),
233            tags: item.tags().into(),
234        })
235    }
236
237    /// More efficient shortcut for `.decode().as_timestamp()` with error reporting.
238    pub fn try_timestamp(&self) -> Result<value::Timestamp, TypeError> {
239        let item = self.tagged_item();
240        CborValue::new(item).as_timestamp().ok_or(TypeError {
241            target: "timestamp",
242            kind: item.kind().into(),
243            tags: item.tags().into(),
244        })
245    }
246
247    /// More efficient shortcut for `.decode().to_bytes()` with error reporting.
248    pub fn try_bytes(&self) -> Result<Cow<[u8]>, TypeError> {
249        let item = self.tagged_item();
250        CborValue::new(item).to_bytes().ok_or(TypeError {
251            target: "byte string",
252            kind: item.kind().into(),
253            tags: item.tags().into(),
254        })
255    }
256
257    /// More efficient shortcut for `.decode().to_str()` with error reporting.
258    pub fn try_str(&self) -> Result<Cow<str>, TypeError> {
259        let item = self.tagged_item();
260        CborValue::new(item).to_str().ok_or(TypeError {
261            target: "string",
262            kind: item.kind().into(),
263            tags: item.tags().into(),
264        })
265    }
266
267    /// More efficient shortcut for `.decode().to_array()` with error reporting.
268    pub fn try_array(&self) -> Result<Vec<Cow<Cbor>>, TypeError> {
269        let item = self.tagged_item();
270        CborValue::new(item).to_array().ok_or(TypeError {
271            target: "array",
272            kind: item.kind().into(),
273            tags: item.tags().into(),
274        })
275    }
276
277    /// More efficient shortcut for `.decode().to_dict()` with error reporting.
278    pub fn try_dict(&self) -> Result<BTreeMap<Cow<Cbor>, Cow<Cbor>>, TypeError> {
279        let item = self.tagged_item();
280        CborValue::new(item).to_dict().ok_or(TypeError {
281            target: "dictionary",
282            kind: item.kind().into(),
283            tags: item.tags().into(),
284        })
285    }
286
287    /// Extract a value by indexing into arrays and dicts, with path elements yielded by an iterator.
288    ///
289    /// Returns None if an index doesn’t exist or the indexed object is neither an array nor a dict.
290    /// When the object under consideration is an array, the next path element must represent an
291    /// integer number.
292    ///
293    /// Providing an empty iterator will yield the current Cbor item.
294    ///
295    /// Returns a borrowed Cbor unless the traversal entered a TAG_CBOR_ITEM byte string with indefinite
296    /// encoding (in which case the bytes need to be assembled into a Vec before continuing). This cannot
297    /// happen if the item being indexed stems from [`CborOwned::canonical`](struct.CborOwned.html#method.canonical).
298    pub fn index<'a, 'b>(
299        &'a self,
300        path: impl IntoIterator<Item = PathElement<'b>>,
301    ) -> Option<Cow<'a, Cbor>> {
302        visit(&mut IndexVisitor::new(path.into_iter()), self.tagged_item()).unwrap_err()
303    }
304
305    /// Extract a value by indexing into arrays and dicts, with path elements yielded by an iterator.
306    ///
307    /// Returns None if an index doesn’t exist or the indexed object is neither an array nor a dict.
308    /// When the object under consideration is an array, the next path element must represent an
309    /// integer number.
310    ///
311    /// Providing an empty iterator will yield the current Cbor item.
312    ///
313    /// # Panics
314    ///
315    /// Panics if this CBOR item contains a TAG_CBOR_ITEM byte string that has been index into by this
316    /// path traversal. Use [`CborOwned::canonical`](struct.CborOwned.html#method.canonical) to ensure
317    /// that this cannot happen.
318    pub fn index_borrowed<'a, 'b>(
319        &'a self,
320        path: impl IntoIterator<Item = PathElement<'b>>,
321    ) -> Option<&'a Cbor> {
322        self.index(path).map(|cow| match cow {
323            Cow::Borrowed(b) => b,
324            Cow::Owned(_) => panic!("indexing required allocation"),
325        })
326    }
327
328    /// Visit the interesting parts of this CBOR item as guided by the given
329    /// [`Visitor`](trait.Visitor.html).
330    ///
331    /// Returns `false` if the visit was not even begun due to invalid or non-canonical CBOR.
332    pub fn visit<'a, 'b, Err, V: Visitor<'a, Err> + 'b>(
333        &'a self,
334        visitor: &'b mut V,
335    ) -> Result<(), Err> {
336        visit(visitor, self.tagged_item())
337    }
338}
339
340#[derive(Debug, PartialEq, Clone, Copy)]
341pub struct TypeError {
342    target: &'static str,
343    kind: ItemKindShort,
344    tags: TagsShort,
345}
346
347impl Display for TypeError {
348    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
349        write!(
350            f,
351            "type error when reading {}: found {} (tags: {:?})",
352            self.target, self.kind, self.tags
353        )
354    }
355}
356impl std::error::Error for TypeError {}
357
358/// Wrapper around a vector of bytes, for parsing as CBOR.
359///
360/// For details on the format see [RFC 8949](https://www.rfc-editor.org/rfc/rfc8949).
361///
362/// When interpreting CBOR messages from the outside (e.g. from the network) it is
363/// advisable to ingest those using the [`canonical`](#method.canonical) constructor.
364/// In case the message was encoded for example using [`CborBuilder`](./struct.CborBuilder.html)
365/// it is sufficient to use the [`trusting`](#method.trusting) constructor.
366///
367/// Canonicalisation rqeuires an intermediary data buffer, which can be supplied (and reused)
368/// by the caller to save on allocations.
369#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
370// 16 bytes is the smallest that makes sense on 64bit platforms (size of a fat pointer)
371pub struct CborOwned(SmallVec<[u8; 16]>);
372
373impl Debug for CborOwned {
374    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
375        Debug::fmt(Borrow::<Cbor>::borrow(self), f)
376    }
377}
378
379impl Display for CborOwned {
380    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
381        Display::fmt(Borrow::<Cbor>::borrow(self), f)
382    }
383}
384
385impl Borrow<Cbor> for CborOwned {
386    fn borrow(&self) -> &Cbor {
387        Cbor::unchecked(&*self.0)
388    }
389}
390
391impl AsRef<Cbor> for CborOwned {
392    fn as_ref(&self) -> &Cbor {
393        Cbor::unchecked(&*self.0)
394    }
395}
396
397impl AsRef<[u8]> for CborOwned {
398    fn as_ref(&self) -> &[u8] {
399        &*self.0
400    }
401}
402
403impl Deref for CborOwned {
404    type Target = Cbor;
405
406    fn deref(&self) -> &Self::Target {
407        self.borrow()
408    }
409}
410
411impl TryFrom<&[u8]> for CborOwned {
412    type Error = error::ParseError;
413
414    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
415        Self::canonical(value)
416    }
417}
418
419impl CborOwned {
420    /// Copy the bytes and wrap for indexing.
421    ///
422    /// No checks on the integrity are made, indexing methods may panic if encoded lengths are out of bound.
423    /// If you want to carefully treat data obtained from unreliable sources, prefer
424    /// [`canonical()`](#method.canonical).
425    pub fn unchecked(bytes: impl Into<SmallVec<[u8; 16]>>) -> Self {
426        Self(bytes.into())
427    }
428
429    /// Copy the bytes while checking for integrity and replacing indefinite (byte) strings with definite ones.
430    ///
431    /// This constructor will go through and decode the whole provided CBOR bytes and write them into a
432    /// vector, thereby
433    ///
434    ///  - writing large arrays and dicts using indefinite size format
435    ///  - writing numbers in their smallest form
436    ///
437    /// For more configuration options like reusing a scratch space or preferring definite size encoding
438    /// see [`CborBuilder`](struct.CborBuilder.html).
439    pub fn canonical(bytes: impl AsRef<[u8]>) -> Result<Self, ParseError> {
440        canonicalise(bytes.as_ref(), CborBuilder::new())
441    }
442
443    /// Hand out the underlying SmallVec as a Vec<u8>
444    ///
445    /// Will only allocate if the item is at most 16 bytes long.
446    pub fn into_vec(self) -> Vec<u8> {
447        self.0.into_vec()
448    }
449}
450
451/// Generate an iterator of [`PathElement`](struct.PathElement.html) from a string
452///
453/// A path element is either
454///
455///  - a string starting with any other character than dot or opening bracket
456///    and delimited by the next dot or opening bracket
457///  - a number enclosed in brackets
458///
459/// `None` is returned in case an opening bracket is not matched with a closing one
460/// or the characters between brackets are not a valid representation of `u64`.
461///
462/// # Examples:
463///
464/// ```rust
465/// use cbor_data::{Cbor, index_str, ItemKind};
466///
467/// let cbor = Cbor::checked(b"eActyx").unwrap();
468///
469/// // dict key `x`, array index 12, dict key `y`
470/// assert_eq!(cbor.index(index_str("x[12].y")), None);
471/// // empty string means the outermost item
472/// assert!(matches!(cbor.index(index_str("")).unwrap().kind(), ItemKind::Str(s) if s == "Actyx"));
473/// ```
474pub fn try_index_str(s: &str) -> Option<IndexStr<'_>> {
475    IndexStr::new(s)
476}
477
478/// Generate an iterator of [`PathElement`](struct.PathElement.html) from a string
479///
480/// # Panics
481///
482/// Panics if the string is not valid, see [`try_index_str`](fn.try_index_str.html) for the
483/// details and a non-panicking version.
484///
485/// # Example
486///
487/// ```rust
488/// use cbor_data::{CborBuilder, index_str, Encoder, value::Number};
489///
490/// let cbor = CborBuilder::new().encode_array(|builder| {
491///     builder.encode_u64(42);
492/// });
493///
494/// let item = cbor.index(index_str("[0]")).unwrap();
495/// assert_eq!(item.decode().to_number().unwrap(), Number::Int(42));
496/// ```
497pub fn index_str(s: &str) -> IndexStr<'_> {
498    try_index_str(s).expect("invalid index string")
499}
500
/// Adapter whose `Debug` output is the `Display` output of the wrapped value.
struct DebugUsingDisplay<'a, T>(&'a T);

impl<T: Display> Debug for DebugUsingDisplay<'_, T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(inner) = self;
        // Forward the formatter untouched so width/fill flags still apply.
        Display::fmt(*inner, f)
    }
}