cbor_data/lib.rs
1#![doc = include_str!("../README.md")]
2
3use std::{
4 borrow::{Borrow, Cow},
5 collections::BTreeMap,
6 convert::TryFrom,
7 fmt::{Debug, Display, Write},
8 ops::Deref,
9};
10
11mod builder;
12mod canonical;
13mod check;
14pub mod codec;
15pub mod constants;
16mod error;
17mod reader;
18mod validated;
19pub mod value;
20mod visit;
21
22#[cfg(test)]
23mod tests;
24
25pub use builder::{
26 ArrayWriter, CborBuilder, CborOutput, DictWriter, Encoder, KeyBuilder, NoOutput, SingleBuilder,
27 SingleResult, WithOutput, Writer,
28};
29pub use error::{ErrorKind, ParseError, WhileParsing};
30pub use reader::Literal;
31pub use validated::{
32 indexing::{IndexStr, PathElement},
33 item::{ItemKind, ItemKindShort, TaggedItem},
34 iterators::{ArrayIter, BytesIter, DictIter, StringIter},
35 tags::{Tags, TagsShort},
36};
37pub use value::CborValue;
38pub use visit::Visitor;
39
40use canonical::canonicalise;
41use smallvec::SmallVec;
42use validated::indexing::IndexVisitor;
43use visit::visit;
44
/// A byte slice that is treated as the encoding of one valid CBOR item.
///
/// The format is specified in [RFC 8949](https://www.rfc-editor.org/rfc/rfc8949).
///
/// Bytes that come from the outside (e.g. from the network) are best ingested via the
/// [`CborOwned::canonical`](struct.CborOwned.html#method.canonical) constructor.
/// Bytes that were produced by e.g. [`CborBuilder`](./struct.CborBuilder.html) can simply be
/// wrapped with the [`unchecked`](#method.unchecked) constructor.
///
/// The `Display` implementation follows the
/// [diagnostic notation](https://datatracker.ietf.org/doc/html/rfc8949#section-8).
// `repr(transparent)` keeps the layout identical to `[u8]`, which the reference cast in
// `Cbor::unchecked` relies on.
#[derive(Hash, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
pub struct Cbor([u8]);
58
59impl From<&Cbor> for SmallVec<[u8; 16]> {
60 fn from(a: &Cbor) -> Self {
61 (&a.0).into()
62 }
63}
64
65impl Debug for Cbor {
66 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67 let mut groups = 0;
68 f.write_str("Cbor(")?;
69 if f.alternate() {
70 for chunk in self.0.chunks(4) {
71 let c = if groups & 15 == 0 { '\n' } else { ' ' };
72 f.write_char(c)?;
73 groups += 1;
74 for byte in chunk {
75 write!(f, "{:02x}", byte)?;
76 }
77 }
78 f.write_char('\n')?;
79 } else {
80 for chunk in self.0.chunks(4) {
81 if groups > 0 {
82 f.write_char(' ')?;
83 } else {
84 groups = 1;
85 }
86 for byte in chunk {
87 write!(f, "{:02x}", byte)?;
88 }
89 }
90 }
91 f.write_char(')')
92 }
93}
94
95impl Display for Cbor {
96 fn fmt(&self, mut f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97 visit(&mut f, self.tagged_item())
98 }
99}
100
101impl AsRef<[u8]> for Cbor {
102 fn as_ref(&self) -> &[u8] {
103 &self.0
104 }
105}
106
107impl<'a> TryFrom<&'a [u8]> for &'a Cbor {
108 type Error = error::ParseError;
109
110 fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
111 Cbor::checked(value)
112 }
113}
114
115impl ToOwned for Cbor {
116 type Owned = CborOwned;
117
118 fn to_owned(&self) -> Self::Owned {
119 CborOwned::unchecked(&self.0)
120 }
121}
122
123impl Cbor {
124 /// Unconditionally cast the given byte slice as CBOR item
125 ///
126 /// No checks on the integrity are made, indexing methods may panic if encoded
127 /// lengths are out of bound or when encountering invalid encodings.
128 /// If you want to carefully treat data obtained from unreliable sources, prefer
129 /// [`CborOwned::canonical`](struct.CborOwned.html#method.canonical).
130 ///
131 /// The results of [`CborBuilder`](struct.CborBuilder.html) can safely be fed to this method.
132 pub fn unchecked(bytes: &[u8]) -> &Self {
133 unsafe { std::mem::transmute(bytes) }
134 }
135
136 /// Unconditionally convert the given bytes as CBOR item
137 ///
138 /// The borrowed variant is converted using [`unchecked`](#method.unchecked) without
139 /// allocating. The owned variant is converted by either reusing the allocated vector
140 /// or storing the bytes inline (if they fit) and releasing the vector.
141 ///
142 /// No checks on the integrity are made, indexing methods may panic if encoded
143 /// lengths are out of bound or when encountering invalid encodings.
144 /// If you want to carefully treat data obtained from unreliable sources, prefer
145 /// [`CborOwned::canonical`](struct.CborOwned.html#method.canonical).
146 pub fn from_cow_unchecked(bytes: Cow<'_, [u8]>) -> Cow<'_, Cbor> {
147 match bytes {
148 Cow::Borrowed(b) => Cow::Borrowed(Cbor::unchecked(b)),
149 Cow::Owned(v) => Cow::Owned(CborOwned::unchecked(v)),
150 }
151 }
152
153 /// Cast the given byte slice as CBOR item if the encoding is valid
154 pub fn checked(bytes: &[u8]) -> Result<&Self, ParseError> {
155 check::validate(bytes, false).map(|(cbor, _rest)| cbor)
156 }
157
158 /// Cast the given byte slice as CBOR item if the encoding is valid, permitting more bytes to follow the item
159 pub fn checked_prefix(bytes: &[u8]) -> Result<(&Self, &[u8]), ParseError> {
160 check::validate(bytes, true)
161 }
162
163 /// Convert the given bytes to a CBOR item if the encoding is valid
164 ///
165 /// The borrowed variant is converted using [`checked`](#method.checked) without
166 /// allocating. The owned variant is converted using [`CborOwned::canonical`](struct.CborOwned.html#method.canonical).
167 pub fn from_cow_checked(bytes: Cow<'_, [u8]>) -> Result<Cow<'_, Cbor>, ParseError> {
168 match bytes {
169 Cow::Borrowed(b) => Cbor::checked(b).map(Cow::Borrowed),
170 Cow::Owned(v) => CborOwned::canonical(v).map(Cow::Owned),
171 }
172 }
173
174 /// A view onto the underlying bytes
175 pub fn as_slice(&self) -> &[u8] {
176 &self.0
177 }
178
179 /// Interpret the CBOR item at a higher level
180 ///
181 /// While [`kind`](#method.kind) gives you precise information on how the item is encoded,
182 /// this method interprets the tag-based encoding according to the standard, adding for example
183 /// big integers, decimals, and floats, or turning base64-encoded text strings into binary strings.
184 pub fn decode(&self) -> CborValue<'_> {
185 CborValue::new(self.tagged_item())
186 }
187
188 /// An iterator over the tags present on this item, from outermost to innermost
189 pub fn tags(&self) -> Tags<'_> {
190 reader::tags(self.as_slice()).unwrap().0
191 }
192
193 /// The low-level encoding of this item, without its tags
194 pub fn kind(&self) -> ItemKind<'_> {
195 ItemKind::new(self)
196 }
197
198 /// The low-level encoding of this item with its tags
199 pub fn tagged_item(&self) -> TaggedItem<'_> {
200 TaggedItem::new(self)
201 }
202
203 /// More efficient shortcut for `.decode().is_null()` with error reporting.
204 pub fn try_null(&self) -> Result<(), TypeError> {
205 let item = self.tagged_item();
206 if CborValue::new(item).is_null() {
207 Ok(())
208 } else {
209 Err(TypeError {
210 target: "null",
211 kind: item.kind().into(),
212 tags: item.tags().into(),
213 })
214 }
215 }
216
217 /// More efficient shortcut for `.decode().as_bool()` with error reporting.
218 pub fn try_bool(&self) -> Result<bool, TypeError> {
219 let item = self.tagged_item();
220 CborValue::new(item).as_bool().ok_or(TypeError {
221 target: "boolean",
222 kind: item.kind().into(),
223 tags: item.tags().into(),
224 })
225 }
226
227 /// More efficient shortcut for `.decode().to_number()` with error reporting.
228 pub fn try_number(&self) -> Result<value::Number, TypeError> {
229 let item = self.tagged_item();
230 CborValue::new(item).to_number().ok_or(TypeError {
231 target: "number",
232 kind: item.kind().into(),
233 tags: item.tags().into(),
234 })
235 }
236
237 /// More efficient shortcut for `.decode().as_timestamp()` with error reporting.
238 pub fn try_timestamp(&self) -> Result<value::Timestamp, TypeError> {
239 let item = self.tagged_item();
240 CborValue::new(item).as_timestamp().ok_or(TypeError {
241 target: "timestamp",
242 kind: item.kind().into(),
243 tags: item.tags().into(),
244 })
245 }
246
247 /// More efficient shortcut for `.decode().to_bytes()` with error reporting.
248 pub fn try_bytes(&self) -> Result<Cow<[u8]>, TypeError> {
249 let item = self.tagged_item();
250 CborValue::new(item).to_bytes().ok_or(TypeError {
251 target: "byte string",
252 kind: item.kind().into(),
253 tags: item.tags().into(),
254 })
255 }
256
257 /// More efficient shortcut for `.decode().to_str()` with error reporting.
258 pub fn try_str(&self) -> Result<Cow<str>, TypeError> {
259 let item = self.tagged_item();
260 CborValue::new(item).to_str().ok_or(TypeError {
261 target: "string",
262 kind: item.kind().into(),
263 tags: item.tags().into(),
264 })
265 }
266
267 /// More efficient shortcut for `.decode().to_array()` with error reporting.
268 pub fn try_array(&self) -> Result<Vec<Cow<Cbor>>, TypeError> {
269 let item = self.tagged_item();
270 CborValue::new(item).to_array().ok_or(TypeError {
271 target: "array",
272 kind: item.kind().into(),
273 tags: item.tags().into(),
274 })
275 }
276
277 /// More efficient shortcut for `.decode().to_dict()` with error reporting.
278 pub fn try_dict(&self) -> Result<BTreeMap<Cow<Cbor>, Cow<Cbor>>, TypeError> {
279 let item = self.tagged_item();
280 CborValue::new(item).to_dict().ok_or(TypeError {
281 target: "dictionary",
282 kind: item.kind().into(),
283 tags: item.tags().into(),
284 })
285 }
286
287 /// Extract a value by indexing into arrays and dicts, with path elements yielded by an iterator.
288 ///
289 /// Returns None if an index doesn’t exist or the indexed object is neither an array nor a dict.
290 /// When the object under consideration is an array, the next path element must represent an
291 /// integer number.
292 ///
293 /// Providing an empty iterator will yield the current Cbor item.
294 ///
295 /// Returns a borrowed Cbor unless the traversal entered a TAG_CBOR_ITEM byte string with indefinite
296 /// encoding (in which case the bytes need to be assembled into a Vec before continuing). This cannot
297 /// happen if the item being indexed stems from [`CborOwned::canonical`](struct.CborOwned.html#method.canonical).
298 pub fn index<'a, 'b>(
299 &'a self,
300 path: impl IntoIterator<Item = PathElement<'b>>,
301 ) -> Option<Cow<'a, Cbor>> {
302 visit(&mut IndexVisitor::new(path.into_iter()), self.tagged_item()).unwrap_err()
303 }
304
305 /// Extract a value by indexing into arrays and dicts, with path elements yielded by an iterator.
306 ///
307 /// Returns None if an index doesn’t exist or the indexed object is neither an array nor a dict.
308 /// When the object under consideration is an array, the next path element must represent an
309 /// integer number.
310 ///
311 /// Providing an empty iterator will yield the current Cbor item.
312 ///
313 /// # Panics
314 ///
315 /// Panics if this CBOR item contains a TAG_CBOR_ITEM byte string that has been index into by this
316 /// path traversal. Use [`CborOwned::canonical`](struct.CborOwned.html#method.canonical) to ensure
317 /// that this cannot happen.
318 pub fn index_borrowed<'a, 'b>(
319 &'a self,
320 path: impl IntoIterator<Item = PathElement<'b>>,
321 ) -> Option<&'a Cbor> {
322 self.index(path).map(|cow| match cow {
323 Cow::Borrowed(b) => b,
324 Cow::Owned(_) => panic!("indexing required allocation"),
325 })
326 }
327
328 /// Visit the interesting parts of this CBOR item as guided by the given
329 /// [`Visitor`](trait.Visitor.html).
330 ///
331 /// Returns `false` if the visit was not even begun due to invalid or non-canonical CBOR.
332 pub fn visit<'a, 'b, Err, V: Visitor<'a, Err> + 'b>(
333 &'a self,
334 visitor: &'b mut V,
335 ) -> Result<(), Err> {
336 visit(visitor, self.tagged_item())
337 }
338}
339
340#[derive(Debug, PartialEq, Clone, Copy)]
341pub struct TypeError {
342 target: &'static str,
343 kind: ItemKindShort,
344 tags: TagsShort,
345}
346
347impl Display for TypeError {
348 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
349 write!(
350 f,
351 "type error when reading {}: found {} (tags: {:?})",
352 self.target, self.kind, self.tags
353 )
354 }
355}
356impl std::error::Error for TypeError {}
357
358/// Wrapper around a vector of bytes, for parsing as CBOR.
359///
360/// For details on the format see [RFC 8949](https://www.rfc-editor.org/rfc/rfc8949).
361///
362/// When interpreting CBOR messages from the outside (e.g. from the network) it is
363/// advisable to ingest those using the [`canonical`](#method.canonical) constructor.
364/// In case the message was encoded for example using [`CborBuilder`](./struct.CborBuilder.html)
365/// it is sufficient to use the [`trusting`](#method.trusting) constructor.
366///
367/// Canonicalisation rqeuires an intermediary data buffer, which can be supplied (and reused)
368/// by the caller to save on allocations.
369#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
370// 16 bytes is the smallest that makes sense on 64bit platforms (size of a fat pointer)
371pub struct CborOwned(SmallVec<[u8; 16]>);
372
373impl Debug for CborOwned {
374 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
375 Debug::fmt(Borrow::<Cbor>::borrow(self), f)
376 }
377}
378
379impl Display for CborOwned {
380 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
381 Display::fmt(Borrow::<Cbor>::borrow(self), f)
382 }
383}
384
385impl Borrow<Cbor> for CborOwned {
386 fn borrow(&self) -> &Cbor {
387 Cbor::unchecked(&*self.0)
388 }
389}
390
391impl AsRef<Cbor> for CborOwned {
392 fn as_ref(&self) -> &Cbor {
393 Cbor::unchecked(&*self.0)
394 }
395}
396
397impl AsRef<[u8]> for CborOwned {
398 fn as_ref(&self) -> &[u8] {
399 &*self.0
400 }
401}
402
403impl Deref for CborOwned {
404 type Target = Cbor;
405
406 fn deref(&self) -> &Self::Target {
407 self.borrow()
408 }
409}
410
411impl TryFrom<&[u8]> for CborOwned {
412 type Error = error::ParseError;
413
414 fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
415 Self::canonical(value)
416 }
417}
418
419impl CborOwned {
420 /// Copy the bytes and wrap for indexing.
421 ///
422 /// No checks on the integrity are made, indexing methods may panic if encoded lengths are out of bound.
423 /// If you want to carefully treat data obtained from unreliable sources, prefer
424 /// [`canonical()`](#method.canonical).
425 pub fn unchecked(bytes: impl Into<SmallVec<[u8; 16]>>) -> Self {
426 Self(bytes.into())
427 }
428
429 /// Copy the bytes while checking for integrity and replacing indefinite (byte) strings with definite ones.
430 ///
431 /// This constructor will go through and decode the whole provided CBOR bytes and write them into a
432 /// vector, thereby
433 ///
434 /// - writing large arrays and dicts using indefinite size format
435 /// - writing numbers in their smallest form
436 ///
437 /// For more configuration options like reusing a scratch space or preferring definite size encoding
438 /// see [`CborBuilder`](struct.CborBuilder.html).
439 pub fn canonical(bytes: impl AsRef<[u8]>) -> Result<Self, ParseError> {
440 canonicalise(bytes.as_ref(), CborBuilder::new())
441 }
442
443 /// Hand out the underlying SmallVec as a Vec<u8>
444 ///
445 /// Will only allocate if the item is at most 16 bytes long.
446 pub fn into_vec(self) -> Vec<u8> {
447 self.0.into_vec()
448 }
449}
450
451/// Generate an iterator of [`PathElement`](struct.PathElement.html) from a string
452///
453/// A path element is either
454///
455/// - a string starting with any other character than dot or opening bracket
456/// and delimited by the next dot or opening bracket
457/// - a number enclosed in brackets
458///
459/// `None` is returned in case an opening bracket is not matched with a closing one
460/// or the characters between brackets are not a valid representation of `u64`.
461///
462/// # Examples:
463///
464/// ```rust
465/// use cbor_data::{Cbor, index_str, ItemKind};
466///
467/// let cbor = Cbor::checked(b"eActyx").unwrap();
468///
469/// // dict key `x`, array index 12, dict key `y`
470/// assert_eq!(cbor.index(index_str("x[12].y")), None);
471/// // empty string means the outermost item
472/// assert!(matches!(cbor.index(index_str("")).unwrap().kind(), ItemKind::Str(s) if s == "Actyx"));
473/// ```
474pub fn try_index_str(s: &str) -> Option<IndexStr<'_>> {
475 IndexStr::new(s)
476}
477
478/// Generate an iterator of [`PathElement`](struct.PathElement.html) from a string
479///
480/// # Panics
481///
482/// Panics if the string is not valid, see [`try_index_str`](fn.try_index_str.html) for the
483/// details and a non-panicking version.
484///
485/// # Example
486///
487/// ```rust
488/// use cbor_data::{CborBuilder, index_str, Encoder, value::Number};
489///
490/// let cbor = CborBuilder::new().encode_array(|builder| {
491/// builder.encode_u64(42);
492/// });
493///
494/// let item = cbor.index(index_str("[0]")).unwrap();
495/// assert_eq!(item.decode().to_number().unwrap(), Number::Int(42));
496/// ```
497pub fn index_str(s: &str) -> IndexStr<'_> {
498 try_index_str(s).expect("invalid index string")
499}
500
/// Adapter whose `Debug` output is the `Display` output of the wrapped reference.
struct DebugUsingDisplay<'a, T>(&'a T);

impl<T> Debug for DebugUsingDisplay<'_, T>
where
    T: Display,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let DebugUsingDisplay(inner) = self;
        <T as Display>::fmt(inner, f)
    }
}