dcbor/
cbor.rs

1import_stdlib!();
2
3#[cfg(not(feature = "std"))]
4extern crate alloc;
5
6#[cfg(all(not(feature = "multithreaded"), not(feature = "std")))]
7use alloc::rc::Rc as RefCounted;
8#[cfg(all(feature = "multithreaded", not(feature = "std")))]
9use alloc::sync::Arc as RefCounted;
10#[cfg(all(not(feature = "multithreaded"), feature = "std"))]
11use std::rc::Rc as RefCounted;
12#[cfg(all(feature = "multithreaded", feature = "std"))]
13use std::sync::Arc as RefCounted;
14
15use unicode_normalization::UnicodeNormalization;
16
17use super::string_util::flanked;
18use crate::{
19    ByteString, Map, Simple,
20    decode::decode_cbor,
21    error::Result,
22    tag::Tag,
23    varint::{EncodeVarInt, MajorType},
24};
25
26/// A symbolic representation of CBOR data.
27///
28/// The `CBOR` type is the central type in the dCBOR library, representing any
29/// CBOR data item using a reference-counted wrapper around a [`CBORCase`] enum.
30/// This design allows efficient sharing of CBOR data structures in memory
31/// without excessive copying.
32///
33/// # Features
34///
35/// - **Deterministic encoding**: Guarantees that semantically equivalent data
36///   structures will always be encoded to identical byte sequences
37///
38/// - **Immutability**: `CBOR` is immutable. Operations that appear to "modify"
39///   a `CBOR` value actually create a new value.
40///
41/// - **O(1) Cloning**: `CBOR` uses reference counting (`Rc` or `Arc` when the
42///   `multithreaded` feature is enabled) and structure sharing to enable
43///   efficient O(1) cloning of a `CBOR` or recursively, any `CBOR`s it
44///   contains. Cloning a `CBOR` simply increments the reference count, allowing
45///   multiple owners without duplicating the underlying data.
46///
47/// - **Reference counting**: Enables efficient sharing of CBOR structures using
48///   `Rc` or `Arc` when the `multithreaded` feature is enabled
49///
50/// - **Type safety**: Uses Rust's type system to safely handle different CBOR
51///   data types
52///
53/// - **Conversion traits**: Implements Rust's standard conversion traits for
54///   ergonomic use
55///
56/// # Thread Safety
57///
58/// With the `multithreaded` feature enabled, `CBOR` uses `Arc` for reference
59/// counting, making it thread-safe. Without this feature, it uses `Rc`, which
60/// is more efficient but not thread-safe.
61///
62/// # Example
63///
64/// ```
65/// use dcbor::prelude::*;
66///
67/// // 1. Create and round-trip a homogeneous array
68/// let array = CBOR::from(vec![1, 2, 3]);
69///
70/// // Encode to bytes
71/// let encoded = array.to_cbor_data();
72/// assert_eq!(hex::encode(&encoded), "83010203");
73///
74/// // Decode from bytes
75/// let decoded = CBOR::try_from_data(&encoded).unwrap();
76/// assert_eq!(decoded, array);
77///
78/// // 2. Create and round-trip a heterogeneous array
79/// let mixed_array: Vec<CBOR> =
80///     vec![1.into(), "Hello".into(), vec![1, 2, 3].into()];
81/// let mixed = CBOR::from(mixed_array);
82///
83/// // Encode the heterogeneous array to bytes
84/// let mixed_encoded = mixed.to_cbor_data();
85/// assert_eq!(hex::encode(&mixed_encoded), "83016548656c6c6f83010203");
86///
87/// // Decode from bytes
88/// let mixed_decoded = CBOR::try_from_data(&mixed_encoded).unwrap();
89/// assert_eq!(mixed_decoded, mixed);
90/// // Use diagnostic_flat() for a compact single-line representation
91/// assert_eq!(
92///     mixed_decoded.diagnostic_flat(),
93///     r#"[1, "Hello", [1, 2, 3]]"#
94/// );
95/// ```
96#[derive(Clone, Eq)]
97pub struct CBOR(RefCounted<CBORCase>);
98
99impl CBOR {
100    pub fn as_case(&self) -> &CBORCase { &self.0 }
101
102    pub fn into_case(self) -> CBORCase {
103        match RefCounted::try_unwrap(self.0) {
104            Ok(b) => b,
105            Err(ref_counted) => (*ref_counted).clone(),
106        }
107    }
108}
109
110impl From<CBORCase> for CBOR {
111    fn from(case: CBORCase) -> Self { Self(RefCounted::new(case)) }
112}
113
114#[derive(Debug, Clone, PartialEq, Eq, Hash)]
115/// An enum representing all possible CBOR data types.
116///
117/// `CBORCase` is the core enum that represents all possible CBOR data types
118/// according to [RFC 8949](https://www.rfc-editor.org/rfc/rfc8949.html) and the dCBOR specification.
119/// Each variant corresponds to one of the eight major types in CBOR.
120///
121/// This enum is not typically used directly by users of the library. Instead,
122/// it's wrapped by the reference-counted [`CBOR`] type, which provides a more
123/// ergonomic API.
124///
125/// # Major Types
126///
127/// CBOR defines eight major types, numbered 0 through 7:
128///
129/// | Major Type | Name | Description |
130/// |------------|------|-------------|
131/// | 0 | Unsigned integer | A non-negative integer |
132/// | 1 | Negative integer | A negative integer |
133/// | 2 | Byte string | A sequence of bytes |
134/// | 3 | Text string | A UTF-8 string |
135/// | 4 | Array | A sequence of data items |
136/// | 5 | Map | A collection of key-value pairs |
137/// | 6 | Tagged value | A data item with a semantic tag |
138/// | 7 | Simple value | A simple value like true, false, null, or float |
139///
140/// # dCBOR Constraints
141///
142/// According to the dCBOR specification, deterministic encoding adds several
143/// constraints:
144///
145/// - Maps must have lexicographically ordered keys
146/// - Numeric values must use the smallest possible encoding
147/// - Floats with integer values are reduced to integers
148/// - All NaN values are canonicalized to a single representation
149/// - Strings must be in Unicode Normalization Form C (NFC)
150///
151/// # Example
152///
153/// ```
154/// use dcbor::{CBORCase, Simple, prelude::*};
155///
156/// // Create a CBOR value using the CBORCase enum
157/// let case = CBORCase::Array(vec![
158///     CBORCase::Unsigned(1).into(),
159///     CBORCase::Text("hello".to_string()).into(),
160///     CBORCase::Simple(Simple::True).into(),
161/// ]);
162///
163/// // Wrap in the CBOR type for easier handling
164/// let cbor = CBOR::from(case);
165/// assert_eq!(cbor.diagnostic(), "[1, \"hello\", true]");
166/// ```
167pub enum CBORCase {
168    /// Unsigned integer (major type 0).
169    ///
170    /// Represents a non-negative integer from 0 to 2^64-1.
171    Unsigned(u64),
172
173    /// Negative integer (major type 1).
174    ///
175    /// Actual value is -1 - n, allowing representation of negative integers
176    /// from -1 to -2^64.
177    Negative(u64),
178
179    /// Byte string (major type 2).
180    ///
181    /// Represents a sequence of bytes. In dCBOR, byte strings must use
182    /// the most compact representation possible.
183    ByteString(ByteString),
184
185    /// UTF-8 string (major type 3).
186    ///
187    /// Represents a UTF-8 encoded string. In dCBOR, text strings must
188    /// be in Unicode Normalization Form C (NFC).
189    Text(String),
190
191    /// Array (major type 4).
192    ///
193    /// Represents a sequence of CBOR data items. dCBOR does not support
194    /// indefinite-length arrays.
195    Array(Vec<CBOR>),
196
197    /// Map (major type 5).
198    ///
199    /// Represents a collection of key-value pairs. In dCBOR, map keys
200    /// must be in lexicographic order, and duplicate keys are not allowed.
201    Map(Map),
202
203    /// Tagged value (major type 6).
204    ///
205    /// Represents a data item with a semantic tag. The tag provides
206    /// additional information about how to interpret the data.
207    Tagged(Tag, CBOR),
208
209    /// Simple value (major type 7).
210    ///
211    /// Represents simple values like true, false, null, and floating-point
212    /// numbers. In dCBOR, only a limited set of simple values are allowed.
213    Simple(Simple),
214}
215
216/// Methods for decoding CBOR from binary representation and encoding to binary.
217impl CBOR {
218    /// Decodes binary data into CBOR symbolic representation.
219    ///
220    /// This method parses the provided binary data according to the CBOR and
221    /// dCBOR specifications, validating that it follows all deterministic
222    /// encoding rules.
223    ///
224    /// # Arguments
225    ///
226    /// * `data` - The binary data to decode, which can be any type that can be
227    ///   referenced as a byte slice (e.g., `Vec<u8>`, `&[u8]`, etc.)
228    ///
229    /// # Returns
230    ///
231    /// * `Ok(CBOR)` - A CBOR value if decoding was successful
232    /// * `Err` - If the data is not valid CBOR or violates dCBOR encoding rules
233    ///
234    /// # Examples
235    ///
236    /// ```
237    /// use dcbor::prelude::*;
238    ///
239    /// // Decode a CBOR array [1, 2, 3]
240    /// let data = hex_literal::hex!("83010203");
241    /// let cbor = CBOR::try_from_data(&data).unwrap();
242    ///
243    /// // Get the array contents
244    /// let array: Vec<u64> = cbor.try_into().unwrap();
245    /// assert_eq!(array, vec![1, 2, 3]);
246    /// ```
247    ///
248    /// # Errors
249    ///
250    /// This method will return an error if:
251    /// - The data is not valid CBOR
252    /// - The data violates dCBOR encoding rules (e.g., non-canonical integer
253    ///   encoding)
254    /// - The data has content after the end of the CBOR item
255    pub fn try_from_data(data: impl AsRef<[u8]>) -> Result<CBOR> {
256        decode_cbor(data)
257    }
258
259    /// Decodes a hexadecimal string into CBOR symbolic representation.
260    ///
261    /// This is a convenience method that converts a hexadecimal string to
262    /// binary data and then calls [`try_from_data`](Self::try_from_data).
263    ///
264    /// # Arguments
265    ///
266    /// * `hex` - A string containing hexadecimal characters (no spaces or other
267    ///   characters)
268    ///
269    /// # Returns
270    ///
271    /// * `Ok(CBOR)` - A CBOR value if decoding was successful
272    /// * `Err` - If the hex string is invalid or the resulting data is not
273    ///   valid dCBOR
274    ///
275    /// # Examples
276    ///
277    /// ```
278    /// use dcbor::prelude::*;
279    ///
280    /// // Decode a CBOR array [1, 2, 3] from hex
281    /// let cbor = CBOR::try_from_hex("83010203").unwrap();
282    /// assert_eq!(cbor.diagnostic(), "[1, 2, 3]");
283    /// ```
284    ///
285    /// # Panics
286    ///
287    /// This method will panic if the hex string is not well-formed hexadecimal
288    /// (contains non-hex characters or an odd number of digits).
289    pub fn try_from_hex(hex: &str) -> Result<CBOR> {
290        let data = hex::decode(hex).unwrap();
291        Self::try_from_data(data)
292    }
293
294    /// Encodes this CBOR value to binary data following dCBOR encoding rules.
295    ///
296    /// This method converts the CBOR value to a byte vector according to the
297    /// dCBOR specification, ensuring deterministic encoding.
298    ///
299    /// # Returns
300    ///
301    /// A `Vec<u8>` containing the encoded CBOR data.
302    ///
303    /// # Examples
304    ///
305    /// ```
306    /// use dcbor::prelude::*;
307    ///
308    /// // Create a CBOR map
309    /// let mut map = Map::new();
310    /// map.insert(CBOR::from("key"), CBOR::from(123));
311    /// let cbor = CBOR::from(map);
312    ///
313    /// // Encode to binary
314    /// let encoded = cbor.to_cbor_data();
315    /// assert_eq!(hex::encode(&encoded), "a1636b6579187b");
316    ///
317    /// // Round-trip through encoding and decoding
318    /// let decoded = CBOR::try_from_data(&encoded).unwrap();
319    /// assert_eq!(decoded, cbor);
320    /// ```
321    pub fn to_cbor_data(&self) -> Vec<u8> {
322        match self.as_case() {
323            CBORCase::Unsigned(x) => x.encode_varint(MajorType::Unsigned),
324            CBORCase::Negative(x) => x.encode_varint(MajorType::Negative),
325            CBORCase::ByteString(x) => {
326                let mut buf = x.len().encode_varint(MajorType::ByteString);
327                buf.extend(x);
328                buf
329            }
330            CBORCase::Text(x) => {
331                let nfc = x.nfc().collect::<String>();
332                let mut buf = nfc.len().encode_varint(MajorType::Text);
333                buf.extend(nfc.as_bytes());
334                buf
335            }
336            CBORCase::Array(x) => {
337                let mut buf = x.len().encode_varint(MajorType::Array);
338                for item in x {
339                    buf.extend(item.to_cbor_data());
340                }
341                buf
342            }
343            CBORCase::Map(x) => x.cbor_data(),
344            CBORCase::Tagged(tag, item) => {
345                let mut buf = tag.value().encode_varint(MajorType::Tagged);
346                buf.extend(item.to_cbor_data());
347                buf
348            }
349            CBORCase::Simple(x) => x.cbor_data(),
350        }
351    }
352}
353
354impl PartialEq for CBOR {
355    fn eq(&self, other: &Self) -> bool {
356        match (self.as_case(), other.as_case()) {
357            (CBORCase::Unsigned(l0), CBORCase::Unsigned(r0)) => l0 == r0,
358            (CBORCase::Negative(l0), CBORCase::Negative(r0)) => l0 == r0,
359            (CBORCase::ByteString(l0), CBORCase::ByteString(r0)) => l0 == r0,
360            (CBORCase::Text(l0), CBORCase::Text(r0)) => l0 == r0,
361            (CBORCase::Array(l0), CBORCase::Array(r0)) => l0 == r0,
362            (CBORCase::Map(l0), CBORCase::Map(r0)) => l0 == r0,
363            (CBORCase::Tagged(l0, l1), CBORCase::Tagged(r0, r1)) => {
364                l0 == r0 && l1 == r1
365            }
366            (CBORCase::Simple(l0), CBORCase::Simple(r0)) => l0 == r0,
367            _ => false,
368        }
369    }
370}
371
372impl hash::Hash for CBOR {
373    fn hash<H: hash::Hasher>(&self, state: &mut H) {
374        use CBORCase::*;
375        match self.as_case() {
376            Unsigned(x) => {
377                0u8.hash(state);
378                x.hash(state);
379            }
380            Negative(x) => {
381                1u8.hash(state);
382                x.hash(state);
383            }
384            ByteString(x) => {
385                2u8.hash(state);
386                x.hash(state);
387            }
388            Text(x) => {
389                3u8.hash(state);
390                x.hash(state);
391            }
392            Array(x) => {
393                4u8.hash(state);
394                x.hash(state);
395            }
396            Map(x) => {
397                5u8.hash(state);
398                x.hash(state);
399            }
400            Tagged(tag, item) => {
401                6u8.hash(state);
402                tag.hash(state);
403                item.hash(state);
404            }
405            Simple(x) => {
406                7u8.hash(state);
407                x.hash(state);
408            }
409        }
410    }
411}
412
413fn format_string(s: &str) -> String {
414    let mut result = "".to_string();
415    for c in s.chars() {
416        if c == '"' {
417            result.push_str(r#"\""#);
418        } else {
419            result.push(c);
420        }
421    }
422    flanked(&result, r#"""#, r#"""#)
423}
424
425fn format_array(a: &[CBOR]) -> String {
426    let s: Vec<String> = a.iter().map(|x| format!("{}", x)).collect();
427    flanked(&s.join(", "), "[", "]")
428}
429
430fn format_map(m: &Map) -> String {
431    let s: Vec<String> =
432        m.iter().map(|x| format!("{}: {}", x.0, x.1)).collect();
433    flanked(&s.join(", "), "{", "}")
434}
435
436impl fmt::Debug for CBOR {
437    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
438        match self.as_case() {
439            CBORCase::Unsigned(x) => {
440                f.debug_tuple("unsigned").field(x).finish()
441            }
442            CBORCase::Negative(x) => f
443                .debug_tuple("negative")
444                .field(&(-1 - (*x as i128)))
445                .finish(),
446            CBORCase::ByteString(x) => {
447                f.write_fmt(format_args!("bytes({})", hex::encode(x)))
448            }
449            CBORCase::Text(x) => f.debug_tuple("text").field(x).finish(),
450            CBORCase::Array(x) => f.debug_tuple("array").field(x).finish(),
451            CBORCase::Map(x) => f.debug_tuple("map").field(x).finish(),
452            CBORCase::Tagged(tag, item) => {
453                f.write_fmt(format_args!("tagged({}, {:?})", tag, item))
454            }
455            CBORCase::Simple(x) => {
456                f.write_fmt(format_args!("simple({})", x.name()))
457            }
458        }
459    }
460}
461
462impl fmt::Display for CBOR {
463    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
464        let s = match self.as_case() {
465            CBORCase::Unsigned(x) => format!("{}", x),
466            CBORCase::Negative(x) => format!("{}", -1 - (*x as i128)),
467            CBORCase::ByteString(x) => format!("h'{}'", hex::encode(x)),
468            CBORCase::Text(x) => format_string(x),
469            CBORCase::Array(x) => format_array(x),
470            CBORCase::Map(x) => format_map(x),
471            CBORCase::Tagged(tag, item) => format!("{}({})", tag, item),
472            CBORCase::Simple(x) => format!("{}", x),
473        };
474        f.write_str(&s)
475    }
476}