dcbor/cbor.rs
1import_stdlib!();
2
3#[cfg(not(feature = "std"))]
4extern crate alloc;
5
6#[cfg(all(not(feature = "multithreaded"), not(feature = "std")))]
7use alloc::rc::Rc as RefCounted;
8#[cfg(all(feature = "multithreaded", not(feature = "std")))]
9use alloc::sync::Arc as RefCounted;
10#[cfg(all(not(feature = "multithreaded"), feature = "std"))]
11use std::rc::Rc as RefCounted;
12#[cfg(all(feature = "multithreaded", feature = "std"))]
13use std::sync::Arc as RefCounted;
14
15use unicode_normalization::UnicodeNormalization;
16
17use super::string_util::flanked;
18use crate::{
19 ByteString, Map, Simple,
20 decode::decode_cbor,
21 error::Result,
22 tag::Tag,
23 varint::{EncodeVarInt, MajorType},
24};
25
26/// A symbolic representation of CBOR data.
27///
28/// The `CBOR` type is the central type in the dCBOR library, representing any
29/// CBOR data item using a reference-counted wrapper around a [`CBORCase`] enum.
30/// This design allows efficient sharing of CBOR data structures in memory
31/// without excessive copying.
32///
33/// # Features
34///
35/// - **Deterministic encoding**: Guarantees that semantically equivalent data
36/// structures will always be encoded to identical byte sequences
37///
38/// - **Immutability**: `CBOR` is immutable. Operations that appear to "modify"
39/// a `CBOR` value actually create a new value.
40///
41/// - **O(1) Cloning**: `CBOR` uses reference counting (`Rc` or `Arc` when the
42/// `multithreaded` feature is enabled) and structure sharing to enable
43/// efficient O(1) cloning of a `CBOR` or recursively, any `CBOR`s it
44/// contains. Cloning a `CBOR` simply increments the reference count, allowing
45/// multiple owners without duplicating the underlying data.
46///
47/// - **Reference counting**: Enables efficient sharing of CBOR structures using
48/// `Rc` or `Arc` when the `multithreaded` feature is enabled
49///
50/// - **Type safety**: Uses Rust's type system to safely handle different CBOR
51/// data types
52///
53/// - **Conversion traits**: Implements Rust's standard conversion traits for
54/// ergonomic use
55///
56/// # Thread Safety
57///
58/// With the `multithreaded` feature enabled, `CBOR` uses `Arc` for reference
59/// counting, making it thread-safe. Without this feature, it uses `Rc`, which
60/// is more efficient but not thread-safe.
61///
62/// # Example
63///
64/// ```
65/// use dcbor::prelude::*;
66///
67/// // 1. Create and round-trip a homogeneous array
68/// let array = CBOR::from(vec![1, 2, 3]);
69///
70/// // Encode to bytes
71/// let encoded = array.to_cbor_data();
72/// assert_eq!(hex::encode(&encoded), "83010203");
73///
74/// // Decode from bytes
75/// let decoded = CBOR::try_from_data(&encoded).unwrap();
76/// assert_eq!(decoded, array);
77///
78/// // 2. Create and round-trip a heterogeneous array
79/// let mixed_array: Vec<CBOR> =
80/// vec![1.into(), "Hello".into(), vec![1, 2, 3].into()];
81/// let mixed = CBOR::from(mixed_array);
82///
83/// // Encode the heterogeneous array to bytes
84/// let mixed_encoded = mixed.to_cbor_data();
85/// assert_eq!(hex::encode(&mixed_encoded), "83016548656c6c6f83010203");
86///
87/// // Decode from bytes
88/// let mixed_decoded = CBOR::try_from_data(&mixed_encoded).unwrap();
89/// assert_eq!(mixed_decoded, mixed);
90/// // Use diagnostic_flat() for a compact single-line representation
91/// assert_eq!(
92/// mixed_decoded.diagnostic_flat(),
93/// r#"[1, "Hello", [1, 2, 3]]"#
94/// );
95/// ```
96#[derive(Clone, Eq)]
97pub struct CBOR(RefCounted<CBORCase>);
98
99impl CBOR {
100 pub fn as_case(&self) -> &CBORCase { &self.0 }
101
102 pub fn into_case(self) -> CBORCase {
103 match RefCounted::try_unwrap(self.0) {
104 Ok(b) => b,
105 Err(ref_counted) => (*ref_counted).clone(),
106 }
107 }
108}
109
110impl From<CBORCase> for CBOR {
111 fn from(case: CBORCase) -> Self { Self(RefCounted::new(case)) }
112}
113
114#[derive(Debug, Clone, PartialEq, Eq, Hash)]
115/// An enum representing all possible CBOR data types.
116///
117/// `CBORCase` is the core enum that represents all possible CBOR data types
118/// according to [RFC 8949](https://www.rfc-editor.org/rfc/rfc8949.html) and the dCBOR specification.
119/// Each variant corresponds to one of the eight major types in CBOR.
120///
121/// This enum is not typically used directly by users of the library. Instead,
122/// it's wrapped by the reference-counted [`CBOR`] type, which provides a more
123/// ergonomic API.
124///
125/// # Major Types
126///
127/// CBOR defines eight major types, numbered 0 through 7:
128///
129/// | Major Type | Name | Description |
130/// |------------|------|-------------|
131/// | 0 | Unsigned integer | A non-negative integer |
132/// | 1 | Negative integer | A negative integer |
133/// | 2 | Byte string | A sequence of bytes |
134/// | 3 | Text string | A UTF-8 string |
135/// | 4 | Array | A sequence of data items |
136/// | 5 | Map | A collection of key-value pairs |
137/// | 6 | Tagged value | A data item with a semantic tag |
138/// | 7 | Simple value | A simple value like true, false, null, or float |
139///
140/// # dCBOR Constraints
141///
142/// According to the dCBOR specification, deterministic encoding adds several
143/// constraints:
144///
145/// - Maps must have lexicographically ordered keys
146/// - Numeric values must use the smallest possible encoding
147/// - Floats with integer values are reduced to integers
148/// - All NaN values are canonicalized to a single representation
149/// - Strings must be in Unicode Normalization Form C (NFC)
150///
151/// # Example
152///
153/// ```
154/// use dcbor::{CBORCase, Simple, prelude::*};
155///
156/// // Create a CBOR value using the CBORCase enum
157/// let case = CBORCase::Array(vec![
158/// CBORCase::Unsigned(1).into(),
159/// CBORCase::Text("hello".to_string()).into(),
160/// CBORCase::Simple(Simple::True).into(),
161/// ]);
162///
163/// // Wrap in the CBOR type for easier handling
164/// let cbor = CBOR::from(case);
165/// assert_eq!(cbor.diagnostic(), "[1, \"hello\", true]");
166/// ```
167pub enum CBORCase {
168 /// Unsigned integer (major type 0).
169 ///
170 /// Represents a non-negative integer from 0 to 2^64-1.
171 Unsigned(u64),
172
173 /// Negative integer (major type 1).
174 ///
175 /// Actual value is -1 - n, allowing representation of negative integers
176 /// from -1 to -2^64.
177 Negative(u64),
178
179 /// Byte string (major type 2).
180 ///
181 /// Represents a sequence of bytes. In dCBOR, byte strings must use
182 /// the most compact representation possible.
183 ByteString(ByteString),
184
185 /// UTF-8 string (major type 3).
186 ///
187 /// Represents a UTF-8 encoded string. In dCBOR, text strings must
188 /// be in Unicode Normalization Form C (NFC).
189 Text(String),
190
191 /// Array (major type 4).
192 ///
193 /// Represents a sequence of CBOR data items. dCBOR does not support
194 /// indefinite-length arrays.
195 Array(Vec<CBOR>),
196
197 /// Map (major type 5).
198 ///
199 /// Represents a collection of key-value pairs. In dCBOR, map keys
200 /// must be in lexicographic order, and duplicate keys are not allowed.
201 Map(Map),
202
203 /// Tagged value (major type 6).
204 ///
205 /// Represents a data item with a semantic tag. The tag provides
206 /// additional information about how to interpret the data.
207 Tagged(Tag, CBOR),
208
209 /// Simple value (major type 7).
210 ///
211 /// Represents simple values like true, false, null, and floating-point
212 /// numbers. In dCBOR, only a limited set of simple values are allowed.
213 Simple(Simple),
214}
215
216/// Methods for decoding CBOR from binary representation and encoding to binary.
217impl CBOR {
218 /// Decodes binary data into CBOR symbolic representation.
219 ///
220 /// This method parses the provided binary data according to the CBOR and
221 /// dCBOR specifications, validating that it follows all deterministic
222 /// encoding rules.
223 ///
224 /// # Arguments
225 ///
226 /// * `data` - The binary data to decode, which can be any type that can be
227 /// referenced as a byte slice (e.g., `Vec<u8>`, `&[u8]`, etc.)
228 ///
229 /// # Returns
230 ///
231 /// * `Ok(CBOR)` - A CBOR value if decoding was successful
232 /// * `Err` - If the data is not valid CBOR or violates dCBOR encoding rules
233 ///
234 /// # Examples
235 ///
236 /// ```
237 /// use dcbor::prelude::*;
238 ///
239 /// // Decode a CBOR array [1, 2, 3]
240 /// let data = hex_literal::hex!("83010203");
241 /// let cbor = CBOR::try_from_data(&data).unwrap();
242 ///
243 /// // Get the array contents
244 /// let array: Vec<u64> = cbor.try_into().unwrap();
245 /// assert_eq!(array, vec![1, 2, 3]);
246 /// ```
247 ///
248 /// # Errors
249 ///
250 /// This method will return an error if:
251 /// - The data is not valid CBOR
252 /// - The data violates dCBOR encoding rules (e.g., non-canonical integer
253 /// encoding)
254 /// - The data has content after the end of the CBOR item
255 pub fn try_from_data(data: impl AsRef<[u8]>) -> Result<CBOR> {
256 decode_cbor(data)
257 }
258
259 /// Decodes a hexadecimal string into CBOR symbolic representation.
260 ///
261 /// This is a convenience method that converts a hexadecimal string to
262 /// binary data and then calls [`try_from_data`](Self::try_from_data).
263 ///
264 /// # Arguments
265 ///
266 /// * `hex` - A string containing hexadecimal characters (no spaces or other
267 /// characters)
268 ///
269 /// # Returns
270 ///
271 /// * `Ok(CBOR)` - A CBOR value if decoding was successful
272 /// * `Err` - If the hex string is invalid or the resulting data is not
273 /// valid dCBOR
274 ///
275 /// # Examples
276 ///
277 /// ```
278 /// use dcbor::prelude::*;
279 ///
280 /// // Decode a CBOR array [1, 2, 3] from hex
281 /// let cbor = CBOR::try_from_hex("83010203").unwrap();
282 /// assert_eq!(cbor.diagnostic(), "[1, 2, 3]");
283 /// ```
284 ///
285 /// # Panics
286 ///
287 /// This method will panic if the hex string is not well-formed hexadecimal
288 /// (contains non-hex characters or an odd number of digits).
289 pub fn try_from_hex(hex: &str) -> Result<CBOR> {
290 let data = hex::decode(hex).unwrap();
291 Self::try_from_data(data)
292 }
293
294 /// Encodes this CBOR value to binary data following dCBOR encoding rules.
295 ///
296 /// This method converts the CBOR value to a byte vector according to the
297 /// dCBOR specification, ensuring deterministic encoding.
298 ///
299 /// # Returns
300 ///
301 /// A `Vec<u8>` containing the encoded CBOR data.
302 ///
303 /// # Examples
304 ///
305 /// ```
306 /// use dcbor::prelude::*;
307 ///
308 /// // Create a CBOR map
309 /// let mut map = Map::new();
310 /// map.insert(CBOR::from("key"), CBOR::from(123));
311 /// let cbor = CBOR::from(map);
312 ///
313 /// // Encode to binary
314 /// let encoded = cbor.to_cbor_data();
315 /// assert_eq!(hex::encode(&encoded), "a1636b6579187b");
316 ///
317 /// // Round-trip through encoding and decoding
318 /// let decoded = CBOR::try_from_data(&encoded).unwrap();
319 /// assert_eq!(decoded, cbor);
320 /// ```
321 pub fn to_cbor_data(&self) -> Vec<u8> {
322 match self.as_case() {
323 CBORCase::Unsigned(x) => x.encode_varint(MajorType::Unsigned),
324 CBORCase::Negative(x) => x.encode_varint(MajorType::Negative),
325 CBORCase::ByteString(x) => {
326 let mut buf = x.len().encode_varint(MajorType::ByteString);
327 buf.extend(x);
328 buf
329 }
330 CBORCase::Text(x) => {
331 let nfc = x.nfc().collect::<String>();
332 let mut buf = nfc.len().encode_varint(MajorType::Text);
333 buf.extend(nfc.as_bytes());
334 buf
335 }
336 CBORCase::Array(x) => {
337 let mut buf = x.len().encode_varint(MajorType::Array);
338 for item in x {
339 buf.extend(item.to_cbor_data());
340 }
341 buf
342 }
343 CBORCase::Map(x) => x.cbor_data(),
344 CBORCase::Tagged(tag, item) => {
345 let mut buf = tag.value().encode_varint(MajorType::Tagged);
346 buf.extend(item.to_cbor_data());
347 buf
348 }
349 CBORCase::Simple(x) => x.cbor_data(),
350 }
351 }
352}
353
354impl PartialEq for CBOR {
355 fn eq(&self, other: &Self) -> bool {
356 match (self.as_case(), other.as_case()) {
357 (CBORCase::Unsigned(l0), CBORCase::Unsigned(r0)) => l0 == r0,
358 (CBORCase::Negative(l0), CBORCase::Negative(r0)) => l0 == r0,
359 (CBORCase::ByteString(l0), CBORCase::ByteString(r0)) => l0 == r0,
360 (CBORCase::Text(l0), CBORCase::Text(r0)) => l0 == r0,
361 (CBORCase::Array(l0), CBORCase::Array(r0)) => l0 == r0,
362 (CBORCase::Map(l0), CBORCase::Map(r0)) => l0 == r0,
363 (CBORCase::Tagged(l0, l1), CBORCase::Tagged(r0, r1)) => {
364 l0 == r0 && l1 == r1
365 }
366 (CBORCase::Simple(l0), CBORCase::Simple(r0)) => l0 == r0,
367 _ => false,
368 }
369 }
370}
371
372impl hash::Hash for CBOR {
373 fn hash<H: hash::Hasher>(&self, state: &mut H) {
374 use CBORCase::*;
375 match self.as_case() {
376 Unsigned(x) => {
377 0u8.hash(state);
378 x.hash(state);
379 }
380 Negative(x) => {
381 1u8.hash(state);
382 x.hash(state);
383 }
384 ByteString(x) => {
385 2u8.hash(state);
386 x.hash(state);
387 }
388 Text(x) => {
389 3u8.hash(state);
390 x.hash(state);
391 }
392 Array(x) => {
393 4u8.hash(state);
394 x.hash(state);
395 }
396 Map(x) => {
397 5u8.hash(state);
398 x.hash(state);
399 }
400 Tagged(tag, item) => {
401 6u8.hash(state);
402 tag.hash(state);
403 item.hash(state);
404 }
405 Simple(x) => {
406 7u8.hash(state);
407 x.hash(state);
408 }
409 }
410 }
411}
412
413fn format_string(s: &str) -> String {
414 let mut result = "".to_string();
415 for c in s.chars() {
416 if c == '"' {
417 result.push_str(r#"\""#);
418 } else {
419 result.push(c);
420 }
421 }
422 flanked(&result, r#"""#, r#"""#)
423}
424
425fn format_array(a: &[CBOR]) -> String {
426 let s: Vec<String> = a.iter().map(|x| format!("{}", x)).collect();
427 flanked(&s.join(", "), "[", "]")
428}
429
430fn format_map(m: &Map) -> String {
431 let s: Vec<String> =
432 m.iter().map(|x| format!("{}: {}", x.0, x.1)).collect();
433 flanked(&s.join(", "), "{", "}")
434}
435
436impl fmt::Debug for CBOR {
437 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
438 match self.as_case() {
439 CBORCase::Unsigned(x) => {
440 f.debug_tuple("unsigned").field(x).finish()
441 }
442 CBORCase::Negative(x) => f
443 .debug_tuple("negative")
444 .field(&(-1 - (*x as i128)))
445 .finish(),
446 CBORCase::ByteString(x) => {
447 f.write_fmt(format_args!("bytes({})", hex::encode(x)))
448 }
449 CBORCase::Text(x) => f.debug_tuple("text").field(x).finish(),
450 CBORCase::Array(x) => f.debug_tuple("array").field(x).finish(),
451 CBORCase::Map(x) => f.debug_tuple("map").field(x).finish(),
452 CBORCase::Tagged(tag, item) => {
453 f.write_fmt(format_args!("tagged({}, {:?})", tag, item))
454 }
455 CBORCase::Simple(x) => {
456 f.write_fmt(format_args!("simple({})", x.name()))
457 }
458 }
459 }
460}
461
462impl fmt::Display for CBOR {
463 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
464 let s = match self.as_case() {
465 CBORCase::Unsigned(x) => format!("{}", x),
466 CBORCase::Negative(x) => format!("{}", -1 - (*x as i128)),
467 CBORCase::ByteString(x) => format!("h'{}'", hex::encode(x)),
468 CBORCase::Text(x) => format_string(x),
469 CBORCase::Array(x) => format_array(x),
470 CBORCase::Map(x) => format_map(x),
471 CBORCase::Tagged(tag, item) => format!("{}({})", tag, item),
472 CBORCase::Simple(x) => format!("{}", x),
473 };
474 f.write_str(&s)
475 }
476}