cbor_core/lib.rs
1#![forbid(unsafe_code)]
2#![deny(rustdoc::broken_intra_doc_links)]
3#![deny(rustdoc::private_intra_doc_links)]
4#![cfg_attr(docsrs, feature(doc_cfg))]
5
6//! Deterministic CBOR encoder and decoder following the
7//! [CBOR::Core](https://www.ietf.org/archive/id/draft-rundgren-cbor-core-25.html)
8//! profile (`draft-rundgren-cbor-core-25`).
9//!
10//! The central type is [`Value`]. It can be constructed, inspected,
11//! modified in place, encoded to bytes, and decoded back. The API
12//! follows CBOR's own shape, so tagged values, simple values, and
13//! arbitrary map keys stay directly reachable.
14//! [`Value`] carries a lifetime parameter so that decoded text and
15//! byte strings can borrow zero-copy from the input slice.
16//!
17//! # Types
18//!
19//! [`Value`] is the central representation of any CBOR data item. It handles
20//! construction, inspection, encoding, and decoding, and is what most code
21//! works with directly.
22//!
23//! * [`Array`], [`Map`], [`Float`], [`DateTime`], [`EpochTime`], and
24//! [`SimpleValue`] appear in `From`/`Into` bounds for `Value` and are
25//! rarely constructed by hand.
26//! * [`DataType`] reports a value's kind for type-based dispatch.
27//! [`ValueKey`] is the key type for maps.
28//! * [`DecodeOptions`] configures the decoder and [`Format`] selects
29//! binary, hex, or diagnostic input. [`SequenceDecoder`] and
30//! [`SequenceReader`] iterate over CBOR sequences; [`SequenceWriter`]
31//! is their encode-side counterpart, configured with [`EncodeFormat`].
32//! * [`Error`] and [`Result`] cover in-memory decoding; [`IoError`] and
33//! [`IoResult`] cover `io::Read` sources.
34//!
35//! # Quick start
36//!
37//! ```
38//! use cbor_core::{Value, array, map};
39//!
40//! // Build a value
41//! let value = map! {
42//! 1 => "hello",
43//! 2 => array![10, 20, 30],
44//! };
45//!
46//! // Encode to bytes and decode back
47//! let bytes = value.encode();
48//! let decoded = Value::decode(&bytes).unwrap();
49//! assert_eq!(value, decoded);
50//!
51//! // Access inner data
52//! let greeting = decoded[1].as_str().unwrap();
53//! assert_eq!(greeting, "hello");
54//!
55//! // Round-trip through diagnostic notation
56//! let text = format!("{value:?}");
57//! let parsed: Value = text.parse().unwrap();
58//! assert_eq!(value, parsed);
59//! ```
60//!
61//! # Diagnostic notation
62//!
63//! [`Value`] implements [`FromStr`](std::str::FromStr), so any CBOR value can
64//! be written as text and parsed with `str::parse`. This is often the
65//! shortest way to build a literal value in a test, a fixture, or an
66//! example, and it avoids manual `Value::from` chains for nested data.
67//!
68//! The grammar is Section 2.3.6 of the CBOR::Core draft. Examples:
69//!
70//! ```
71//! use cbor_core::Value;
72//!
73//! // Integers in decimal, hex, octal, or binary, with `_` as a digit separator
74//! let v: Value = "0xff_ff_00_00".parse().unwrap();
75//! assert_eq!(v, Value::from(0xff_ff_00_00_u32));
76//!
77//! // Arbitrary precision: parsed as tag 2 / tag 3 big integers
78//! let big: Value = "18446744073709551616".parse().unwrap();
79//! assert_eq!(big, Value::from(u64::MAX as u128 + 1));
80//!
81//! // Floats, including explicit bit patterns for NaN payloads
82//! let f: Value = "1.5e2".parse().unwrap();
83//! assert_eq!(f, Value::from(150.0));
84//! let nan: Value = "float'7f800001'".parse().unwrap();
85//! assert_eq!(nan.encode(), vec![0xfa, 0x7f, 0x80, 0x00, 0x01]);
86//!
87//! // Byte strings: hex, base64, ASCII, or embedded CBOR
88//! assert_eq!("h'48656c6c6f'".parse::<Value>().unwrap(), Value::from(b"Hello".to_vec()));
89//! assert_eq!("b64'SGVsbG8'".parse::<Value>().unwrap(), Value::from(b"Hello".to_vec()));
90//! assert_eq!("'Hello'".parse::<Value>().unwrap(), Value::from(b"Hello".to_vec()));
91//! // << ... >> wraps a CBOR sequence into a byte string
92//! assert_eq!(
93//! "<< 1, 2, 3 >>".parse::<Value>().unwrap(),
94//! Value::from(vec![0x01, 0x02, 0x03]),
95//! );
96//! ```
97//!
98//! Nested structures are written directly, and map entries may be written
99//! in any order. The parser sorts keys and rejects duplicates:
100//!
101//! ```
102//! use cbor_core::Value;
103//!
104//! let cert: Value = r#"{
105//! / CWT-style claims, written out of canonical order /
106//! "iss": "https://issuer.example",
107//! "sub": "user-42",
108//! "iat": 1700000000,
109//! "cnf": {
110//! "kty": "OKP",
111//! "crv": "Ed25519",
112//! "x": h'd75a980182b10ab7d54bfed3c964073a0ee172f3daa62325af021a68f707511a'
113//! },
114//! "scope": ["read", "write"]
115//! }"#.parse().unwrap();
116//!
117//! assert_eq!(cert["sub"].as_str().unwrap(), "user-42");
118//! assert_eq!(cert["cnf"]["crv"].as_str().unwrap(), "Ed25519");
119//! ```
120//!
121//! Supported grammar elements: integers (decimal, `0x`, `0o`, `0b`, with
122//! `_` separators), arbitrary-precision integers, floats (decimal,
123//! scientific, `NaN`, `Infinity`, `float'<hex>'`), text strings with
124//! JSON-style escapes and surrogate pairs, byte strings (`h'...'`,
125//! `b64'...'`, `'...'`, `<<...>>`), arrays, maps, tagged values `N(...)`,
126//! `simple(N)`, `true`, `false`, `null`, single-line `# ...` comments, and
127//! block `/ ... /` comments.
128//!
129//! The parser accepts non-canonical input (for example unsorted maps and
130//! non-shortest numbers), normalizes it, and produces a canonical [`Value`].
131//! Round-tripping `format!("{v:?}").parse::<Value>()` always yields the
132//! original value.
133//!
134//! # Borrowing and ownership
135//!
136//! [`Value`] carries a lifetime parameter so that text and byte
137//! strings can either own their storage or borrow it. The variants
138//! that hold strings are
139//! [`TextString(Cow<'a, str>)`](Value::TextString) and
140//! [`ByteString(Cow<'a, [u8]>)`](Value::ByteString).
141//!
142//! Decoding binary CBOR from a byte slice is zero-copy: each text
143//! and byte string in the result is a `Cow::Borrowed` pointing into
144//! the input slice. The returned value's lifetime is the slice's
145//! lifetime:
146//!
147//! ```
148//! use cbor_core::Value;
149//!
150//! let bytes: &[u8] = b"\x65hello"; // text string "hello"
151//! let v = Value::decode(bytes).unwrap();
152//! assert_eq!(v.as_str().unwrap(), "hello");
153//! // `v` borrows from the buffer behind `bytes`, so it cannot outlive that buffer.
154//! ```
155//!
156//! Hex decoding ([`Value::decode_hex`]) and stream decoding from any
157//! [`io::Read`](std::io::Read) source ([`Value::read_from`],
158//! [`SequenceReader`]) cannot borrow: hex pairs have to be decoded
159//! into bytes, and a stream is read into an internal buffer. Those
160//! paths always produce an owned `Value<'static>`.
161//!
162//! Values built in code follow the same split: passing an owned
163//! `String`, `Vec<u8>`, integer, float, etc. produces an owned
164//! `Value<'static>`, while passing a reference (`&str`, `&[u8]`,
165//! `&[u8; N]`) produces a `Value<'a>` borrowing from that reference.
166//! A `&'static str` literal naturally yields `Value<'static>`. The
167//! [`array!`](crate::array) and [`map!`](crate::map) macros and the
168//! `From`/`TryFrom` conversions follow whatever the element type
169//! does.
170//!
171//! `Value` is covariant in its lifetime, so a `Value<'static>` can
172//! be passed wherever a shorter `Value<'a>` is expected. To store a
173//! decoded or constructed `Value` in a struct field where a lifetime
174//! parameter would be inconvenient, name it `Value<'static>`:
175//!
176//! ```
177//! use cbor_core::Value;
178//!
179//! struct Config {
180//! metadata: Value<'static>,
181//! }
182//!
183//! impl Config {
184//! fn new() -> Self {
185//! // `Value::read_from` and owned-input conversions like
186//! // `Value::from(42)` or `Value::from(String::from("..."))`
187//! // both yield values that can be stored as `Value<'static>`.
188//! Self { metadata: Value::from(42) }
189//! }
190//! }
191//! ```
192//!
193//! When a borrowed value needs to outlive its source slice, detach
194//! it explicitly. Three methods produce a `Value` that borrows
195//! nothing from the original input:
196//!
197//! * [`Value::into_owned`] consumes a [`Value`] and copies any
198//! borrowed strings into owned allocations. Cheapest when you
199//! can give up ownership of the original.
200//! * [`Value::to_owned`] does the same from `&Value`, leaving the
201//! original intact at the cost of cloning all owned data too.
202//! * [`Value::decode_owned`] decodes directly into an owned
203//! [`Value`], skipping the borrowed intermediate. Useful when
204//! the input buffer is local to the decode call.
205//!
206//! All three produce a `Value` that can be assigned to any lifetime,
207//! including `Value<'static>`.
208//!
209//! # Encoding rules
210//!
211//! Encoding is deterministic: integers and floats use their shortest
212//! form, and map keys are sorted in canonical order. The decoder
213//! rejects input that deviates.
214//!
215//! NaN payloads, including signaling NaNs, survive round-trips
216//! bit-for-bit. Float-width conversions go through bit patterns to
217//! avoid hardware canonicalization.
218//!
219//! # Sequences
220//!
221//! A CBOR sequence is zero or more items concatenated
222//! without framing. The read side is configured with [`Format`]; the
223//! encode side uses [`EncodeFormat`], which adds output-only variants
224//! ([`DiagnosticPretty`](EncodeFormat::DiagnosticPretty)) and can be
225//! built from any [`Format`] via `Into<EncodeFormat>`.
226//!
227//! On the read side, [`DecodeOptions::sequence_decoder`] wraps a byte
228//! slice and yields a [`SequenceDecoder`] with
229//! `Item = Result<Value, Error>`.
230//! [`DecodeOptions::sequence_reader`] wraps any `io::Read` and yields
231//! a [`SequenceReader`] with `Item = Result<Value, IoError>`.
232//!
233//! In binary and hex, items sit back-to-back. In diagnostic notation,
234//! items are comma-separated, with an optional trailing comma.
235//!
236//! On the encode side, [`SequenceWriter::new`] takes an `io::Write`
237//! and an `impl Into<EncodeFormat>`, so a [`Format`] or an
238//! [`EncodeFormat`] can be passed directly. Items are fed in through:
239//!
240//! * [`write_item`](SequenceWriter::write_item) for a single `&Value`.
241//! * [`write_items`](SequenceWriter::write_items) for any
242//! `IntoIterator<Item = &Value>`.
243//! * [`write_pairs`](SequenceWriter::write_pairs) for an
244//! `IntoIterator<Item = (&Value, &Value)>`, which emits each key
245//! and value as two consecutive items. This matches the item shape of
246//! `BTreeMap::iter()`, so a map held in a `Value` streams straight
247//! into a sequence.
248//!
249//! [`Array`] and [`Map`] bridge between a sequence and an owned
250//! collection:
251//!
252//! * [`Array::from_sequence`] collects an `IntoIterator<Item = Value>`
253//! into an array.
254//! * [`Array::try_from_sequence`] takes a fallible iterator
255//! (`Item = Result<Value, E>`) and short-circuits on the first
256//! error.
257//! * [`Map::from_pairs`] consumes `(Value, Value)` pairs with
258//! last-write-wins on duplicate keys.
259//! * [`Map::try_from_pairs`] rejects duplicates with
260//! [`Error::NonDeterministic`].
261//! * [`Map::from_sequence`] takes an `IntoIterator<Item = Value>` of
262//! alternating key and value items in strict canonical order.
263//! * [`Map::try_from_sequence`] is the fallible-input form of
264//! [`from_sequence`](Map::from_sequence).
265//!
266//! The `try_*` forms take fallible iterators directly, so a
267//! [`SequenceDecoder`] or [`SequenceReader`] can feed an [`Array`] or
268//! [`Map`] without an intermediate `Vec`.
269//! [`Map::try_from_sequence`] uses the bound `E: From<Error>`, which
270//! covers both iterators: [`Error`] satisfies it through the reflexive
271//! `From<T> for T` impl, and [`IoError`] implements `From<Error>`.
272//!
273//! ```
274//! use cbor_core::{Array, DecodeOptions, Format, SequenceWriter, Value};
275//!
276//! let items = [Value::from(1), Value::from("hi"), Value::from(true)];
277//!
278//! let mut buf = Vec::new();
279//! SequenceWriter::new(&mut buf, Format::Binary)
280//! .write_items(items.iter())
281//! .unwrap();
282//!
283//! let array = Array::try_from_sequence(
284//! DecodeOptions::new().sequence_decoder(&buf),
285//! ).unwrap();
286//! assert_eq!(array.get_ref().as_slice(), &items);
287//! ```
288//!
289//! # Optional features
290//!
291//! | Feature | Adds |
292//! |---|---|
293//! | `serde` | `Serialize`/`Deserialize` for `Value`, [`Value::serialized`], [`Value::deserialized`] |
294//! | `chrono` | Conversions between `chrono::DateTime` and `DateTime`/`EpochTime`/`Value` |
295//! | `time` | Conversions between `time::UtcDateTime`/`OffsetDateTime` and `DateTime`/`EpochTime`/`Value` |
296//! | `jiff` | Conversions between `jiff::Timestamp`/`Zoned` and `DateTime`/`EpochTime`/`Value` |
297//! | `half` | `From`/`TryFrom` between `Float`/`Value` and `half::f16` |
298//! | `num-bigint` | `From`/`TryFrom` between `Value` and `num_bigint::BigInt`/`BigUint` |
299//! | `crypto-bigint` | `From`/`TryFrom` between `Value` and `crypto_bigint::Uint`/`Int`/`NonZero` |
300//! | `rug` | `From`/`TryFrom` between `Value` and `rug::Integer` |
301
302mod array;
303mod bytes;
304mod codec;
305mod data_type;
306mod date_time;
307mod decode_options;
308mod decoder;
309mod encoder;
310mod epoch_time;
311mod error;
312mod ext;
313mod float;
314mod format;
315mod integer;
316mod io;
317mod iso3339;
318mod limits;
319mod macros;
320mod map;
321mod parse;
322mod simple_value;
323mod tag;
324mod text;
325mod util;
326mod value;
327mod value_key;
328mod view;
329
330pub use array::Array;
331pub use bytes::ByteString;
332pub use data_type::DataType;
333pub use date_time::DateTime;
334pub use decode_options::DecodeOptions;
335pub use decoder::{SequenceDecoder, SequenceReader};
336pub use encoder::SequenceWriter;
337pub use epoch_time::EpochTime;
338pub use error::{Error, IoError, IoResult, Result};
339pub use float::Float;
340pub use format::{EncodeFormat, Format};
341pub use map::Map;
342pub use simple_value::SimpleValue;
343pub use text::TextString;
344pub use value::Value;
345pub use value_key::ValueKey;
346
347#[cfg(feature = "serde")]
348pub use ext::serde;
349#[cfg(feature = "serde")]
350#[doc(no_inline)]
351pub use serde::SerdeError;
352
353use integer::*;
354
355#[cfg(test)]
356mod tests;