cbor_tools/
format.rs

1use crate::truncate::Truncate;
2use crate::{
3    Array, ByteString, CborType, Decode, DecodeError, DecodeSymbolic, Encode, EncodeSymbolic,
4    Float, Indefinite, Integer, Map, Tag, Tagged, TextString, ZeroTo23,
5};
6use half::f16;
7use num_enum::TryFromPrimitive;
8use std::convert::TryFrom;
9use std::convert::TryInto;
10#[cfg(feature = "display")]
11use strum_macros::AsRefStr;
12
13/// The major number in a CBOR encoding
14///
15/// The major number is 3 bits long, and identifies the basic
16/// type of a CBOR-encoded value.
17#[repr(u8)]
18#[derive(Clone, Copy, Debug, PartialEq, TryFromPrimitive)]
19#[cfg_attr(feature = "display", derive(AsRefStr))]
20pub enum Major {
21    /// An unsigned integer
22    Uint = 0,
23    /// A negative integer
24    Nint = 1,
25    /// A byte string
26    Bstr = 2,
27    /// A text string
28    Tstr = 3,
29    /// An array
30    Array = 4,
31    /// A map
32    Map = 5,
33    /// A tagged value
34    Tag = 6,
35    /// Miscellaneous types (floats, bool, null, etc)
36    Misc = 7,
37}
38
/// The "additional information" field of a CBOR item.
///
/// Only the low 5 bits are meaningful. Depending on the major type the
/// field may hold a small value directly, select a simple value
/// (True, False, Null), or announce how many bytes follow.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct AdnInfo(pub(crate) u8);
47
48#[allow(missing_docs)]
49impl AdnInfo {
50    /// 1 byte to follow.
51    pub const MORE1: AdnInfo = AdnInfo(24);
52    /// 2 bytes to follow.
53    pub const MORE2: AdnInfo = AdnInfo(25);
54    /// 4 bytes to follow.
55    pub const MORE4: AdnInfo = AdnInfo(26);
56    /// 8 bytes to follow.
57    pub const MORE8: AdnInfo = AdnInfo(27);
58
59    /// Indefinite-length encoding is used.
60    pub const INDEFINITE: AdnInfo = AdnInfo(31);
61
62    // In major type 7, a number of values are used for special purposes.
63    pub const FALSE: AdnInfo = AdnInfo(20);
64    pub const TRUE: AdnInfo = AdnInfo(21);
65    pub const NULL: AdnInfo = AdnInfo(22);
66    pub const UNDEFINED: AdnInfo = AdnInfo(23);
67    pub const FLOAT16: AdnInfo = AdnInfo(25);
68    pub const FLOAT32: AdnInfo = AdnInfo(26);
69    pub const FLOAT64: AdnInfo = AdnInfo(27);
70    /// Terminate an indefinite-length encoding.
71    pub const BREAK: AdnInfo = AdnInfo(31);
72}
73
74impl From<u8> for AdnInfo {
75    fn from(n: u8) -> Self {
76        if n >= 24 {
77            panic!("can't create AdnInfo for n={}", n);
78        }
79        AdnInfo(n)
80    }
81}
82
/// An immediate value of zero, one, two, four or eight bytes.
///
/// Many CBOR encodings carry an integer of up to 8 bytes. It is often
/// the length of some data, and sometimes it is the payload value
/// itself (as with integers and floats).
///
/// The bytes in each variant are stored in big-endian order.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ImmediateValue {
    /// Nothing follows; there is no immediate value.
    Empty,
    /// An immediate value that is 1 byte wide.
    Bytes1([u8; 1]),
    /// An immediate value that is 2 bytes wide.
    Bytes2([u8; 2]),
    /// An immediate value that is 4 bytes wide.
    Bytes4([u8; 4]),
    /// An immediate value that is 8 bytes wide.
    Bytes8([u8; 8]),
}
104
105impl<'a> IntoIterator for &'a ImmediateValue {
106    type Item = &'a u8;
107    type IntoIter = std::slice::Iter<'a, u8>;
108
109    fn into_iter(self) -> Self::IntoIter {
110        // We need something so the Empty case returns the same type as the others.
111        const IMM_DUMMY: [u8; 0] = [];
112
113        match self {
114            ImmediateValue::Empty => IMM_DUMMY.iter(),
115            ImmediateValue::Bytes1(b) => b.iter(),
116            ImmediateValue::Bytes2(b) => b.iter(),
117            ImmediateValue::Bytes4(b) => b.iter(),
118            ImmediateValue::Bytes8(b) => b.iter(),
119        }
120    }
121}
122
123impl From<u8> for ImmediateValue {
124    fn from(n: u8) -> Self {
125        let bytes = n.to_be_bytes();
126        ImmediateValue::Bytes1(bytes)
127    }
128}
129
130impl From<u16> for ImmediateValue {
131    fn from(n: u16) -> Self {
132        let bytes = n.to_be_bytes();
133        ImmediateValue::Bytes2(bytes)
134    }
135}
136
137impl From<u32> for ImmediateValue {
138    fn from(n: u32) -> Self {
139        let bytes = n.to_be_bytes();
140        ImmediateValue::Bytes4(bytes)
141    }
142}
143
144impl From<u64> for ImmediateValue {
145    fn from(n: u64) -> Self {
146        let bytes = n.to_be_bytes();
147        ImmediateValue::Bytes8(bytes)
148    }
149}
150
151impl From<f16> for ImmediateValue {
152    fn from(n: f16) -> Self {
153        let bytes = n.to_be_bytes();
154        ImmediateValue::Bytes2(bytes)
155    }
156}
157
158impl From<f32> for ImmediateValue {
159    fn from(n: f32) -> Self {
160        let bytes = n.to_be_bytes();
161        ImmediateValue::Bytes4(bytes)
162    }
163}
164
165impl From<f64> for ImmediateValue {
166    fn from(n: f64) -> Self {
167        let bytes = n.to_be_bytes();
168        ImmediateValue::Bytes8(bytes)
169    }
170}
171
172impl From<ImmediateValue> for u64 {
173    fn from(imm: ImmediateValue) -> u64 {
174        match imm {
175            ImmediateValue::Bytes1(b) => u8::from_be_bytes(b).into(),
176            ImmediateValue::Bytes2(b) => u16::from_be_bytes(b).into(),
177            ImmediateValue::Bytes4(b) => u32::from_be_bytes(b).into(),
178            ImmediateValue::Bytes8(b) => u64::from_be_bytes(b),
179            _ => panic!("can't convert {:?} to u64", imm),
180        }
181    }
182}
183
// Function-argument flag that selects between definite-length and
// indefinite-length encoding. `true` means definite-length.
#[derive(PartialEq)]
struct UseDefLen(bool);

// Request definite-length encoding.
const AS_DEF: UseDefLen = UseDefLen(true);
// Request indefinite-length encoding.
const AS_INDEF: UseDefLen = UseDefLen(false);
190
191/// A unit of CBOR-encoded data
192///
193/// Each `Element` contains:
194/// - A major number, indicating the type of data.
195/// - An "additional information" field.
196/// - A Zero-to-eight byte immediate value (containing a numeric value or length).
197/// - An optional sequence of payload bytes.
198///
199/// See RFC 7049 for details.
200///
201/// Some [`CborType`] values will require multiple [`Element`]s to encode.
202#[derive(Clone, Debug, PartialEq)]
203pub struct Element {
204    // The major number.
205    pub(crate) major: Major,
206    // The "additional information" field.
207    pub(crate) adn_info: AdnInfo,
208    pub(crate) imm: ImmediateValue,
209    pub(crate) bytes: Vec<u8>,
210}
211
212impl Element {
213    /// Create a new Element, with no payload buffer.
214    pub fn new(major: Major, adn_info: AdnInfo, imm: ImmediateValue) -> Element {
215        Element {
216            major,
217            adn_info,
218            imm,
219            bytes: Vec::new(),
220        }
221    }
222
223    /// Create a new Element, with no payload buffer or immediate data.
224    pub fn simple(major: Major, adn_info: AdnInfo) -> Element {
225        Element {
226            major,
227            adn_info,
228            imm: ImmediateValue::Empty,
229            bytes: Vec::new(),
230        }
231    }
232
233    /// Add a payload buffer to this Element.
234    pub fn set_bytes(&mut self, bytes: &[u8]) {
235        if !self.bytes.is_empty() {
236            panic!("set_bytes on nonempty Element");
237        }
238        self.bytes.extend(bytes);
239    }
240
241    // This is part of the decoding phase.
242    // We will decode the major and adn_info only.
243    fn from_byte(byte: u8) -> Element {
244        // This can't actually fail, since all 8 values are valid variants.
245        let major = Major::try_from(byte >> 5).unwrap();
246        let adn_info = AdnInfo(byte & 0x1F);
247        let imm = ImmediateValue::Empty;
248        Element {
249            major,
250            adn_info,
251            imm,
252            bytes: Vec::new(),
253        }
254    }
255
256    /// Extract a length value from the `Element`.
257    ///
258    /// In many elements, the `adn_info` and `imm` fields encode a length parameter.
259    /// This function attempts to extract that value.
260    /// If the adn_info is not a value between 0 and 27, it will return an error.
261    pub fn get_length(&self) -> Result<usize, DecodeError> {
262        match self.adn_info {
263            AdnInfo(n) if n < 24 => Ok(n as usize),
264            AdnInfo::MORE1 | AdnInfo::MORE2 | AdnInfo::MORE4 | AdnInfo::MORE8 => {
265                let length: u64 = self.imm.into();
266                let length: usize = length.try_into().unwrap();
267                Ok(length)
268            }
269            AdnInfo::INDEFINITE => Err(DecodeError::Indefinite),
270            _ => Err(DecodeError::Undecodable),
271        }
272    }
273}
274
275impl Decode for Vec<Element> {
276    fn decode(&self) -> Result<Vec<CborType>, DecodeError> {
277        let mut result = Vec::new();
278        let mut input = self.iter();
279        loop {
280            let decoded = decode_one(&mut input);
281            match decoded {
282                Ok(val) => result.push(val),
283                Err(DecodeError::End) => {
284                    // At this, the topmost level of decoding, reaching the end of the input
285                    // data is not an error. We're just done.
286                    break;
287                }
288                Err(e) => return Err(e),
289            }
290        }
291        Ok(result)
292    }
293}
294
295// Decode one CborType from the input iterator, with special error handling.
296//
297// If we reach the end of the input data, return Err(Underrun) instead of Err(End).
298// Otherwise, it behaves exactly the same as decode_one.
299//
300// Reason:
301// End errors will be thrown away at the top-level decode function;
302// hitting the end of the input data at other points should be a hard error.
303// So we convert it to a different error that will propagate.
304fn require_one(input: &mut std::slice::Iter<'_, Element>) -> Result<CborType, DecodeError> {
305    let decoded = decode_one(input);
306    match decoded {
307        Err(DecodeError::End) => Err(DecodeError::Underrun),
308        x => x,
309    }
310}
311
312// Decode one CborType from the input iterator.
313//
314// If the input iterator returns None, this will return Err(End).
315// If the input is a BREAK, this will return Err(Break).
316// All other well-formed inputs will return a single CborType.
317fn decode_one(input: &mut std::slice::Iter<'_, Element>) -> Result<CborType, DecodeError> {
318    let decoded = match input.next() {
319        None => {
320            return Err(DecodeError::End);
321        }
322        Some(element) => match element.major {
323            Major::Uint => decode_uint(element),
324            Major::Nint => decode_nint(element),
325            Major::Bstr => {
326                if element.adn_info == AdnInfo::INDEFINITE {
327                    // Indefinite-length byte string
328                    // FIXME: is it possible for the element to have improper fields?
329                    // imm? bytes? either it's guaranteed to be well-formed here, or
330                    // we need extra checks?
331                    decode_bstr_indef(input)
332                } else {
333                    decode_bstr(element)
334                }
335            }
336            Major::Tstr => {
337                if element.adn_info == AdnInfo::INDEFINITE {
338                    // Indefinite-length text string
339                    decode_tstr_indef(input)
340                } else {
341                    decode_tstr(element)
342                }
343            }
344            Major::Array => decode_array(element, input),
345            Major::Map => decode_map(element, input),
346            Major::Tag => decode_tag(element, input),
347            Major::Misc => decode_misc(element),
348        },
349    }?;
350    Ok(decoded)
351}
352
353fn decode_misc(element: &Element) -> Result<CborType, DecodeError> {
354    let decoded = match element.adn_info {
355        AdnInfo::FALSE => CborType::Bool(false),
356        AdnInfo::TRUE => CborType::Bool(true),
357        AdnInfo::NULL => CborType::Null,
358        AdnInfo::UNDEFINED => CborType::Undefined,
359        AdnInfo::BREAK => return Err(DecodeError::Break),
360        AdnInfo::MORE1 => match element.imm {
361            // There is a possible weird encoding here where a simple value
362            // (FALSE, TRUE, NULL, UNDEFINED) could be encoded as a 1-byte
363            // immediate value. Should those encodings be treated as legitimate?
364            // RFC 7049 3.2 says:
365            // "Even though CBOR attempts to minimize these cases, not all well-
366            // formed CBOR data is valid: for example, the format excludes simple
367            // values below 32 that are encoded with an extension byte."
368            //
369            // As of 1/2021, the CBOR simple values registry only contains those
370            // four values:
371            // https://www.iana.org/assignments/cbor-simple-values/cbor-simple-values.xhtml
372            //
373            // As this implementation does not decode unknown Simple Values,
374            // for now, we return an error.
375            ImmediateValue::Bytes1([n]) => return Err(DecodeError::UnknownSimple(n)),
376            _ => return Err(DecodeError::Undecodable),
377        },
378        AdnInfo::FLOAT16 => {
379            if let ImmediateValue::Bytes2(b) = element.imm {
380                CborType::Float(Float::F16(f16::from_be_bytes(b)))
381            } else {
382                return Err(DecodeError::Undecodable);
383            }
384        }
385        AdnInfo::FLOAT32 => {
386            if let ImmediateValue::Bytes4(b) = element.imm {
387                CborType::Float(Float::F32(f32::from_be_bytes(b)))
388            } else {
389                return Err(DecodeError::Undecodable);
390            }
391        }
392        AdnInfo::FLOAT64 => {
393            if let ImmediateValue::Bytes8(b) = element.imm {
394                CborType::Float(Float::F64(f64::from_be_bytes(b)))
395            } else {
396                return Err(DecodeError::Undecodable);
397            }
398        }
399        AdnInfo(n) => {
400            // Because AdnInfo is only 5 bytes wide, this could only be 0..19
401            return Err(DecodeError::UnknownSimple(n));
402        }
403    };
404    Ok(decoded)
405}
406
407fn decode_uint(element: &Element) -> Result<CborType, DecodeError> {
408    let decoded = match (element.adn_info, element.imm) {
409        (AdnInfo(n), ImmediateValue::Empty) if n < 24 => Integer::U5(ZeroTo23::from(n)),
410        (AdnInfo::MORE1, ImmediateValue::Bytes1(b)) => Integer::U8(b[0]),
411        (AdnInfo::MORE2, ImmediateValue::Bytes2(b)) => Integer::U16(u16::from_be_bytes(b)),
412        (AdnInfo::MORE4, ImmediateValue::Bytes4(b)) => Integer::U32(u32::from_be_bytes(b)),
413        (AdnInfo::MORE8, ImmediateValue::Bytes8(b)) => Integer::U64(u64::from_be_bytes(b)),
414        _ => return Err(DecodeError::Undecodable),
415    };
416    Ok(decoded.into())
417}
418
419fn decode_nint(element: &Element) -> Result<CborType, DecodeError> {
420    let decoded = match (element.adn_info, element.imm) {
421        (AdnInfo(n), ImmediateValue::Empty) if n < 24 => Integer::N5(ZeroTo23::from(n)),
422        (AdnInfo::MORE1, ImmediateValue::Bytes1(b)) => Integer::N8(b[0]),
423        (AdnInfo::MORE2, ImmediateValue::Bytes2(b)) => Integer::N16(u16::from_be_bytes(b)),
424        (AdnInfo::MORE4, ImmediateValue::Bytes4(b)) => Integer::N32(u32::from_be_bytes(b)),
425        (AdnInfo::MORE8, ImmediateValue::Bytes8(b)) => Integer::N64(u64::from_be_bytes(b)),
426        _ => return Err(DecodeError::Undecodable),
427    };
428    Ok(decoded.into())
429}
430
431fn decode_bstr(element: &Element) -> Result<CborType, DecodeError> {
432    Ok(CborType::ByteString(ByteString(element.bytes.clone())))
433}
434
435fn decode_tstr(element: &Element) -> Result<CborType, DecodeError> {
436    let text = String::from_utf8(element.bytes.clone()).map_err(|_| DecodeError::Utf8Error)?;
437    Ok(CborType::TextString(TextString(text)))
438}
439
440fn decode_bstr_indef(input: &mut std::slice::Iter<'_, Element>) -> Result<CborType, DecodeError> {
441    // Consume a run of elements containing bstrs, terminated by a BREAK symbol.
442    let mut result: Vec<ByteString> = Vec::new();
443    loop {
444        match require_one(input) {
445            Ok(CborType::ByteString(b)) => result.push(b),
446            Ok(_) => return Err(DecodeError::BadSubString),
447            Err(DecodeError::Break) => break,
448            Err(e) => return Err(e),
449        }
450    }
451    Ok(CborType::Indefinite(Indefinite::ByteString(result)))
452}
453
454fn decode_tstr_indef(input: &mut std::slice::Iter<'_, Element>) -> Result<CborType, DecodeError> {
455    // Consume a run of elements containing tstrs, terminated by a BREAK symbol.
456    let mut result: Vec<TextString> = Vec::new();
457    loop {
458        match require_one(input) {
459            Ok(CborType::TextString(t)) => result.push(t),
460            Ok(_) => return Err(DecodeError::BadSubString),
461            Err(DecodeError::Break) => break,
462            Err(e) => return Err(e),
463        }
464    }
465    Ok(CborType::Indefinite(Indefinite::TextString(result)))
466}
467
// Counts how many more items a container still expects.
enum RunLength {
    // No fixed count; the run continues until something else ends it.
    Indefinite,
    // Exactly this many items remain.
    Definite(usize),
}
473
474impl RunLength {
475    // Given an element, create a RunLength counter.
476    fn get(element: &Element) -> Result<Self, DecodeError> {
477        match element.get_length() {
478            Ok(len) => {
479                // FIXME: if this is a map, double the length?
480                // Or let the caller handle it?
481                Ok(RunLength::Definite(len))
482            }
483            Err(DecodeError::Indefinite) => Ok(RunLength::Indefinite),
484            Err(e) => Err(e),
485        }
486    }
487
488    fn is_zero(&self) -> bool {
489        match self {
490            RunLength::Indefinite => false,
491            RunLength::Definite(len) => *len == 0,
492        }
493    }
494
495    fn decrement(&mut self) {
496        match self {
497            RunLength::Indefinite => {}
498            RunLength::Definite(ref mut len) => {
499                if *len == 0 {
500                    panic!("RunLength underflow");
501                }
502                *len -= 1;
503            }
504        }
505    }
506}
507
508fn decode_array(
509    element: &Element,
510    input: &mut std::slice::Iter<'_, Element>,
511) -> Result<CborType, DecodeError> {
512    let mut rlen = RunLength::get(element)?;
513    let is_indef = matches!(rlen, RunLength::Indefinite);
514    let mut result: Vec<CborType> = Vec::new();
515    while !rlen.is_zero() {
516        // Recursively decode one CborType, by traversing one or more Elements.
517        let val = require_one(input);
518        if is_indef && matches!(val, Err(DecodeError::Break)) {
519            // This is an indefinite-length array, properly terminated.
520            break;
521        }
522        // Keep any Ok result; return any remaining error.
523        result.push(val?);
524        // Decrement the array-length counter.
525        rlen.decrement();
526    }
527
528    // rlen is now zero; return the result
529    if is_indef {
530        Ok(CborType::Indefinite(Indefinite::Array(Array::from(result))))
531    } else {
532        Ok(CborType::from(result))
533    }
534}
535
536fn decode_map(
537    element: &Element,
538    input: &mut std::slice::Iter<'_, Element>,
539) -> Result<CborType, DecodeError> {
540    let rlen = RunLength::get(element)?;
541
542    // Maps consume 2n items, so double the length.
543    let mut rlen = match rlen {
544        RunLength::Indefinite => rlen,
545        RunLength::Definite(n) => RunLength::Definite(2 * n),
546    };
547
548    let is_indef = matches!(rlen, RunLength::Indefinite);
549    let mut result: Vec<CborType> = Vec::new();
550    while !rlen.is_zero() {
551        // Recursively decode one CborType, by traversing one or more Elements.
552        let val = require_one(input);
553        if is_indef && matches!(val, Err(DecodeError::Break)) {
554            // This is an indefinite-length map, properly terminated.
555            break;
556        }
557        // Keep any Ok result; return any remaining error.
558        result.push(val?);
559        // Decrement the map-length counter.
560        rlen.decrement();
561    }
562    // rlen is now zero; return the result
563
564    // convert the result to a Vec of (key, value) pairs.
565    let mut results = result.drain(..);
566    let mut map_pairs = Vec::new();
567    loop {
568        let kv_pair = (results.next(), results.next());
569        match kv_pair {
570            (Some(k), Some(v)) => map_pairs.push((k, v)),
571            (None, None) => break,
572            _ => return Err(DecodeError::MapPairError),
573        }
574    }
575
576    if is_indef {
577        Ok(CborType::Indefinite(Indefinite::Map(Map::from(map_pairs))))
578    } else {
579        Ok(CborType::from(map_pairs))
580    }
581}
582
583fn decode_tag(
584    element: &Element,
585    input: &mut std::slice::Iter<'_, Element>,
586) -> Result<CborType, DecodeError> {
587    let child_value = require_one(input)?;
588    // FIXME: are there ways the "tag" element might be malformed?
589    // FIXME: need a better approach to the usize/u64 adaptations.
590    let tag = element.get_length()? as u64;
591
592    Ok(CborType::Tagged(Tagged {
593        tag: Tag(tag),
594        child: Box::new(child_value),
595    }))
596}
597
598// In the future this could complete the convertion to [u8; N]
599// using const generics.
600fn try_split(slice: &[u8], index: usize) -> Result<(&[u8], &[u8]), DecodeError> {
601    if slice.len() < index {
602        Err(DecodeError::Underrun)
603    } else {
604        Ok(slice.split_at(index))
605    }
606}
607
608fn decode_imm(element: &mut Element, buf: &mut &[u8]) -> Result<(), DecodeError> {
609    match element.imm {
610        ImmediateValue::Empty => (),
611        _ => {
612            panic!("decode_imm called on element that already has immediate data");
613        }
614    }
615
616    match element.adn_info {
617        AdnInfo::MORE1 => {
618            let (head, tail) = try_split(*buf, 1)?;
619            let imm: [u8; 1] = head.as_ref().try_into().unwrap();
620            element.imm = ImmediateValue::Bytes1(imm);
621            *buf = tail;
622        }
623        AdnInfo::MORE2 => {
624            let (head, tail) = try_split(*buf, 2)?;
625            let imm: [u8; 2] = head.as_ref().try_into().unwrap();
626            element.imm = ImmediateValue::Bytes2(imm);
627            *buf = tail;
628        }
629        AdnInfo::MORE4 => {
630            let (head, tail) = try_split(*buf, 4)?;
631            let imm: [u8; 4] = head.as_ref().try_into().unwrap();
632            element.imm = ImmediateValue::Bytes4(imm);
633            *buf = tail;
634        }
635        AdnInfo::MORE8 => {
636            let (head, tail) = try_split(*buf, 8)?;
637            let imm: [u8; 8] = head.as_ref().try_into().unwrap();
638            element.imm = ImmediateValue::Bytes8(imm);
639            *buf = tail;
640        }
641        _ => {
642            // FIXME: Not entirely sure what I want to do here.
643            // Right now, adn_info is a strange value I'll just punt
644            // to the next level of decode.
645        }
646    }
647    Ok(())
648}
649
650impl DecodeSymbolic for [u8] {
651    fn decode_symbolic(&self) -> Result<Vec<Element>, DecodeError> {
652        let mut remaining = self;
653        let mut result = Vec::new();
654        loop {
655            if remaining.is_empty() {
656                break;
657            }
658            // Convert first byte's fields into an Element
659            let mut element = Element::from_byte(remaining[0]);
660            remaining = &remaining[1..];
661            // take 0-8 bytes based on adn_info
662            decode_imm(&mut element, &mut remaining)?;
663            // Examine the Element; based on its values, take some
664            // bytes into its 'bytes' field.
665            match element.major {
666                Major::Uint | Major::Nint => {
667                    // Nothing further needed here.
668                }
669                Major::Bstr | Major::Tstr => {
670                    match element.get_length() {
671                        Ok(length) => {
672                            // This is a definite-length encoding;
673                            // take that many more bytes as payload.
674                            let (head, tail) = try_split(remaining, length)?;
675                            element.bytes = head.into();
676                            remaining = tail;
677                        }
678                        Err(DecodeError::Indefinite) => {}
679                        Err(e) => return Err(e),
680                    }
681                }
682                Major::Array | Major::Map => {
683                    // Nothing further needed here.
684                    // Each array or map member is its own element.
685                }
686                Major::Tag => {
687                    // Nothing further needed here.
688                }
689                Major::Misc => {
690                    // No further action needed for bool/null/undefined
691                    // FIXME: handle floats
692                    // FIXME: handle other weird/invalid values?
693                }
694            }
695            result.push(element);
696        }
697
698        Ok(result)
699    }
700}
701
702impl Decode for [u8] {
703    fn decode(&self) -> Result<Vec<CborType>, DecodeError> {
704        self.decode_symbolic()?.decode()
705    }
706}
707
708impl EncodeSymbolic for CborType {
709    fn encode_symbolic(&self) -> Vec<Element> {
710        match self {
711            CborType::Null => {
712                let element = Element::new(Major::Misc, AdnInfo::NULL, ImmediateValue::Empty);
713                vec![element]
714            }
715            CborType::Undefined => {
716                let element = Element::new(Major::Misc, AdnInfo::UNDEFINED, ImmediateValue::Empty);
717                vec![element]
718            }
719            CborType::Bool(val) => {
720                let adn_info = if *val { AdnInfo::TRUE } else { AdnInfo::FALSE };
721                let element = Element::new(Major::Misc, adn_info, ImmediateValue::Empty);
722                vec![element]
723            }
724            CborType::Integer(x) => encode_integer(x),
725            CborType::ByteString(x) => encode_bytestring(x),
726            CborType::TextString(x) => encode_textstring(x),
727            CborType::Array(x) => encode_array(x, AS_DEF),
728            CborType::Map(m) => encode_map(m, AS_DEF),
729            CborType::Indefinite(x) => encode_indefinite(x),
730            CborType::Tagged(x) => encode_tagged(x),
731            CborType::Float(x) => encode_float(x),
732        }
733    }
734}
735
736fn encode_indefinite(ind: &Indefinite) -> Vec<Element> {
737    match ind {
738        Indefinite::ByteString(x) => encode_indef_bytestring(x),
739        Indefinite::TextString(x) => encode_indef_textstring(x),
740        Indefinite::Array(x) => encode_array(x, AS_INDEF),
741        Indefinite::Map(x) => encode_map(x, AS_INDEF),
742    }
743}
744
745/// Encode an integer.
746///
747/// This does not attempt to canonicalize the integer size; a small number stored
748/// as an Integer::U32, for example, will be encoded as 5 bytes.
749fn encode_integer(x: &Integer) -> Vec<Element> {
750    let element = match *x {
751        Integer::U5(n) => Element::new(Major::Uint, AdnInfo(*n), ImmediateValue::Empty),
752        Integer::U8(n) => Element::new(Major::Uint, AdnInfo::MORE1, n.into()),
753        Integer::U16(n) => Element::new(Major::Uint, AdnInfo::MORE2, n.into()),
754        Integer::U32(n) => Element::new(Major::Uint, AdnInfo::MORE4, n.into()),
755        Integer::U64(n) => Element::new(Major::Uint, AdnInfo::MORE8, n.into()),
756        Integer::N5(n) => Element::new(Major::Nint, AdnInfo(*n), ImmediateValue::Empty),
757        Integer::N8(n) => Element::new(Major::Nint, AdnInfo::MORE1, n.into()),
758        Integer::N16(n) => Element::new(Major::Nint, AdnInfo::MORE2, n.into()),
759        Integer::N32(n) => Element::new(Major::Nint, AdnInfo::MORE4, n.into()),
760        Integer::N64(n) => Element::new(Major::Nint, AdnInfo::MORE8, n.into()),
761    };
762    vec![element]
763}
764
765// Encode a text- or byte-string into an Element.
766fn encode_bytes(major: Major, v: &[u8]) -> Element {
767    let mut element = encode_length(major, v.len());
768    element.set_bytes(v);
769    element
770}
771
772// Adapter for places that have a usize in hand.
773// Hopefully this is optimized away, as usize->u64 should
774// always succeed.
775#[inline]
776fn encode_length(major: Major, len: usize) -> Element {
777    let len: u64 = len.try_into().expect("usize to u64");
778    encode_immediate(major, len)
779}
780
781// Helper function for length values
782//
783// It returns an Element with no payload.
784// This is incomplete for definite-length byte/text-strings,
785// which need to have the string bytes appended.
786// It is correct for arrays or maps.
787fn encode_immediate(major: Major, len: u64) -> Element {
788    if len < 24 {
789        Element::new(major, AdnInfo(len.truncate()), ImmediateValue::Empty)
790    } else if len < 0x100 {
791        // 1 byte needed to express length.
792        let len: u8 = len.truncate();
793        Element::new(major, AdnInfo::MORE1, len.into())
794    } else if len < 0x10000 {
795        // 2 bytes needed to express length.
796        let len: u16 = len.truncate();
797        Element::new(major, AdnInfo::MORE2, len.into())
798    } else if len < 0x100000000 {
799        // 4 bytes needed to express length.
800        let len: u32 = len.truncate();
801        Element::new(major, AdnInfo::MORE4, len.into())
802    } else {
803        // 8 bytes needed to express length.
804        let len = len as u64;
805        Element::new(major, AdnInfo::MORE8, len.into())
806    }
807}
808
809/// Encode a byte string.
810fn encode_bytestring(bstr: &ByteString) -> Vec<Element> {
811    let element = encode_bytes(Major::Bstr, &bstr.0);
812    vec![element]
813}
814
815fn encode_indef_bytestring(list: &[ByteString]) -> Vec<Element> {
816    let mut elements = Vec::with_capacity(1 + list.len());
817    elements.push(Element::simple(Major::Bstr, AdnInfo::INDEFINITE));
818    for bstr in list {
819        elements.push(encode_bytes(Major::Bstr, &bstr.0));
820    }
821    elements.push(Element::simple(Major::Misc, AdnInfo::BREAK));
822    elements
823}
824
825/// Encode a text string.
826fn encode_textstring(text: &TextString) -> Vec<Element> {
827    let bytes = text.0.as_bytes();
828    let element = encode_bytes(Major::Tstr, bytes);
829    vec![element]
830}
831
832fn encode_indef_textstring(list: &[TextString]) -> Vec<Element> {
833    let mut elements = Vec::with_capacity(1 + list.len());
834    elements.push(Element::simple(Major::Tstr, AdnInfo::INDEFINITE));
835    for text in list {
836        let bytes = text.0.as_bytes();
837        elements.push(encode_bytes(Major::Tstr, bytes));
838    }
839    elements.push(Element::simple(Major::Misc, AdnInfo::BREAK));
840    elements
841}
842
843fn encode_array(a: &Array, definite: UseDefLen) -> Vec<Element> {
844    let list = &a.0;
845    let mut elements = Vec::with_capacity(1 + list.len());
846    if definite == AS_INDEF {
847        elements.push(Element::simple(Major::Array, AdnInfo::INDEFINITE));
848    } else {
849        elements.push(encode_length(Major::Array, list.len()));
850    }
851    for item in list {
852        elements.extend(item.encode_symbolic());
853    }
854    if definite == AS_INDEF {
855        elements.push(Element::simple(Major::Misc, AdnInfo::BREAK));
856    }
857    elements
858}
859
860fn encode_map(map: &Map, definite: UseDefLen) -> Vec<Element> {
861    let kv_list = &map.0;
862    let mut elements = Vec::with_capacity(1 + kv_list.len());
863    if definite == AS_INDEF {
864        elements.push(Element::simple(Major::Map, AdnInfo::INDEFINITE));
865    } else {
866        elements.push(encode_length(Major::Map, kv_list.len()));
867    }
868    for (k, v) in kv_list {
869        elements.extend(k.encode_symbolic());
870        elements.extend(v.encode_symbolic());
871    }
872    if definite == AS_INDEF {
873        elements.push(Element::simple(Major::Misc, AdnInfo::BREAK));
874    }
875    elements
876}
877
878fn encode_float(f: &Float) -> Vec<Element> {
879    let element = match *f {
880        Float::F16(n) => Element::new(Major::Misc, AdnInfo::FLOAT16, n.into()),
881        Float::F32(n) => Element::new(Major::Misc, AdnInfo::FLOAT32, n.into()),
882        Float::F64(n) => Element::new(Major::Misc, AdnInfo::FLOAT64, n.into()),
883    };
884    vec![element]
885}
886
887fn encode_tagged(x: &Tagged) -> Vec<Element> {
888    let tag = encode_immediate(Major::Tag, x.tag.0);
889    let mut v = vec![tag];
890    v.extend(x.child.encode_symbolic());
891    v
892}
893
894impl Encode for Vec<Element> {
895    fn encode(&self) -> Vec<u8> {
896        self.iter().map(Encode::encode).flatten().collect()
897    }
898}
899
900impl Encode for Element {
901    fn encode(&self) -> Vec<u8> {
902        let major = self.major as u8;
903        if major > 7 {
904            panic!("major out of range");
905        }
906        if self.adn_info.0 > 31 {
907            panic!("additional-info out of range");
908        }
909        let mut buf = Vec::with_capacity(1 + self.bytes.len());
910        buf.push(major << 5 | self.adn_info.0);
911        buf.extend(&self.imm);
912        buf.extend(&self.bytes);
913        buf
914    }
915}