minicbor 0.19.0

A small CBOR codec suitable for no_std environments.
Documentation
//! Generic CBOR tokenization.

use core::fmt;
use crate::Decoder;
use crate::data::{Int, Tag, Type};
use crate::decode::Error;

/// Representation of possible CBOR tokens.
///
/// *Requires feature* `"half"`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Token<'b> {
    Bool(bool),
    U8(u8),
    U16(u16),
    U32(u32),
    U64(u64),
    I8(i8),
    I16(i16),
    I32(i32),
    I64(i64),
    Int(Int),
    F16(f32),
    F32(f32),
    F64(f64),
    Bytes(&'b [u8]),
    String(&'b str),
    Array(u64),
    Map(u64),
    Tag(Tag),
    Simple(u8),
    Break,
    Null,
    Undefined,
    /// Start of indefinite byte string.
    BeginBytes,
    /// Start of indefinite text string.
    BeginString,
    /// Start of indefinite array.
    BeginArray,
    /// Start of indefinite map.
    BeginMap
}

/// An [`Iterator`] over CBOR tokens.
///
/// The `Iterator` implementation calls [`Tokenizer::token`] until end of input has been reached.
///
/// *Requires feature* `"half"`.
#[derive(Debug, Clone)]
pub struct Tokenizer<'b> {
    decoder: Decoder<'b>
}

impl<'b> Iterator for Tokenizer<'b> {
    type Item = Result<Token<'b>, Error>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.token() {
            Ok(t) => Some(Ok(t)),
            Err(e) if e.is_end_of_input() => None,
            Err(e) => Some(Err(e))
        }
    }
}

impl<'b> From<Decoder<'b>> for Tokenizer<'b> {
    fn from(d: Decoder<'b>) -> Self {
        Tokenizer { decoder: d }
    }
}

impl<'b> Tokenizer<'b> {
    /// Create a new Tokenizer for the given input bytes.
    pub fn new(bytes: &'b[u8]) -> Self {
        Tokenizer { decoder: Decoder::new(bytes) }
    }

    /// Decode the next token.
    ///
    /// Note that a sequence of tokens may not necessarily represent
    /// well-formed CBOR items.
    pub fn token(&mut self) -> Result<Token<'b>, Error> {
        match self.try_token() {
            Ok(tk) => Ok(tk),
            Err(e) => {
                self.decoder.set_position(self.decoder.input().len()); // drain decoder
                Err(e)
            }
        }
    }

    fn try_token(&mut self) -> Result<Token<'b>, Error> {
        match self.decoder.datatype()? {
            Type::Bool         => self.decoder.bool().map(Token::Bool),
            Type::U8           => self.decoder.u8().map(Token::U8),
            Type::U16          => self.decoder.u16().map(Token::U16),
            Type::U32          => self.decoder.u32().map(Token::U32),
            Type::U64          => self.decoder.u64().map(Token::U64),
            Type::I8           => self.decoder.i8().map(Token::I8),
            Type::I16          => self.decoder.i16().map(Token::I16),
            Type::I32          => self.decoder.i32().map(Token::I32),
            Type::I64          => self.decoder.i64().map(Token::I64),
            Type::Int          => self.decoder.int().map(Token::Int),
            Type::F16          => self.decoder.f16().map(Token::F16),
            Type::F32          => self.decoder.f32().map(Token::F32),
            Type::F64          => self.decoder.f64().map(Token::F64),
            Type::Bytes        => self.decoder.bytes().map(Token::Bytes),
            Type::String       => self.decoder.str().map(Token::String),
            Type::Tag          => self.decoder.tag().map(Token::Tag),
            Type::Simple       => self.decoder.simple().map(Token::Simple),
            Type::Array        => {
                let p = self.decoder.position();
                if let Some(n) = self.decoder.array()? {
                    Ok(Token::Array(n))
                } else {
                    Err(Error::type_mismatch(Type::Array).at(p).with_message("missing array length"))
                }
            }
            Type::Map          => {
                let p = self.decoder.position();
                if let Some(n) = self.decoder.map()? {
                    Ok(Token::Map(n))
                } else {
                    Err(Error::type_mismatch(Type::Array).at(p).with_message("missing map length"))
                }
            }
            Type::BytesIndef   => { self.skip_byte(); Ok(Token::BeginBytes)  }
            Type::StringIndef  => { self.skip_byte(); Ok(Token::BeginString) }
            Type::ArrayIndef   => { self.skip_byte(); Ok(Token::BeginArray)  }
            Type::MapIndef     => { self.skip_byte(); Ok(Token::BeginMap)    }
            Type::Null         => { self.skip_byte(); Ok(Token::Null)        }
            Type::Undefined    => { self.skip_byte(); Ok(Token::Undefined)   }
            Type::Break        => { self.skip_byte(); Ok(Token::Break)       }
            t@Type::Unknown(_) => Err(Error::type_mismatch(t)
                .at(self.decoder.position())
                .with_message("unknown cbor type"))
        }
    }

    fn skip_byte(&mut self) {
        self.decoder.set_position(self.decoder.position() + 1)
    }
}

#[cfg(feature = "alloc")]
impl fmt::Display for Tokenizer<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        /// Control stack element.
        enum E {
            N,               // get next token
            T,               // tag
            A(Option<u64>),  // array
            M(Option<u64>),  // map
            B,               // indefinite bytes
            D,               // indefinite text
            S(&'static str), // display string
            X(&'static str)  // display string (unless next token is BREAK)
        }

        let mut iter = Tokenizer::from(self.decoder.clone()).peekable();
        let mut stack = alloc::vec::Vec::new();

        while iter.peek().is_some() {
            stack.push(E::N);
            while let Some(elt) = stack.pop() {
                match elt {
                    E::N => match iter.next() {
                        Some(Ok(Token::Array(n))) => {
                            stack.push(E::A(Some(n)));
                            f.write_str("[")?
                        }
                        Some(Ok(Token::Map(n))) => {
                            stack.push(E::M(Some(n)));
                            f.write_str("{")?
                        }
                        Some(Ok(Token::BeginArray)) => {
                            stack.push(E::A(None));
                            f.write_str("[_ ")?
                        }
                        Some(Ok(Token::BeginMap)) => {
                            stack.push(E::M(None));
                            f.write_str("{_ ")?
                        }
                        Some(Ok(Token::BeginBytes)) => if let Some(Ok(Token::Break)) = iter.peek() {
                            iter.next();
                            f.write_str("''_")?
                        } else {
                            stack.push(E::B);
                            f.write_str("(_ ")?
                        }
                        Some(Ok(Token::BeginString)) => if let Some(Ok(Token::Break)) = iter.peek() {
                            iter.next();
                            f.write_str("\"\"_")?
                        } else {
                            stack.push(E::D);
                            f.write_str("(_ ")?
                        }
                        Some(Ok(Token::Tag(t))) => {
                            stack.push(E::T);
                            write!(f, "{}(", t.numeric())?
                        }
                        Some(Ok(t))  => t.fmt(f)?,
                        Some(Err(e)) => {
                            write!(f, " !!! decoding error: {}", e)?;
                            return Ok(())
                        }
                        None => continue
                    }
                    E::S(s) => f.write_str(s)?,
                    E::X(s) => match iter.peek() {
                        Some(Ok(Token::Break)) | None => continue,
                        Some(Ok(_))  => f.write_str(s)?,
                        Some(Err(e)) => {
                            write!(f, " !!! decoding error: {}", e)?;
                            return Ok(())
                        }
                    }
                    E::T => {
                        stack.push(E::S(")"));
                        stack.push(E::N)
                    }
                    E::A(Some(0)) => f.write_str("]")?,
                    E::A(Some(1)) => {
                        stack.push(E::A(Some(0)));
                        stack.push(E::N)
                    }
                    E::A(Some(n)) => {
                        stack.push(E::A(Some(n - 1)));
                        stack.push(E::S(", "));
                        stack.push(E::N)
                    }
                    E::A(None) => match iter.peek() {
                        None => {
                            write!(f, " !!! indefinite array not closed")?;
                            return Ok(())
                        }
                        Some(Ok(Token::Break)) => {
                            iter.next();
                            f.write_str("]")?
                        }
                        _ => {
                            stack.push(E::A(None));
                            stack.push(E::X(", "));
                            stack.push(E::N)
                        }
                    }
                    E::M(Some(0)) => f.write_str("}")?,
                    E::M(Some(1)) => {
                        stack.push(E::M(Some(0)));
                        stack.push(E::N);
                        stack.push(E::S(": "));
                        stack.push(E::N)
                    }
                    E::M(Some(n)) => {
                        stack.push(E::M(Some(n - 1)));
                        stack.push(E::S(", "));
                        stack.push(E::N);
                        stack.push(E::S(": "));
                        stack.push(E::N)
                    }
                    E::M(None) => match iter.peek() {
                        None => {
                            write!(f, " !!! indefinite map not closed")?;
                            return Ok(())
                        }
                        Some(Ok(Token::Break)) => {
                            iter.next();
                            f.write_str("}")?
                        }
                        _ => {
                            stack.push(E::M(None));
                            stack.push(E::X(", "));
                            stack.push(E::N);
                            stack.push(E::S(": "));
                            stack.push(E::N)
                        }
                    }
                    E::B => match iter.peek() {
                        None => {
                            write!(f, " !!! indefinite byte string not closed")?;
                            return Ok(())
                        }
                        Some(Ok(Token::Break)) => {
                            iter.next();
                            f.write_str(")")?
                        }
                        _ => {
                            stack.push(E::B);
                            stack.push(E::X(", "));
                            stack.push(E::N)
                        }
                    }
                    E::D => match iter.peek() {
                        None => {
                            write!(f, " !!! indefinite string not closed")?;
                            return Ok(())
                        }
                        Some(Ok(Token::Break)) => {
                            iter.next();
                            f.write_str(")")?
                        }
                        _ => {
                            stack.push(E::D);
                            stack.push(E::X(", "));
                            stack.push(E::N)
                        }
                    }
                }
            }
        }

        Ok(())
    }
}

/// Pretty print a token.
///
/// Since we only show a single token we can not use diagnostic notation
/// as in the `Display` impl of [`Tokenizer`]. Instead, the following
/// syntax is used:
///
/// - Numeric values and booleans are displayed as in Rust. Floats are always
///   shown in scientific notation.
/// - Text strings are displayed in double quotes.
/// - Byte strings are displayed in single quotes prefixed with `h` and
///   hex-encoded, e.g. `h'01 02 ef'`.
/// - An array is displayed as `A[n]` where `n` denotes the number of elements.
///   The following `n` tokens are elements of this array.
/// - A map is displayed as `M[n]` where `n` denotes the number of pairs.
///   The following `n` tokens are entries of this map.
/// - Tags are displayed with `T(t)` where `t` is the tag number.
/// - Simple values are displayed as `simple(n)` where `n` denotes the numeric
///   value.
/// - Indefinite items start with:
///     * `?B[` for byte strings,
///     * `?S[` for text strings,
///     * `?A[` for arrays,
///     * `?M[` for maps,
///   and end with `]` when a `Token::Break` is encountered. All tokens
///   in between belong to the indefinite container.
/// - `Token::Null` is displayed as `null` and `Token::Undefined` as `undefined`.
impl fmt::Display for Token<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Bool(b)     => write!(f, "{}", b),
            Token::U8(n)       => write!(f, "{}", n),
            Token::U16(n)      => write!(f, "{}", n),
            Token::U32(n)      => write!(f, "{}", n),
            Token::U64(n)      => write!(f, "{}", n),
            Token::I8(n)       => write!(f, "{}", n),
            Token::I16(n)      => write!(f, "{}", n),
            Token::I32(n)      => write!(f, "{}", n),
            Token::I64(n)      => write!(f, "{}", n),
            Token::Int(n)      => write!(f, "{}", n),
            Token::F16(n)      => write!(f, "{:e}", n),
            Token::F32(n)      => write!(f, "{:e}", n),
            Token::F64(n)      => write!(f, "{:e}", n),
            Token::String(n)   => write!(f, "\"{}\"", n),
            Token::Array(n)    => write!(f, "A[{}]", n),
            Token::Map(n)      => write!(f, "M[{}]", n),
            Token::Tag(t)      => write!(f, "T({})", t.numeric()),
            Token::Simple(n)   => write!(f, "simple({})", n),
            Token::Break       => f.write_str("]"),
            Token::Null        => f.write_str("null"),
            Token::Undefined   => f.write_str("undefined"),
            Token::BeginBytes  => f.write_str("?B["),
            Token::BeginString => f.write_str("?S["),
            Token::BeginArray  => f.write_str("?A["),
            Token::BeginMap    => f.write_str("?M["),
            Token::Bytes(b)    => {
                f.write_str("h'")?;
                let mut i = b.len();
                for x in *b {
                    if i > 1 {
                        write!(f, "{:02x} ", x)?
                    } else {
                        write!(f, "{:02x}", x)?
                    }
                    i -= 1;
                }
                f.write_str("'")
            }
        }
    }
}