dcbor_parse/
token.rs

1use base64::Engine as _;
2use bc_ur::prelude::*;
3use logos::Logos;
4
5use crate::error::{Error, Result};
6
7#[derive(Debug, Clone, Logos, PartialEq)]
8#[rustfmt::skip]
9#[logos(error = Error)]
10#[logos(skip r"(?:[ \t\r\n\f]|/[^/]*/|#[^\n]*)+")]
11pub enum Token {
12    #[token("false", |_| false)]
13    #[token("true", |_| true)]
14    Bool(bool),
15
16    #[token("{")]
17    BraceOpen,
18
19    #[token("}")]
20    BraceClose,
21
22    #[token("[")]
23    BracketOpen,
24
25    #[token("]")]
26    BracketClose,
27
28    #[token("(")]
29    ParenthesisOpen,
30
31    #[token(")")]
32    ParenthesisClose,
33
34    #[token(":")]
35    Colon,
36
37    #[token(",")]
38    Comma,
39
40    #[token("null")]
41    Null,
42
43    #[token("NaN")]
44    NaN,
45
46    #[token("Infinity")]
47    Infinity,
48
49    #[token("-Infinity")]
50    NegInfinity,
51
52    /// Binary string in hex format.
53    #[regex(r"h'[0-9a-fA-F]*'", |lex| {
54        let hex = lex.slice();
55        let raw_hex = hex[2..hex.len() - 1].as_bytes();
56        if raw_hex.len() % 2 != 0 {
57            return Err(Error::InvalidHexString(lex.span()));
58        }
59        hex::decode(raw_hex)
60            .map_err(|_|
61                Error::InvalidHexString(lex.span())
62            )
63    })]
64    ByteStringHex(Result<Vec<u8>>),
65
66    /// Binary string in base64 format.
67    #[regex(r"b64'([A-Za-z0-9+/=]{2,})'", |lex| {
68        let base64 = lex.slice();
69        let s = &base64[4..base64.len() - 1];
70        base64::engine::general_purpose::STANDARD
71        .decode(s)
72        .map_err(|_| Error::InvalidBase64String(lex.span()))
73    })]
74    ByteStringBase64(Result<Vec<u8>>),
75
76    /// JavaScript-style number.
77    #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex|
78        lex.slice().parse::<f64>().unwrap()
79    )]
80    Number(f64),
81
82    /// JavaScript-style string.
83    #[regex(r#""([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#, |lex|
84        lex.slice().to_owned()
85    )]
86    String(String),
87
88    /// Integer followed immediately by an opening parenthesis.
89    #[regex(r#"0\(|[1-9][0-9]*\("#, |lex|
90        let span = (lex.span().start)..(lex.span().end - 1);
91        let stripped = lex.slice().strip_suffix('(').unwrap();
92        stripped.parse::<TagValue>().map_err(|_|
93                Error::InvalidTagValue(stripped.to_string(), span)
94            )
95    )]
96    TagValue(Result<TagValue>),
97
98    /// Tag name followed immediately by an opening parenthesis.
99    #[regex(r#"[a-zA-Z_][a-zA-Z0-9_-]*\("#, |lex|
100        // safe to drop the trailing '('
101        lex.slice()[..lex.slice().len()-1].to_string()
102    )]
103    TagName(String),
104
105    /// Integer (same regex as TagValue) enclosed in single quotes.
106    #[regex(r#"'0'|'[1-9][0-9]*'"#, |lex|
107        let span = (lex.span().start + 1)..(lex.span().end - 1);
108        let slice = lex.slice();
109        let stripped = slice[1..slice.len() - 1].to_string();
110        stripped.parse::<TagValue>().map_err(|_|
111                Error::InvalidKnownValue(stripped, span)
112            )
113    )]
114    KnownValueNumber(Result<u64>),
115
116    /// Single-quoted empty string (i.e., `''`) (Unit) or Identifier (same regex
117    /// as for tag names) enclosed in single quotes.
118    #[regex(r#"''|'[a-zA-Z_][a-zA-Z0-9_-]*'"#, |lex|
119        lex.slice()[1..lex.slice().len()-1].to_string()
120    )]
121    KnownValueName(String),
122
123    /// The _unit_ known value `40000(0)`.
124    #[token("Unit")]
125    Unit,
126
127    #[regex(r#"ur:([a-zA-Z0-9][a-zA-Z0-9-]*)/([a-zA-Z]{8,})"#, |lex|
128        let s = lex.slice();
129        let ur = UR::from_ur_string(s);
130        ur.map_err(|e| {
131            Error::InvalidUr(e.to_string(), lex.span())
132        })
133    )]
134    UR(Result<UR>),
135}