Skip to main content

dcbor_parse/
token.rs

1use base64::Engine as _;
2use bc_ur::prelude::*;
3use logos::Logos;
4
5use crate::error::{Error, Result};
6
7#[derive(Debug, Clone, Logos, PartialEq)]
8#[rustfmt::skip]
9#[logos(error = Error)]
10#[logos(skip r"(?:[ \t\r\n\f]|/[^/]*/|#[^\n]*)+")]
11pub enum Token {
12    #[token("false", |_| false)]
13    #[token("true", |_| true)]
14    Bool(bool),
15
16    #[token("{")]
17    BraceOpen,
18
19    #[token("}")]
20    BraceClose,
21
22    #[token("[")]
23    BracketOpen,
24
25    #[token("]")]
26    BracketClose,
27
28    #[token("(")]
29    ParenthesisOpen,
30
31    #[token(")")]
32    ParenthesisClose,
33
34    #[token(":")]
35    Colon,
36
37    #[token(",")]
38    Comma,
39
40    #[token("null")]
41    Null,
42
43    #[token("NaN")]
44    NaN,
45
46    #[token("Infinity")]
47    Infinity,
48
49    #[token("-Infinity")]
50    NegInfinity,
51
52    /// Binary string in hex format.
53    #[regex(r"h'[0-9a-fA-F]*'", |lex| {
54        let hex = lex.slice();
55        let raw_hex = &hex.as_bytes()[2..hex.len() - 1];
56        if !raw_hex.len().is_multiple_of(2) {
57            return Err(Error::InvalidHexString(lex.span()));
58        }
59        hex::decode(raw_hex)
60            .map_err(|_|
61                Error::InvalidHexString(lex.span())
62            )
63    })]
64    ByteStringHex(Result<Vec<u8>>),
65
66    /// Binary string in base64 format.
67    #[cfg(not(feature = "simplified-patterns"))]
68    #[regex(r"b64'([A-Za-z0-9+/=]{2,})'", |lex| {
69        let base64 = lex.slice();
70        let s = &base64[4..base64.len() - 1];
71        base64::engine::general_purpose::STANDARD
72        .decode(s)
73        .map_err(|_| Error::InvalidBase64String(lex.span()))
74    })]
75    ByteStringBase64(Result<Vec<u8>>),
76
77    /// Binary string in base64 format (simplified for IDE).
78    #[cfg(feature = "simplified-patterns")]
79    #[regex(r"b64'[A-Za-z0-9+/=]*'", |lex| {
80        let base64 = lex.slice();
81        let s = &base64[4..base64.len() - 1];
82        base64::engine::general_purpose::STANDARD
83        .decode(s)
84        .map_err(|_| Error::InvalidBase64String(lex.span()))
85    })]
86    ByteStringBase64(Result<Vec<u8>>),
87
88    /// ISO-8601 date literal (date-only or date-time).
89    #[cfg(not(feature = "simplified-patterns"))]
90    #[regex(r"\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?)?", |lex| {
91        let date_str = lex.slice();
92        Date::from_string(date_str).map_err(|_| {
93            Error::InvalidDateString(date_str.to_string(), lex.span())
94        })
95    })]
96    DateLiteral(Result<Date>),
97
98    /// ISO-8601 date literal (simplified for IDE).
99    #[cfg(feature = "simplified-patterns")]
100    #[regex(r"\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2})?", |lex| {
101        let date_str = lex.slice();
102        Date::from_string(date_str).map_err(|_| {
103            Error::InvalidDateString(date_str.to_string(), lex.span())
104        })
105    })]
106    DateLiteral(Result<Date>),
107
108    /// JavaScript-style number.
109    #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex|
110        lex.slice().parse::<f64>().unwrap()
111    )]
112    Number(f64),
113
114    /// JavaScript-style string.
115    #[cfg(not(feature = "simplified-patterns"))]
116    #[regex(r#""([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#, |lex|
117        lex.slice().to_owned()
118    )]
119    String(String),
120
121    /// JavaScript-style string (simplified for IDE).
122    #[cfg(feature = "simplified-patterns")]
123    #[regex(r#""[^"]*""#, |lex|
124        lex.slice().to_owned()
125    )]
126    String(String),
127
128    /// Integer followed immediately by an opening parenthesis.
129    #[regex(r#"0\(|[1-9][0-9]*\("#, |lex|
130        let span = (lex.span().start)..(lex.span().end - 1);
131        let stripped = lex.slice().strip_suffix('(').unwrap();
132        stripped.parse::<TagValue>().map_err(|_|
133                Error::InvalidTagValue(stripped.to_string(), span)
134            )
135    )]
136    TagValue(Result<TagValue>),
137
138    /// Tag name followed immediately by an opening parenthesis.
139    #[regex(r#"[a-zA-Z_][a-zA-Z0-9_-]*\("#, |lex|
140        // safe to drop the trailing '('
141        lex.slice()[..lex.slice().len()-1].to_string()
142    )]
143    TagName(String),
144
145    /// Integer (same regex as TagValue) enclosed in single quotes.
146    #[regex(r#"'0'|'[1-9][0-9]*'"#, |lex|
147        let span = (lex.span().start + 1)..(lex.span().end - 1);
148        let slice = lex.slice();
149        let stripped = slice[1..slice.len() - 1].to_string();
150        stripped.parse::<TagValue>().map_err(|_|
151                Error::InvalidKnownValue(stripped, span)
152            )
153    )]
154    KnownValueNumber(Result<u64>),
155
156    /// Single-quoted empty string (i.e., `''`) (Unit) or Identifier (same regex
157    /// as for tag names) enclosed in single quotes.
158    #[regex(r#"''|'[a-zA-Z_][a-zA-Z0-9_-]*'"#, |lex|
159        lex.slice()[1..lex.slice().len()-1].to_string()
160    )]
161    KnownValueName(String),
162
163    /// The _unit_ known value `40000(0)`.
164    #[token("Unit")]
165    Unit,
166
167    #[regex(r#"ur:([a-zA-Z0-9][a-zA-Z0-9-]*)/([a-zA-Z]{8,})"#, |lex|
168        let s = lex.slice();
169        let ur = UR::from_ur_string(s);
170        ur.map_err(|e| {
171            Error::InvalidUr(e.to_string(), lex.span())
172        })
173    )]
174    UR(Result<UR>),
175}