Skip to main content

dcbor_parse/
parse.rs

1use bc_ur::prelude::*;
2use known_values::KnownValue;
3use logos::{Lexer, Logos, Span};
4
5use crate::{
6    Token,
7    error::{Error, Result},
8};
9
10/// Parses a dCBOR item from a string input.
11///
12/// This function takes a string slice containing a dCBOR diagnostic notation
13/// encoded value and attempts to parse it into a `CBOR` object. If the input
14/// contains extra tokens after a valid item, an error is returned.
15///
16/// # Arguments
17///
18/// * `src` - A string slice containing the dCBOR-encoded data.
19///
20/// # Returns
21///
22/// * `Ok(CBOR)` if parsing is successful and the input contains exactly one
23///   valid dCBOR item, which itself might be an atomic value like a number or
24///   string, or a complex value like an array or map.
25/// * `Err(Error)` if parsing fails or if extra tokens are found after the item.
26///
27/// # Errors
28///
29/// Returns an error if the input is invalid, contains extra tokens, or if any
30/// token cannot be parsed as expected.
31///
32/// # Example
33///
34/// ```rust
35/// # use dcbor_parse::parse_dcbor_item;
36/// let cbor = parse_dcbor_item("[1, 2, 3]").unwrap();
37/// assert_eq!(cbor.diagnostic(), "[1, 2, 3]");
38/// ```
39pub fn parse_dcbor_item(src: &str) -> Result<CBOR> {
40    let mut lexer = Token::lexer(src);
41    let first_token = expect_token(&mut lexer);
42    match first_token {
43        Ok(token) => parse_item_token(&token, &mut lexer).and_then(|cbor| {
44            if lexer.next().is_some() {
45                Err(Error::ExtraData(lexer.span()))
46            } else {
47                Ok(cbor)
48            }
49        }),
50        Err(e) => {
51            if e == Error::UnexpectedEndOfInput {
52                return Err(Error::EmptyInput);
53            }
54            Err(e)
55        }
56    }
57}
58
59/// Parses a dCBOR item from the beginning of a string and returns the parsed
60/// [`CBOR`] along with the number of bytes consumed.
61///
62/// Unlike [`parse_dcbor_item`], this function succeeds even if additional
63/// characters follow the first item. The returned index points to the first
64/// unparsed character after skipping any trailing whitespace or comments.
65///
66/// # Example
67///
68/// ```rust
69/// # use dcbor_parse::parse_dcbor_item_partial;
70/// # use dcbor::prelude::*;
71/// let (cbor, used) = parse_dcbor_item_partial("true )").unwrap();
72/// assert_eq!(cbor, CBOR::from(true));
73/// assert_eq!(used, 5);
74/// ```
75pub fn parse_dcbor_item_partial(src: &str) -> Result<(CBOR, usize)> {
76    let mut lexer = Token::lexer(src);
77    let first_token = expect_token(&mut lexer);
78    match first_token {
79        Ok(token) => parse_item_token(&token, &mut lexer).map(|cbor| {
80            let consumed = match lexer.next() {
81                Some(_) => lexer.span().start,
82                None => src.len(),
83            };
84            (cbor, consumed)
85        }),
86        Err(e) => {
87            if e == Error::UnexpectedEndOfInput {
88                Err(Error::EmptyInput)
89            } else {
90                Err(e)
91            }
92        }
93    }
94}
95
96//
97// === Private Functions ===
98//
99
100fn parse_item(lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
101    let token = expect_token(lexer)?;
102    parse_item_token(&token, lexer)
103}
104
105fn expect_token(lexer: &mut Lexer<'_, Token>) -> Result<Token> {
106    let span = lexer.span();
107    match lexer.next() {
108        Some(token_or_err) => match token_or_err {
109            Ok(token) => Ok(token),
110            Err(e) => {
111                if e.is_default() {
112                    Err(Error::UnrecognizedToken(span))
113                } else {
114                    Err(e)
115                }
116            }
117        },
118        None => Err(Error::UnexpectedEndOfInput),
119    }
120}
121
122fn parse_item_token(
123    token: &Token,
124    lexer: &mut Lexer<'_, Token>,
125) -> Result<CBOR> {
126    // Handle embedded lexing errors in token payloads
127    if let Token::ByteStringHex(Err(e)) = token {
128        return Err(e.clone());
129    }
130    if let Token::ByteStringBase64(Err(e)) = token {
131        return Err(e.clone());
132    }
133    if let Token::DateLiteral(Err(e)) = token {
134        return Err(e.clone());
135    }
136    if let Token::TagValue(Err(e)) = token {
137        return Err(e.clone());
138    }
139    if let Token::UR(Err(e)) = token {
140        return Err(e.clone());
141    }
142    if let Token::KnownValueNumber(Err(e)) = token {
143        return Err(e.clone());
144    }
145
146    match token {
147        Token::Bool(b) => Ok((*b).into()),
148        Token::Null => Ok(CBOR::null()),
149        Token::ByteStringHex(Ok(bytes)) => Ok(CBOR::to_byte_string(bytes)),
150        Token::ByteStringBase64(Ok(bytes)) => Ok(CBOR::to_byte_string(bytes)),
151        Token::DateLiteral(Ok(date)) => Ok((*date).into()),
152        Token::Number(num) => Ok((*num).into()),
153        Token::NaN => Ok(f64::NAN.into()),
154        Token::Infinity => Ok(f64::INFINITY.into()),
155        Token::NegInfinity => Ok(f64::NEG_INFINITY.into()),
156        Token::String(s) => parse_string(s, lexer.span()),
157        Token::UR(Ok(ur)) => parse_ur(ur, lexer.span()),
158        Token::TagValue(Ok(tag_value)) => parse_number_tag(*tag_value, lexer),
159        Token::TagName(name) => parse_name_tag(name, lexer),
160        Token::KnownValueNumber(Ok(value)) => {
161            Ok(KnownValue::new(*value).into())
162        }
163        Token::KnownValueName(name) => {
164            if let Some(known_value) = known_value_for_name(name) {
165                Ok(known_value.into())
166            } else {
167                let span = lexer.span().start + 1..lexer.span().end - 1;
168                Err(Error::UnknownKnownValueName(name.clone(), span))
169            }
170        }
171        Token::Unit => Ok(KnownValue::new(0).into()),
172        Token::BracketOpen => parse_array(lexer),
173        Token::BraceOpen => parse_map(lexer),
174        _ => Err(Error::UnexpectedToken(
175            Box::new(token.clone()),
176            lexer.span(),
177        )),
178    }
179}
180
181fn parse_string(s: &str, span: Span) -> Result<CBOR> {
182    if s.starts_with('"') && s.ends_with('"') {
183        let s = &s[1..s.len() - 1];
184        Ok(s.into())
185    } else {
186        Err(Error::UnrecognizedToken(span))
187    }
188}
189
190fn tag_for_name(name: &str) -> Option<Tag> {
191    with_tags!(|tags: &TagsStore| tags.tag_for_name(name))
192}
193
194fn known_value_for_name(name: &str) -> Option<KnownValue> {
195    let binding = known_values::KNOWN_VALUES.get();
196    let known_values = binding.as_ref().unwrap();
197    known_values.known_value_named(name).cloned()
198}
199
200fn parse_ur(ur: &UR, span: Span) -> Result<CBOR> {
201    let ur_type = ur.ur_type_str();
202    if let Some(tag) = tag_for_name(ur_type) {
203        Ok(CBOR::to_tagged_value(tag, ur.cbor()))
204    } else {
205        Err(Error::UnknownUrType(
206            ur_type.to_string(),
207            span.start + 3..span.start + 3 + ur_type.len(),
208        ))
209    }
210}
211
212fn parse_number_tag(
213    tag_value: TagValue,
214    lexer: &mut Lexer<'_, Token>,
215) -> Result<CBOR> {
216    let item = parse_item(lexer)?;
217    match expect_token(lexer) {
218        Ok(Token::ParenthesisClose) => {
219            Ok(CBOR::to_tagged_value(tag_value, item))
220        }
221        Ok(_) => Err(Error::UnmatchedParentheses(lexer.span())),
222        Err(e) => {
223            if e == Error::UnexpectedEndOfInput {
224                return Err(Error::UnmatchedParentheses(lexer.span()));
225            }
226            Err(e)
227        }
228    }
229}
230
231fn parse_name_tag(name: &str, lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
232    let span = lexer.span().start..lexer.span().end - 1;
233    let item = parse_item(lexer)?;
234    match expect_token(lexer)? {
235        Token::ParenthesisClose => {
236            if let Some(tag) = tag_for_name(name) {
237                Ok(CBOR::to_tagged_value(tag, item))
238            } else {
239                Err(Error::UnknownTagName(name.to_string(), span))
240            }
241        }
242        _ => Err(Error::UnmatchedParentheses(lexer.span())),
243    }
244}
245
246fn parse_array(lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
247    let mut items = Vec::new();
248    let mut awaits_comma = false;
249    let mut awaits_item = false;
250
251    loop {
252        match expect_token(lexer)? {
253            Token::Bool(b) if !awaits_comma => {
254                items.push(b.into());
255                awaits_item = false;
256            }
257            Token::Null if !awaits_comma => {
258                items.push(CBOR::null());
259                awaits_item = false;
260            }
261            Token::ByteStringHex(Ok(bytes)) if !awaits_comma => {
262                items.push(CBOR::to_byte_string(bytes));
263                awaits_item = false;
264            }
265            Token::ByteStringBase64(Ok(bytes)) if !awaits_comma => {
266                items.push(CBOR::to_byte_string(bytes));
267                awaits_item = false;
268            }
269            Token::DateLiteral(Ok(date)) if !awaits_comma => {
270                items.push(date.into());
271                awaits_item = false;
272            }
273            Token::Number(num) if !awaits_comma => {
274                items.push(num.into());
275                awaits_item = false;
276            }
277            Token::NaN if !awaits_comma => {
278                items.push(f64::NAN.into());
279                awaits_item = false;
280            }
281            Token::Infinity if !awaits_comma => {
282                items.push(f64::INFINITY.into());
283                awaits_item = false;
284            }
285            Token::NegInfinity if !awaits_comma => {
286                items.push(f64::NEG_INFINITY.into());
287                awaits_item = false;
288            }
289            Token::String(s) if !awaits_comma => {
290                items.push(parse_string(&s, lexer.span())?);
291                awaits_item = false;
292            }
293            Token::UR(Ok(ur)) if !awaits_comma => {
294                items.push(parse_ur(&ur, lexer.span())?);
295                awaits_item = false;
296            }
297            Token::TagValue(Ok(tag_value)) if !awaits_comma => {
298                items.push(parse_number_tag(tag_value, lexer)?);
299                awaits_item = false;
300            }
301            Token::TagName(name) if !awaits_comma => {
302                items.push(parse_name_tag(&name, lexer)?);
303                awaits_item = false;
304            }
305            Token::KnownValueNumber(Ok(value)) if !awaits_comma => {
306                items.push(KnownValue::new(value).into());
307                awaits_item = false;
308            }
309            Token::KnownValueName(name) if !awaits_comma => {
310                if let Some(known_value) = known_value_for_name(&name) {
311                    items.push(known_value.into());
312                } else {
313                    return Err(Error::UnknownKnownValueName(
314                        name,
315                        lexer.span(),
316                    ));
317                }
318                awaits_item = false;
319            }
320            Token::BracketOpen if !awaits_comma => {
321                items.push(parse_array(lexer)?);
322                awaits_item = false;
323            }
324            Token::BraceOpen if !awaits_comma => {
325                items.push(parse_map(lexer)?);
326                awaits_item = false;
327            }
328            Token::Comma if awaits_comma => {
329                awaits_item = true;
330            }
331            Token::BracketClose if !awaits_item => {
332                return Ok(items.into());
333            }
334            token => {
335                if awaits_comma {
336                    return Err(Error::ExpectedComma(lexer.span()));
337                }
338                return Err(Error::UnexpectedToken(
339                    Box::new(token),
340                    lexer.span(),
341                ));
342            }
343        }
344        awaits_comma = !awaits_item;
345    }
346}
347
348fn parse_map(lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
349    let mut map = Map::new();
350    let mut awaits_comma = false;
351    let mut awaits_key = false;
352
353    loop {
354        let token = match expect_token(lexer) {
355            Ok(tok) => tok,
356            Err(Error::UnexpectedEndOfInput) => {
357                return Err(Error::UnmatchedBraces(lexer.span()));
358            }
359            Err(e) => {
360                return Err(e);
361            }
362        };
363        match token {
364            Token::BraceClose if !awaits_key => {
365                return Ok(map.into());
366            }
367            Token::Comma if awaits_comma => {
368                awaits_key = true;
369            }
370            _ => {
371                if awaits_comma {
372                    return Err(Error::ExpectedComma(lexer.span()));
373                }
374                let key = parse_item_token(&token, lexer)?;
375                let key_span = lexer.span();
376
377                // Check for duplicate key
378                if map.contains_key(key.clone()) {
379                    return Err(Error::DuplicateMapKey(key_span));
380                }
381
382                if let Ok(Token::Colon) = expect_token(lexer) {
383                    let value = match parse_item(lexer) {
384                        Err(Error::UnexpectedToken(token, span))
385                            if *token == Token::BraceClose =>
386                        {
387                            return Err(Error::ExpectedMapKey(span));
388                        }
389                        other => other?,
390                    };
391                    map.insert(key, value);
392                    awaits_key = false;
393                } else {
394                    return Err(Error::ExpectedColon(lexer.span()));
395                }
396            }
397        }
398        awaits_comma = !awaits_key;
399    }
400}