1use base64::Engine as _;
2use bc_ur::prelude::*;
3use known_values::KnownValue;
4use logos::{ Lexer, Logos, Span };
5use thiserror::Error;
6
7#[derive(Debug, Error, Clone, PartialEq)]
8#[rustfmt::skip]
9pub enum Error {
10 #[error("Empty input")]
11 EmptyInput,
12 #[error("Unexpected end of input")]
13 UnexpectedEndOfInput,
14 #[error("Extra data at end of input")]
15 ExtraData(Span),
16 #[error("Unexpected token {0:?}")]
17 UnexpectedToken(Box<Token>, Span),
18 #[error("Unrecognized token")]
19 UnrecognizedToken(Span),
20 #[error("Expected comma")]
21 ExpectedComma(Span),
22 #[error("Expected colon")]
23 ExpectedColon(Span),
24 #[error("Unmatched parentheses")]
25 UnmatchedParentheses(Span),
26 #[error("Unmatched braces")]
27 UnmatchedBraces(Span),
28 #[error("Expected map key")]
29 ExpectedMapKey(Span),
30 #[error("Invalid tag value '{0}'")]
31 InvalidTagValue(String, Span),
32 #[error("Unknown tag name '{0}'")]
33 UnknownTagName(String, Span),
34 #[error("Invalid hex string")]
35 InvalidHexString(Span),
36 #[error("Invalid base64 string")]
37 InvalidBase64String(Span),
38 #[error("Unknown UR type '{0}'")]
39 UnknownUrType(String, Span),
40 #[error("Invalid UR '{0}'")]
41 InvalidUr(String, Span),
42 #[error("Invalid known value '{0}'")]
43 InvalidKnownValue(String, Span),
44 #[error("Unknown known value name '{0}'")]
45 UnknownKnownValueName(String, Span),
46}
47
48impl Error {
49 pub fn is_default(&self) -> bool {
50 matches!(self, Error::UnrecognizedToken(_))
51 }
52
53 fn format_message(message: &dyn ToString, source: &str, range: &Span) -> String {
54 let message = message.to_string();
55 let start = range.start;
56 let end = range.end;
57 let mut line_number = 1;
59 let mut line_start = 0;
60 for (idx, ch) in source.char_indices() {
61 if idx >= start {
62 break;
63 }
64 if ch == '\n' {
65 line_number += 1;
66 line_start = idx + 1;
67 }
68 }
69 let line = source
71 .lines()
72 .nth(line_number - 1)
73 .unwrap_or("");
74 let column = start.saturating_sub(line_start);
76 let underline_len = end.saturating_sub(start).max(1);
78 let caret = " ".repeat(column) + &"^".repeat(underline_len);
79 format!("line {line_number}: {message}\n{line}\n{caret}")
80 }
81
82 #[rustfmt::skip]
83 pub fn full_message(&self, source: &str) -> String {
84 match self {
85 Error::EmptyInput => Self::format_message(self, source, &Span::default()),
86 Error::UnexpectedEndOfInput => Self::format_message(self, source, &(source.len()..source.len())),
87 Error::ExtraData(range) => Self::format_message(self, source, range),
88 Error::UnexpectedToken(_, range) => Self::format_message(self, source, range),
89 Error::UnrecognizedToken(range) => Self::format_message(self, source, range),
90 Error::UnknownUrType(_, range) => Self::format_message(self, source, range),
91 Error::UnmatchedParentheses(range) => Self::format_message(self, source, range),
92 Error::ExpectedComma(range) => Self::format_message(self, source, range),
93 Error::ExpectedColon(range) => Self::format_message(self, source, range),
94 Error::ExpectedMapKey(range) => Self::format_message(self, source, range),
95 Error::UnmatchedBraces(range) => Self::format_message(self, source, range),
96 Error::UnknownTagName(_, range) => Self::format_message(self, source, range),
97 Error::InvalidHexString(range) => Self::format_message(self, source, range),
98 Error::InvalidBase64String(range) => Self::format_message(self, source, range),
99 Error::InvalidTagValue(_, range) => Self::format_message(self, source, range),
100 Error::InvalidUr(_, range) => Self::format_message(self, source, range),
101 Error::InvalidKnownValue(_, range) => Self::format_message(self, source, range),
102 Error::UnknownKnownValueName(_, range) => Self::format_message(self, source, range),
103 }
104 }
105}
106
107impl Default for Error {
108 fn default() -> Self {
109 Error::UnrecognizedToken(Span::default())
110 }
111}
112
113pub type Result<T> = std::result::Result<T, Error>;
114
115pub fn parse_dcbor_item(src: &str) -> Result<CBOR> {
145 let mut lexer = Token::lexer(src);
146 let first_token = expect_token(&mut lexer);
147 match first_token {
148 Ok(token) => {
149 parse_item_token(&token, &mut lexer).and_then(|cbor| {
150 if lexer.next().is_some() { Err(Error::ExtraData(lexer.span())) } else { Ok(cbor) }
151 })
152 }
153 Err(e) => {
154 if e == Error::UnexpectedEndOfInput {
155 return Err(Error::EmptyInput);
156 }
157 return Err(e);
158 }
159 }
160}
161
162fn parse_item(lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
167 let token = expect_token(lexer)?;
168 parse_item_token(&token, lexer)
169}
170
171fn expect_token(lexer: &mut Lexer<'_, Token>) -> Result<Token> {
172 let span = lexer.span();
173 match lexer.next() {
174 Some(token_or_err) => {
175 match token_or_err {
176 Ok(token) => { Ok(token) }
177 Err(e) => {
178 if e.is_default() { Err(Error::UnrecognizedToken(span)) } else { Err(e) }
179 }
180 }
181 }
182 None => Err(Error::UnexpectedEndOfInput),
183 }
184}
185
186fn parse_item_token(token: &Token, lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
187 if let Token::ByteStringHex(Err(e)) = token {
189 return Err(e.clone());
190 }
191 if let Token::ByteStringBase64(Err(e)) = token {
192 return Err(e.clone());
193 }
194 if let Token::TagValue(Err(e)) = token {
195 return Err(e.clone());
196 }
197 if let Token::UR(Err(e)) = token {
198 return Err(e.clone());
199 }
200 if let Token::KnownValueNumber(Err(e)) = token {
201 return Err(e.clone());
202 }
203
204 match token {
205 Token::Bool(b) => Ok((*b).into()),
206 Token::Null => Ok(CBOR::null()),
207 Token::ByteStringHex(Ok(bytes)) => Ok(CBOR::to_byte_string(bytes)),
208 Token::ByteStringBase64(Ok(bytes)) => Ok(CBOR::to_byte_string(bytes)),
209 Token::Number(num) => Ok((*num).into()),
210 Token::NaN => Ok(f64::NAN.into()),
211 Token::Infinity => Ok(f64::INFINITY.into()),
212 Token::NegInfinity => Ok(f64::NEG_INFINITY.into()),
213 Token::String(s) => parse_string(s, lexer.span()),
214 Token::UR(Ok(ur)) => parse_ur(ur, lexer.span()),
215 Token::TagValue(Ok(tag_value)) => parse_number_tag(*tag_value, lexer),
216 Token::TagName(name) => parse_name_tag(&name, lexer),
217 Token::KnownValueNumber(Ok(value)) => Ok(KnownValue::new(*value).into()),
218 Token::KnownValueName(name) => {
219 if let Some(known_value) = known_value_for_name(&name) {
220 Ok(known_value.into())
221 } else {
222 let span = lexer.span().start + 1..lexer.span().end - 1;
223 Err(Error::UnknownKnownValueName(name.clone(), span))
224 }
225 }
226 Token::Unit => Ok(KnownValue::new(0).into()),
227 Token::BracketOpen => parse_array(lexer),
228 Token::BraceOpen => parse_map(lexer),
229 _ => Err(Error::UnexpectedToken(Box::new(token.clone()), lexer.span())),
230 }
231}
232
233fn parse_string(s: &str, span: Span) -> Result<CBOR> {
234 if s.starts_with('"') && s.ends_with('"') {
235 let s = &s[1..s.len() - 1];
236 Ok(s.into())
237 } else {
238 Err(Error::UnrecognizedToken(span))
239 }
240}
241
242fn tag_for_name(name: &str) -> Option<Tag> {
243 with_tags!(|tags: &TagsStore| tags.tag_for_name(name))
244}
245
246fn known_value_for_name(name: &str) -> Option<KnownValue> {
247 let binding = known_values::KNOWN_VALUES.get();
248 let known_values = binding.as_ref().unwrap();
249 known_values.known_value_named(name).cloned()
250}
251
252fn parse_ur(ur: &UR, span: Span) -> Result<CBOR> {
253 let ur_type = ur.ur_type_str();
254 if let Some(tag) = tag_for_name(ur_type) {
255 Ok(CBOR::to_tagged_value(tag, ur.cbor()))
256 } else {
257 Err(
258 Error::UnknownUrType(
259 ur_type.to_string(),
260 span.start + 3..span.start + 3 + ur_type.len()
261 )
262 )
263 }
264}
265
266fn parse_number_tag(tag_value: TagValue, lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
267 let item = parse_item(lexer)?;
268 match expect_token(lexer) {
269 Ok(Token::ParenthesisClose) => Ok(CBOR::to_tagged_value(tag_value, item)),
270 Ok(_) => Err(Error::UnmatchedParentheses(lexer.span())),
271 Err(e) => {
272 if e == Error::UnexpectedEndOfInput {
273 return Err(Error::UnmatchedParentheses(lexer.span()));
274 }
275 return Err(e);
276 }
277 }
278}
279
280fn parse_name_tag(name: &str, lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
281 let span = lexer.span().start..lexer.span().end - 1;
282 let item = parse_item(lexer)?;
283 match expect_token(lexer)? {
284 Token::ParenthesisClose => {
285 if let Some(tag) = tag_for_name(name) {
286 Ok(CBOR::to_tagged_value(tag, item))
287 } else {
288 Err(Error::UnknownTagName(name.to_string(), span))
289 }
290 }
291 _ => { Err(Error::UnmatchedParentheses(lexer.span())) }
292 }
293}
294
295fn parse_array(lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
296 let mut items = Vec::new();
297 let mut awaits_comma = false;
298 let mut awaits_item = false;
299
300 loop {
301 match expect_token(lexer)? {
302 Token::Bool(b) if !awaits_comma => {
303 items.push(b.into());
304 awaits_item = false;
305 }
306 Token::Null if !awaits_comma => {
307 items.push(CBOR::null());
308 awaits_item = false;
309 }
310 Token::ByteStringHex(Ok(bytes)) if !awaits_comma => {
311 items.push(CBOR::to_byte_string(bytes));
312 awaits_item = false;
313 }
314 Token::ByteStringBase64(Ok(bytes)) if !awaits_comma => {
315 items.push(CBOR::to_byte_string(bytes));
316 awaits_item = false;
317 }
318 Token::Number(num) if !awaits_comma => {
319 items.push(num.into());
320 awaits_item = false;
321 }
322 Token::NaN if !awaits_comma => {
323 items.push(f64::NAN.into());
324 awaits_item = false;
325 }
326 Token::Infinity if !awaits_comma => {
327 items.push(f64::INFINITY.into());
328 awaits_item = false;
329 }
330 Token::NegInfinity if !awaits_comma => {
331 items.push(f64::NEG_INFINITY.into());
332 awaits_item = false;
333 }
334 Token::String(s) if !awaits_comma => {
335 items.push(parse_string(&s, lexer.span())?);
336 awaits_item = false;
337 }
338 Token::UR(Ok(ur)) if !awaits_comma => {
339 items.push(parse_ur(&ur, lexer.span())?);
340 awaits_item = false;
341 }
342 Token::TagValue(Ok(tag_value)) if !awaits_comma => {
343 items.push(parse_number_tag(tag_value, lexer)?);
344 awaits_item = false;
345 }
346 Token::TagName(name) if !awaits_comma => {
347 items.push(parse_name_tag(&name, lexer)?);
348 awaits_item = false;
349 }
350 Token::KnownValueNumber(Ok(value)) if !awaits_comma => {
351 items.push(KnownValue::new(value).into());
352 awaits_item = false;
353 }
354 Token::KnownValueName(name) if !awaits_comma => {
355 if let Some(known_value) = known_value_for_name(&name) {
356 items.push(known_value.into());
357 } else {
358 return Err(Error::UnknownKnownValueName(name, lexer.span()));
359 }
360 awaits_item = false;
361 }
362 Token::BracketOpen if !awaits_comma => {
363 items.push(parse_array(lexer)?);
364 awaits_item = false;
365 }
366 Token::BraceOpen if !awaits_comma => {
367 items.push(parse_map(lexer)?);
368 awaits_item = false;
369 }
370 Token::Comma if awaits_comma => {
371 awaits_item = true;
372 }
373 Token::BracketClose if !awaits_item => {
374 return Ok(items.into());
375 }
376 token => {
377 if awaits_comma {
378 return Err(Error::ExpectedComma(lexer.span()));
379 }
380 return Err(Error::UnexpectedToken(Box::new(token), lexer.span()));
381 }
382 }
383 awaits_comma = !awaits_item;
384 }
385}
386
387fn parse_map(lexer: &mut Lexer<'_, Token>) -> Result<CBOR> {
388 let mut map = Map::new();
389 let mut awaits_comma = false;
390 let mut awaits_key = false;
391
392 loop {
393 let token = match expect_token(lexer) {
394 Ok(tok) => tok,
395 Err(e) if e == Error::UnexpectedEndOfInput => {
396 return Err(Error::UnmatchedBraces(lexer.span()));
397 }
398 Err(e) => {
399 return Err(e);
400 }
401 };
402 match token {
403 Token::BraceClose if !awaits_key => {
404 return Ok(map.into());
405 }
406 Token::Comma if awaits_comma => {
407 awaits_key = true;
408 }
409 _ => {
410 if awaits_comma {
411 return Err(Error::ExpectedComma(lexer.span()));
412 }
413 let key = parse_item_token(&token, lexer)?;
414 if let Some(Token::Colon) = expect_token(lexer).ok() {
415 let value = match parse_item(lexer) {
416 Err(Error::UnexpectedToken(token, span)) if *token == Token::BraceClose => {
417 return Err(Error::ExpectedMapKey(span));
418 }
419 other => other?,
420 };
421 map.insert(key, value);
422 awaits_key = false;
423 } else {
424 return Err(Error::ExpectedColon(lexer.span()));
425 }
426 }
427 }
428 awaits_comma = !awaits_key;
429 }
430}
431
432#[derive(Debug, Clone, Logos, PartialEq)]
433#[rustfmt::skip]
434#[logos(error = Error)]
435#[logos(skip r"(?:[ \t\r\n\f]|/[^/]*/|#[^\n]*)+")]
436pub enum Token {
437 #[token("false", |_| false)]
438 #[token("true", |_| true)]
439 Bool(bool),
440
441 #[token("{")]
442 BraceOpen,
443
444 #[token("}")]
445 BraceClose,
446
447 #[token("[")]
448 BracketOpen,
449
450 #[token("]")]
451 BracketClose,
452
453 #[token("(")]
454 ParenthesisOpen,
455
456 #[token(")")]
457 ParenthesisClose,
458
459 #[token(":")]
460 Colon,
461
462 #[token(",")]
463 Comma,
464
465 #[token("null")]
466 Null,
467
468 #[token("NaN")]
469 NaN,
470
471 #[token("Infinity")]
472 Infinity,
473
474 #[token("-Infinity")]
475 NegInfinity,
476
477 #[regex(r"h'[0-9a-fA-F]*'", |lex| {
479 let hex = lex.slice();
480 let raw_hex = hex[2..hex.len() - 1].as_bytes();
481 if raw_hex.len() % 2 != 0 {
482 return Err(Error::InvalidHexString(lex.span()));
483 }
484 hex::decode(raw_hex)
485 .map_err(|_|
486 Error::InvalidHexString(lex.span())
487 )
488 })]
489 ByteStringHex(Result<Vec<u8>>),
490
491 #[regex(r"b64'([A-Za-z0-9+/=]{2,})'", |lex| {
493 let base64 = lex.slice();
494 let s = &base64[4..base64.len() - 1];
495 base64::engine::general_purpose::STANDARD
496 .decode(s)
497 .map_err(|_| Error::InvalidBase64String(lex.span()))
498 })]
499 ByteStringBase64(Result<Vec<u8>>),
500
501 #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex|
503 lex.slice().parse::<f64>().unwrap()
504 )]
505 Number(f64),
506
507 #[regex(r#""([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#, |lex|
509 lex.slice().to_owned()
510 )]
511 String(String),
512
513 #[regex(r#"0\(|[1-9][0-9]*\("#, |lex|
515 let span = (lex.span().start)..(lex.span().end - 1);
516 let stripped = lex.slice().strip_suffix('(').unwrap();
517 stripped.parse::<TagValue>().map_err(|_|
518 Error::InvalidTagValue(stripped.to_string(), span)
519 )
520 )]
521 TagValue(Result<TagValue>),
522
523 #[regex(r#"[a-zA-Z_][a-zA-Z0-9_-]*\("#, |lex|
525 lex.slice()[..lex.slice().len()-1].to_string()
527 )]
528 TagName(String),
529
530 #[regex(r#"'0'|'[1-9][0-9]*'"#, |lex|
532 let span = (lex.span().start + 1)..(lex.span().end - 1);
533 let slice = lex.slice();
534 let stripped = slice[1..slice.len() - 1].to_string();
535 stripped.parse::<TagValue>().map_err(|_|
536 Error::InvalidKnownValue(stripped, span)
537 )
538 )]
539 KnownValueNumber(Result<u64>),
540
541 #[regex(r#"''|'[a-zA-Z_][a-zA-Z0-9_-]*'"#, |lex|
544 lex.slice()[1..lex.slice().len()-1].to_string()
545 )]
546 KnownValueName(String),
547
548 #[token("Unit")]
550 Unit,
551
552 #[regex(r#"ur:([a-zA-Z0-9][a-zA-Z0-9-]*)/([a-zA-Z]{8,})"#, |lex|
553 let s = lex.slice();
554 let ur = UR::from_ur_string(s);
555 ur.map_err(|e| {
556 Error::InvalidUr(e.to_string(), lex.span())
557 })
558 )]
559 UR(Result<UR>),
560}