// rustledger_parser/logos_lexer.rs
1//! SIMD-accelerated lexer using Logos.
2//!
3//! This module provides a fast tokenizer for Beancount syntax using the Logos crate,
4//! which generates a DFA-based lexer with SIMD optimizations where available.
5
6use logos::Logos;
7use std::fmt;
8use std::ops::Range;
9
/// A half-open byte range (`[start, end)`) into the source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Start byte offset (inclusive).
    pub start: usize,
    /// End byte offset (exclusive).
    pub end: usize,
}

impl From<Range<usize>> for Span {
    /// Converts a standard `Range` into a `Span` with the same bounds.
    fn from(range: Range<usize>) -> Self {
        let Range { start, end } = range;
        Self { start, end }
    }
}

impl From<Span> for Range<usize> {
    /// Converts a `Span` back into the equivalent standard `Range`.
    fn from(span: Span) -> Self {
        let Span { start, end } = span;
        start..end
    }
}
33
/// Token types produced by the Logos lexer.
///
/// Slice-carrying variants borrow their matched text from the source
/// (`&'src str`). `Indent`/`DeepIndent` are never produced by Logos itself;
/// they are synthesized during post-processing in [`tokenize`].
#[derive(Logos, Debug, Clone, PartialEq, Eq)]
#[logos(skip r"[ \t]+")] // Skip horizontal whitespace (spaces and tabs)
pub enum Token<'src> {
    // ===== Literals =====
    /// A date in YYYY-MM-DD or YYYY/MM/DD format.
    #[regex(r"\d{4}[-/]\d{2}[-/]\d{2}")]
    Date(&'src str),

    /// A number with optional sign, thousands separators, and decimals.
    /// Examples: 123, -456, 1,234.56, 1234.5678, .50, -.50, 1. (trailing decimal)
    /// Python beancount accepts trailing decimal (e.g., "1." meaning "1.0").
    #[regex(r"-?(\.\d+|(\d{1,3}(,\d{3})*|\d+)(\.\d*)?)")]
    Number(&'src str),

    /// A double-quoted string (handles escape sequences).
    /// The slice includes the quotes.
    #[regex(r#""([^"\\]|\\.)*""#)]
    String(&'src str),

    /// An account name like Assets:Bank:Checking, Aktiva:Bank:Girokonto, or Ciste-jmeni:Stav.
    /// Must start with a capitalized word (account type prefix) and have at least one sub-account.
    /// Account type prefix can contain hyphens (e.g., Ciste-jmeni for Czech "Čisté jmění").
    /// Sub-accounts must start with uppercase letter, digit, or non-ASCII character (matching Python beancount).
    /// Supports Unicode letters, symbols, and emojis (e.g., Expenses:École, Assets:沪深300, Assets:CORP✨).
    /// Pattern matches beancount's lexer.l: `([A-Z]|UTF-8-ONLY)([A-Za-z0-9-]|UTF-8-ONLY)*`.
    /// `[^\x00-\x7F]` matches any non-ASCII UTF-8 character (equivalent to beancount's UTF-8-ONLY).
    /// The account type prefix is validated later against options (`name_assets`, etc.).
    #[regex(r"([A-Z]|[^\x00-\x7F])([A-Za-z0-9-]|[^\x00-\x7F])*(:([A-Z0-9]|[^\x00-\x7F])([A-Za-z0-9-]|[^\x00-\x7F])*)+")]
    Account(&'src str),

    /// A currency/commodity code like USD, EUR, AAPL, BTC, or single-char tickers like T, V, F.
    /// Uppercase letters, can contain digits, apostrophes, dots, underscores, hyphens.
    /// Single-character currencies (e.g., T for AT&T, V for Visa) are valid NYSE/NASDAQ tickers.
    /// Note: Single-char currencies are disambiguated from transaction flags in the parser.
    /// Also supports `/` prefix for options/futures contracts (e.g., `/ESM24`, `/LOX21_211204_P100.25`).
    /// The `/` prefix requires an uppercase letter first to avoid matching `/1.14` as currency.
    /// Priority 3 ensures Currency wins over Flag for single uppercase letters.
    #[regex(r"/[A-Z][A-Z0-9'._-]*|[A-Z][A-Z0-9'._-]*", priority = 3)]
    Currency(&'src str),

    /// A tag like #tag-name.
    #[regex(r"#[a-zA-Z0-9-_/.]+")]
    Tag(&'src str),

    /// A link like ^link-name.
    #[regex(r"\^[a-zA-Z0-9-_/.]+")]
    Link(&'src str),

    // ===== Keywords =====
    // Using #[token] for exact matches (higher priority than regex)
    /// The `txn` keyword for transactions.
    #[token("txn")]
    Txn,
    /// The `balance` directive keyword.
    #[token("balance")]
    Balance,
    /// The `open` directive keyword.
    #[token("open")]
    Open,
    /// The `close` directive keyword.
    #[token("close")]
    Close,
    /// The `commodity` directive keyword.
    #[token("commodity")]
    Commodity,
    /// The `pad` directive keyword.
    #[token("pad")]
    Pad,
    /// The `event` directive keyword.
    #[token("event")]
    Event,
    /// The `query` directive keyword.
    #[token("query")]
    Query,
    /// The `note` directive keyword.
    #[token("note")]
    Note,
    /// The `document` directive keyword.
    #[token("document")]
    Document,
    /// The `price` directive keyword.
    #[token("price")]
    Price,
    /// The `custom` directive keyword.
    #[token("custom")]
    Custom,
    /// The `option` directive keyword.
    // Trailing underscore avoids clashing with `std::option::Option`.
    #[token("option")]
    Option_,
    /// The `include` directive keyword.
    #[token("include")]
    Include,
    /// The `plugin` directive keyword.
    #[token("plugin")]
    Plugin,
    /// The `pushtag` directive keyword.
    #[token("pushtag")]
    Pushtag,
    /// The `poptag` directive keyword.
    #[token("poptag")]
    Poptag,
    /// The `pushmeta` directive keyword.
    #[token("pushmeta")]
    Pushmeta,
    /// The `popmeta` directive keyword.
    #[token("popmeta")]
    Popmeta,
    /// The `TRUE` boolean literal (also True, true).
    #[token("TRUE")]
    #[token("True")]
    #[token("true")]
    True,
    /// The `FALSE` boolean literal (also False, false).
    #[token("FALSE")]
    #[token("False")]
    #[token("false")]
    False,
    /// The `NULL` literal.
    #[token("NULL")]
    Null,

    // ===== Punctuation =====
    // Order matters: longer tokens first
    /// Double left brace `{{` for cost specifications (legacy total cost).
    #[token("{{")]
    LDoubleBrace,
    /// Double right brace `}}` for cost specifications.
    #[token("}}")]
    RDoubleBrace,
    /// Left brace with hash `{#` for total cost (new syntax).
    #[token("{#")]
    LBraceHash,
    /// Left brace `{` for cost specifications.
    #[token("{")]
    LBrace,
    /// Right brace `}` for cost specifications.
    #[token("}")]
    RBrace,
    /// Left parenthesis `(` for expressions.
    #[token("(")]
    LParen,
    /// Right parenthesis `)` for expressions.
    #[token(")")]
    RParen,
    /// Double at-sign `@@` for total cost.
    #[token("@@")]
    AtAt,
    /// At-sign `@` for unit cost.
    #[token("@")]
    At,
    /// Colon `:` separator.
    #[token(":")]
    Colon,
    /// Comma `,` separator.
    #[token(",")]
    Comma,
    /// Tilde `~` for tolerance.
    #[token("~")]
    Tilde,
    /// Pipe `|` for deprecated payee/narration separator.
    #[token("|")]
    Pipe,
    /// Plus `+` operator.
    #[token("+")]
    Plus,
    /// Minus `-` operator.
    #[token("-")]
    Minus,
    /// Star `*` for cleared transactions and multiplication.
    #[token("*")]
    Star,
    /// Slash `/` for division.
    #[token("/")]
    Slash,

    // ===== Transaction Flags =====
    /// Pending flag `!` for incomplete transactions.
    #[token("!")]
    Pending,

    /// Other transaction flags: P S T C U R M ? &
    /// Note: # and % are handled as comments when followed by space
    #[regex(r"[PSTCURM?&]")]
    Flag(&'src str),

    // ===== Structural =====
    /// Newline (significant in Beancount for directive boundaries).
    #[regex(r"\r?\n")]
    Newline,

    /// A comment starting with semicolon.
    /// The slice includes the semicolon.
    // NOTE(review): `allow_greedy` is not among logos' documented attribute
    // parameters (`priority`, `callback`, `ignore`) — confirm the pinned
    // logos version accepts it. Same applies to the other `allow_greedy`
    // uses below.
    #[regex(r";[^\n\r]*", allow_greedy = true)]
    Comment(&'src str),

    /// Hash token `#` used as separator in cost specs: `{per_unit # total currency}`
    /// Note: In Python beancount, `#` is only a comment at the START of a line.
    /// Mid-line `# text` is NOT a comment - it's either a cost separator or syntax error.
    /// Start-of-line hash comments are handled in post-processing (tokenize function).
    #[token("#")]
    Hash,

    /// A percent comment (ledger-style).
    /// Python beancount accepts % as a comment character for ledger compatibility.
    #[regex(r"%[^\n\r]*", allow_greedy = true)]
    PercentComment(&'src str),

    /// Shebang line at start of file (e.g., #!/usr/bin/env bean-web).
    /// Treated as a comment-like directive to skip.
    #[regex(r"#![^\n\r]*", allow_greedy = true)]
    Shebang(&'src str),

    /// Emacs org-mode directive (e.g., "#+STARTUP: showall").
    /// These are Emacs configuration lines that should be skipped.
    #[regex(r"#\+[^\n\r]*", allow_greedy = true)]
    EmacsDirective(&'src str),

    /// A metadata key (identifier followed by colon).
    /// Examples: filename:, lineno:, custom-key:, nameOnCard:
    /// The slice includes the trailing colon. Keys can use camelCase or `snake_case`.
    #[regex(r"[a-zA-Z][a-zA-Z0-9_-]*:")]
    MetaKey(&'src str),

    /// Indentation token (inserted by post-processing, not by Logos).
    /// Contains the number of leading spaces.
    /// This is a placeholder - actual indentation detection happens in [`tokenize`].
    Indent(usize),

    /// Deep indentation (3+ spaces) - used for posting-level metadata.
    DeepIndent(usize),

    /// Error token for unrecognized input.
    /// Contains the invalid source text for better error messages.
    Error(&'src str),
}
270
271impl Token<'_> {
272    /// Returns true if this is a transaction flag (* or !).
273    /// Single-character currencies (e.g., T, P, C) can also be used as flags.
274    pub const fn is_txn_flag(&self) -> bool {
275        match self {
276            Self::Star | Self::Pending | Self::Flag(_) | Self::Hash => true,
277            // Single-char currencies can be used as transaction flags
278            Self::Currency(s) => s.len() == 1,
279            _ => false,
280        }
281    }
282
283    /// Returns true if this is a keyword that starts a directive.
284    pub const fn is_directive_keyword(&self) -> bool {
285        matches!(
286            self,
287            Self::Txn
288                | Self::Balance
289                | Self::Open
290                | Self::Close
291                | Self::Commodity
292                | Self::Pad
293                | Self::Event
294                | Self::Query
295                | Self::Note
296                | Self::Document
297                | Self::Price
298                | Self::Custom
299                | Self::Option_
300                | Self::Include
301                | Self::Plugin
302                | Self::Pushtag
303                | Self::Poptag
304                | Self::Pushmeta
305                | Self::Popmeta
306        )
307    }
308}
309
310impl fmt::Display for Token<'_> {
311    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
312        match self {
313            Self::Date(s) => write!(f, "{s}"),
314            Self::Number(s) => write!(f, "{s}"),
315            Self::String(s) => write!(f, "{s}"),
316            Self::Account(s) => write!(f, "{s}"),
317            Self::Currency(s) => write!(f, "{s}"),
318            Self::Tag(s) => write!(f, "{s}"),
319            Self::Link(s) => write!(f, "{s}"),
320            Self::Txn => write!(f, "txn"),
321            Self::Balance => write!(f, "balance"),
322            Self::Open => write!(f, "open"),
323            Self::Close => write!(f, "close"),
324            Self::Commodity => write!(f, "commodity"),
325            Self::Pad => write!(f, "pad"),
326            Self::Event => write!(f, "event"),
327            Self::Query => write!(f, "query"),
328            Self::Note => write!(f, "note"),
329            Self::Document => write!(f, "document"),
330            Self::Price => write!(f, "price"),
331            Self::Custom => write!(f, "custom"),
332            Self::Option_ => write!(f, "option"),
333            Self::Include => write!(f, "include"),
334            Self::Plugin => write!(f, "plugin"),
335            Self::Pushtag => write!(f, "pushtag"),
336            Self::Poptag => write!(f, "poptag"),
337            Self::Pushmeta => write!(f, "pushmeta"),
338            Self::Popmeta => write!(f, "popmeta"),
339            Self::True => write!(f, "TRUE"),
340            Self::False => write!(f, "FALSE"),
341            Self::Null => write!(f, "NULL"),
342            Self::LDoubleBrace => write!(f, "{{{{"),
343            Self::RDoubleBrace => write!(f, "}}}}"),
344            Self::LBraceHash => write!(f, "{{#"),
345            Self::LBrace => write!(f, "{{"),
346            Self::RBrace => write!(f, "}}"),
347            Self::LParen => write!(f, "("),
348            Self::RParen => write!(f, ")"),
349            Self::AtAt => write!(f, "@@"),
350            Self::At => write!(f, "@"),
351            Self::Colon => write!(f, ":"),
352            Self::Comma => write!(f, ","),
353            Self::Tilde => write!(f, "~"),
354            Self::Pipe => write!(f, "|"),
355            Self::Plus => write!(f, "+"),
356            Self::Minus => write!(f, "-"),
357            Self::Star => write!(f, "*"),
358            Self::Slash => write!(f, "/"),
359            Self::Pending => write!(f, "!"),
360            Self::Flag(s) => write!(f, "{s}"),
361            Self::Newline => write!(f, "\\n"),
362            Self::Comment(s) => write!(f, "{s}"),
363            Self::Hash => write!(f, "#"),
364            Self::PercentComment(s) => write!(f, "{s}"),
365            Self::Shebang(s) => write!(f, "{s}"),
366            Self::EmacsDirective(s) => write!(f, "{s}"),
367            Self::MetaKey(s) => write!(f, "{s}"),
368            Self::Indent(n) => write!(f, "<indent:{n}>"),
369            Self::DeepIndent(n) => write!(f, "<deep-indent:{n}>"),
370            Self::Error(s) => write!(f, "{s}"),
371        }
372    }
373}
374
/// Tokenize source code into a vector of (Token, Span) pairs.
///
/// This function:
/// 1. Runs the Logos lexer for fast tokenization
/// 2. Post-processes to detect indentation at line starts
/// 3. Rewrites a column-0 `#` into a whole-line Comment token
/// 4. Handles lexer errors by producing Error tokens
pub fn tokenize(source: &str) -> Vec<(Token<'_>, Span)> {
    let mut tokens = Vec::new();
    let mut lexer = Token::lexer(source);
    // True until the first non-newline token of the current line is seen.
    let mut at_line_start = true;
    // Byte offset just past the most recent newline (0 for the first line).
    // Used both to measure leading whitespace and to detect column-0 tokens.
    let mut last_newline_end = 0usize;

    while let Some(result) = lexer.next() {
        let span = lexer.span();

        match result {
            Ok(Token::Newline) => {
                tokens.push((Token::Newline, span.clone().into()));
                at_line_start = true;
                last_newline_end = span.end;
            }
            // `span.start == last_newline_end` means the `#` sits at column 0
            // (no indentation); only then is it a comment, matching Python
            // beancount. A mid-line `#` stays a Hash (cost-spec separator).
            Ok(Token::Hash) if at_line_start && span.start == last_newline_end => {
                // Hash at very start of line (no indentation) is a comment
                // Find end of line and create a comment token for the whole line
                let comment_start = span.start;
                let line_end = source[span.end..]
                    .find('\n')
                    .map_or(source.len(), |i| span.end + i);
                let comment_text = &source[comment_start..line_end];
                tokens.push((
                    Token::Comment(comment_text),
                    Span {
                        start: comment_start,
                        end: line_end,
                    },
                ));
                // Skip lexer tokens until we reach the newline
                // (the comment line's contents were already lexed as regular
                // tokens, so they must be discarded here).
                while let Some(peek_result) = lexer.next() {
                    let peek_span = lexer.span();
                    let peek_end = peek_span.end;
                    if peek_result == Ok(Token::Newline) {
                        tokens.push((Token::Newline, peek_span.into()));
                        at_line_start = true;
                        last_newline_end = peek_end;
                        break;
                    }
                    // Skip other tokens on the comment line
                }
            }
            Ok(token) => {
                // Check for indentation at line start. The gap between
                // `last_newline_end` and `span.start` is whitespace the
                // Logos skip-rule consumed.
                if at_line_start && span.start > last_newline_end {
                    // Count leading whitespace between last newline and this token
                    // Tabs count as indentation (treat 1 tab as 4 spaces for counting purposes)
                    let leading = &source[last_newline_end..span.start];
                    let mut space_count = 0;
                    let mut char_count = 0;
                    for c in leading.chars() {
                        match c {
                            ' ' => {
                                space_count += 1;
                                char_count += 1;
                            }
                            '\t' => {
                                space_count += 4; // Treat tab as 4 spaces
                                char_count += 1;
                            }
                            _ => break,
                        }
                    }
                    // Python beancount accepts 1+ space for metadata indentation
                    if space_count >= 1 {
                        let indent_start = last_newline_end;
                        let indent_end = last_newline_end + char_count;
                        // Use DeepIndent for 3+ spaces (posting metadata level).
                        // Python beancount allows flexible indentation where posting
                        // metadata just needs to be more indented than the posting.
                        // Common patterns: 2-space posting / 4-space meta, or
                        // 1-space posting / 3-space meta (as in beancount_reds_plugins).
                        let indent_token = if space_count >= 3 {
                            Token::DeepIndent(space_count)
                        } else {
                            Token::Indent(space_count)
                        };
                        tokens.push((
                            indent_token,
                            Span {
                                start: indent_start,
                                end: indent_end,
                            },
                        ));
                    }
                }
                at_line_start = false;
                tokens.push((token, span.into()));
            }
            Err(()) => {
                // Lexer error - produce an Error token with the invalid source text
                // so the parser can report what it could not recognize.
                at_line_start = false;
                let invalid_text = &source[span.clone()];
                tokens.push((Token::Error(invalid_text), span.into()));
            }
        }
    }

    tokens
}
482
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tokenize_date() {
        let tokens = tokenize("2024-01-15");
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].0, Token::Date("2024-01-15"));
    }

    #[test]
    fn test_tokenize_number() {
        // Plain decimal and a signed number with a thousands separator.
        for input in ["1234.56", "-1,234.56"] {
            let tokens = tokenize(input);
            assert_eq!(tokens.len(), 1);
            assert_eq!(tokens[0].0, Token::Number(input));
        }
    }

    #[test]
    fn test_tokenize_account() {
        let tokens = tokenize("Assets:Bank:Checking");
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].0, Token::Account("Assets:Bank:Checking"));
    }

    #[test]
    fn test_tokenize_account_unicode() {
        // Matching beancount's UTF-8-ONLY support (any non-ASCII character):
        // emoji, CJK, accented letters, and currency symbols are all valid
        // inside account components.
        let cases = [
            "Assets:CORP✨",        // emoji (Unicode Symbol category So)
            "Assets:沪深300",       // CJK + digits (Unicode Letter category Lo)
            "Assets:日本銀行",      // full CJK component
            "Assets:Café",          // accented Latin letter
            "Assets:€uro",          // currency symbol (category Sc)
            "Assets:Test💰Account", // emoji in middle of component
        ];
        for account in cases {
            let tokens = tokenize(account);
            assert_eq!(tokens.len(), 1);
            assert_eq!(tokens[0].0, Token::Account(account));
        }
    }

    #[test]
    fn test_tokenize_currency() {
        let tokens = tokenize("USD");
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].0, Token::Currency("USD"));
    }

    #[test]
    fn test_tokenize_single_char_currency() {
        // Single-char NYSE/NASDAQ tickers: T (AT&T), V (Visa), F (Ford)
        for ticker in ["T", "V", "F"] {
            let tokens = tokenize(ticker);
            assert_eq!(tokens.len(), 1);
            assert_eq!(tokens[0].0, Token::Currency(ticker));
        }
    }

    #[test]
    fn test_single_char_currency_is_txn_flag() {
        // A one-letter currency can double as a transaction flag...
        assert!(Token::Currency("T").is_txn_flag());
        // ...but a multi-letter currency cannot.
        assert!(!Token::Currency("USD").is_txn_flag());
    }

    #[test]
    fn test_tokenize_string() {
        let tokens = tokenize(r#""Hello, World!""#);
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].0, Token::String(r#""Hello, World!""#));
    }

    #[test]
    fn test_tokenize_keywords() {
        let tokens = tokenize("txn balance open close");
        let expected = [Token::Txn, Token::Balance, Token::Open, Token::Close];
        assert_eq!(tokens.len(), expected.len());
        for ((actual, _), want) in tokens.iter().zip(&expected) {
            assert_eq!(actual, want);
        }
    }

    #[test]
    fn test_tokenize_tag_and_link() {
        let tokens = tokenize("#my-tag ^my-link");
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].0, Token::Tag("#my-tag"));
        assert_eq!(tokens[1].0, Token::Link("^my-link"));
    }

    #[test]
    fn test_tokenize_comment() {
        let tokens = tokenize("; This is a comment");
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].0, Token::Comment("; This is a comment"));
    }

    #[test]
    fn test_tokenize_indentation() {
        // Expected shape: Txn, Newline, Indent, Account, Number, Currency
        let tokens = tokenize("txn\n  Assets:Bank 100 USD");
        let has_indent = tokens
            .iter()
            .any(|(tok, _)| matches!(tok, Token::Indent(_)));
        assert!(has_indent);
    }

    #[test]
    fn test_tokenize_transaction_line() {
        let source = "2024-01-15 * \"Grocery Store\" #food\n  Expenses:Food 50.00 USD";
        let tokens = tokenize(source);

        // Every expected token kind appears at least once.
        assert!(tokens.iter().any(|(tok, _)| matches!(tok, Token::Date(_))));
        assert!(tokens.iter().any(|(tok, _)| matches!(tok, Token::Star)));
        assert!(tokens.iter().any(|(tok, _)| matches!(tok, Token::String(_))));
        assert!(tokens.iter().any(|(tok, _)| matches!(tok, Token::Tag(_))));
        assert!(tokens.iter().any(|(tok, _)| matches!(tok, Token::Newline)));
        assert!(tokens
            .iter()
            .any(|(tok, _)| matches!(tok, Token::Indent(_) | Token::DeepIndent(_))));
        assert!(tokens
            .iter()
            .any(|(tok, _)| matches!(tok, Token::Account(_))));
        assert!(tokens.iter().any(|(tok, _)| matches!(tok, Token::Number(_))));
        assert!(tokens
            .iter()
            .any(|(tok, _)| matches!(tok, Token::Currency(_))));
    }

    #[test]
    fn test_tokenize_metadata_key() {
        let tokens = tokenize("filename:");
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].0, Token::MetaKey("filename:"));
    }

    #[test]
    fn test_tokenize_punctuation() {
        let tokens = tokenize("{ } @ @@ , ~");
        let kinds: Vec<Token> = tokens.into_iter().map(|(tok, _)| tok).collect();
        for expected in [
            Token::LBrace,
            Token::RBrace,
            Token::At,
            Token::AtAt,
            Token::Comma,
            Token::Tilde,
        ] {
            assert!(kinds.contains(&expected));
        }
    }
}