rustledger_parser/
token_input.rs

1//! Token-based input using chumsky's slice input.
2//!
3//! This module provides utilities for parsing pre-tokenized input using
4//! chumsky. We store tokens in a Vec and parse from a slice, with spans
5//! tracked separately.
6//!
7//! This module is a proof-of-concept for token-based parsing. The types
8//! will be used when migrating the parser from character-based to token-based.
9
10// Allow dead_code for now - this is a proof-of-concept that will be used later
11#![allow(dead_code)]
12
13use chumsky::prelude::*;
14
15use crate::logos_lexer::{tokenize, Token};
16
17/// A spanned token - a token paired with its byte offset span.
/// A spanned token - a token paired with its byte offset span.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SpannedToken<'src> {
    /// The token.
    pub token: Token<'src>,
    /// Byte offset span into the source, half-open `[start, end)`
    /// (e.g. "open" at the start of the input spans (0, 4)).
    pub span: (usize, usize),
}
25
26impl<'src> SpannedToken<'src> {
27    /// Create a new spanned token.
28    pub const fn new(token: Token<'src>, start: usize, end: usize) -> Self {
29        Self {
30            token,
31            span: (start, end),
32        }
33    }
34}
35
36/// Tokenized input ready for parsing.
37///
38/// This struct owns the token vector and provides a slice view for parsing.
39pub struct TokenizedInput<'src> {
40    tokens: Vec<SpannedToken<'src>>,
41    source_len: usize,
42}
43
44impl<'src> TokenizedInput<'src> {
45    /// Create tokenized input from source code.
46    pub fn new(source: &'src str) -> Self {
47        let raw_tokens = tokenize(source);
48        let tokens = raw_tokens
49            .into_iter()
50            .map(|(token, span)| SpannedToken::new(token, span.start, span.end))
51            .collect();
52
53        Self {
54            tokens,
55            source_len: source.len(),
56        }
57    }
58
59    /// Get the tokens as a slice for parsing.
60    pub fn as_slice(&self) -> &[SpannedToken<'src>] {
61        &self.tokens
62    }
63
64    /// Get the source length (for EOI span).
65    pub const fn source_len(&self) -> usize {
66        self.source_len
67    }
68}
69
/// Type alias for the parser extra with our token type.
///
/// Bundles chumsky's `Rich` error type over [`SpannedToken`] so parsers in
/// this module don't have to spell out the full `extra::Err<…>` type.
pub type TokenExtra<'src> = extra::Err<Rich<'src, SpannedToken<'src>>>;
72
/// Parse a token by kind, extracting the inner token.
///
/// Two forms:
/// - `token!(Pat => expr)` — matches a [`SpannedToken`] whose `token` field
///   fits `Pat` and yields `expr` (typically the pattern's captured payload).
/// - `token!(Pat)` — matches the same way but yields `()`.
///
/// NOTE(review): this macro is `#[macro_export]`ed yet expands to
/// unqualified `SpannedToken` and `select!`, so any caller outside this
/// module must have both names in scope. If the macro is meant for use
/// beyond this crate, consider `$crate::…` / fully-qualified paths —
/// confirm the intended visibility before changing.
#[macro_export]
macro_rules! token {
    ($pattern:pat => $result:expr) => {
        select! {
            SpannedToken { token: $pattern, .. } => $result
        }
    };
    ($pattern:pat) => {
        select! {
            SpannedToken { token: $pattern, .. } => ()
        }
    };
}
87
#[cfg(test)]
mod tests {
    use super::*;
    use crate::logos_lexer::Token;

    #[test]
    fn test_tokenized_input_basic() {
        let tokenized = TokenizedInput::new("2024-01-15");

        assert_eq!(tokenized.tokens.len(), 1);
        let first = &tokenized.tokens[0];
        assert!(matches!(first.token, Token::Date("2024-01-15")));
        assert_eq!(first.span, (0, 10));
    }

    #[test]
    fn test_tokenized_input_multiple() {
        let tokenized = TokenizedInput::new("open Assets:Bank USD");
        let toks = &tokenized.tokens;

        assert_eq!(toks.len(), 3);
        assert!(matches!(toks[0].token, Token::Open));
        assert!(matches!(toks[1].token, Token::Account(_)));
        assert!(matches!(toks[2].token, Token::Currency(_)));
    }

    #[test]
    fn test_parse_any_token() {
        let tokenized = TokenizedInput::new("2024-01-15");

        let result = any::<_, TokenExtra<'_>>()
            .parse(tokenized.as_slice())
            .into_result();

        assert!(result.is_ok(), "Parse failed: {result:?}");
        assert!(matches!(result.unwrap().token, Token::Date("2024-01-15")));
    }

    #[test]
    fn test_parse_collect_all() {
        let tokenized = TokenizedInput::new("open Assets:Bank USD");

        let result = any::<_, TokenExtra<'_>>()
            .repeated()
            .collect::<Vec<_>>()
            .parse(tokenized.as_slice())
            .into_result();

        assert!(result.is_ok(), "Parse failed: {result:?}");
        assert_eq!(result.unwrap().len(), 3);
    }

    #[test]
    fn test_parse_select_date() {
        let tokenized = TokenizedInput::new("2024-01-15 open Assets:Bank");

        // filter + match keeps type inference simple (avoids select!);
        // the trailing tokens are consumed and discarded so only the
        // first date survives.
        let date = any::<_, TokenExtra<'_>>()
            .filter(|t: &SpannedToken<'_>| matches!(t.token, Token::Date(_)))
            .map(|t: SpannedToken<'_>| match t.token {
                Token::Date(d) => d.to_string(),
                _ => unreachable!(),
            })
            .then_ignore(any().repeated());

        let result = date.parse(tokenized.as_slice()).into_result();

        assert!(result.is_ok(), "Parse failed: {result:?}");
        assert_eq!(result.unwrap(), "2024-01-15");
    }

    #[test]
    fn test_parse_sequence() {
        let tokenized = TokenizedInput::new("open Assets:Bank USD");

        // Each sub-parser filters on the token kind, then extracts the
        // payload; the filter guarantees the match arm is taken.
        let kw = any::<_, TokenExtra<'_>>()
            .filter(|t: &SpannedToken<'_>| matches!(t.token, Token::Open))
            .to("open");

        let acct = any::<_, TokenExtra<'_>>()
            .filter(|t: &SpannedToken<'_>| matches!(t.token, Token::Account(_)))
            .map(|t: SpannedToken<'_>| match t.token {
                Token::Account(a) => a.to_string(),
                _ => unreachable!(),
            });

        let cur = any::<_, TokenExtra<'_>>()
            .filter(|t: &SpannedToken<'_>| matches!(t.token, Token::Currency(_)))
            .map(|t: SpannedToken<'_>| match t.token {
                Token::Currency(c) => c.to_string(),
                _ => unreachable!(),
            });

        let result = kw
            .then(acct)
            .then(cur)
            .parse(tokenized.as_slice())
            .into_result();
        assert!(result.is_ok(), "Parse failed: {result:?}");

        let ((kw, acc), curr) = result.unwrap();
        assert_eq!(kw, "open");
        assert_eq!(acc, "Assets:Bank");
        assert_eq!(curr, "USD");
    }

    #[test]
    fn test_parse_with_newlines_and_indent() {
        let tokenized = TokenizedInput::new("txn\n  Assets:Bank 100 USD");

        let result = any::<_, TokenExtra<'_>>()
            .repeated()
            .collect::<Vec<_>>()
            .parse(tokenized.as_slice())
            .into_result();

        assert!(result.is_ok(), "Parse failed: {result:?}");
        let toks = result.unwrap();

        // Expected stream: Txn, Newline, Indent, Account, Number, Currency.
        assert!(toks.iter().any(|t| matches!(t.token, Token::Txn)));
        assert!(toks.iter().any(|t| matches!(t.token, Token::Newline)));
        assert!(toks.iter().any(|t| matches!(t.token, Token::Indent(_))));
        assert!(toks.iter().any(|t| matches!(t.token, Token::Account(_))));
        assert!(toks.iter().any(|t| matches!(t.token, Token::Number(_))));
        assert!(toks.iter().any(|t| matches!(t.token, Token::Currency(_))));
    }

    #[test]
    fn test_span_preservation() {
        let tokenized = TokenizedInput::new("open Assets:Bank");

        // Spans are byte offsets into the original source, end-exclusive.
        assert_eq!(tokenized.tokens[0].span, (0, 4)); // "open"
        assert_eq!(tokenized.tokens[1].span, (5, 16)); // "Assets:Bank"
    }
}