//! Lexer: turns source text into `(Token, Span)` pairs and reconstructs
//! line-leading indentation, which the `logos` lexer itself skips.
use logos::Logos;
use std::fmt;
use std::ops::Range;

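/// A byte-offset range into the source text.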
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    pub start: usize,
    pub end: usize,
}

impl From<Range<usize>> for Span {
    fn from(range: Range<usize>) -> Self {
        Self {
            start: range.start,
            end: range.end,
        }
    }
}

impl From<Span> for Range<usize> {
    fn from(span: Span) -> Self {
        span.start..span.end
    }
}

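/// Tokens produced by the lexer. Inline spaces and tabs are skipped by
/// `logos`; indentation tokens are synthesized afterwards by `tokenize`.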
#[derive(Logos, Debug, Clone, PartialEq, Eq)]
#[logos(skip r"[ \t]+")]
pub enum Token<'src> {
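    /// A calendar date such as `2024-01-15` (`-` or `/` separators).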
    #[regex(r"\d{4}[-/]\d{2}[-/]\d{2}")]
    Date(&'src str),

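    /// A decimal number, optionally signed and with comma thousands
    /// separators, e.g. `-1,234.56`.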
    #[regex(r"-?(\.\d+|(\d{1,3}(,\d{3})*|\d+)(\.\d+)?)")]
    Number(&'src str),

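    /// A double-quoted string literal; backslash escapes are allowed.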
    #[regex(r#""([^"\\]|\\.)*""#)]
    String(&'src str),

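    /// An account name: a root (`Assets`, `Liabilities`, `Equity`, `Income`,
    /// or `Expenses`) followed by one or more `:`-separated components.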
    #[regex(r"(Assets|Liabilities|Equity|Income|Expenses)(:[A-Za-z0-9][a-zA-Z0-9-]*)+")]
    Account(&'src str),

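    /// A currency or commodity code such as `USD`: an uppercase letter
    /// followed by at least one more allowed character, or a `/`-prefixed
    /// form.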
    #[regex(r"/[A-Z0-9'._-]+|[A-Z][A-Z0-9'._-]+")]
    Currency(&'src str),

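    /// A `#`-prefixed tag, e.g. `#my-tag`.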
    #[regex(r"#[a-zA-Z0-9-_/.]+")]
    Tag(&'src str),

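    /// A `^`-prefixed link, e.g. `^my-link`.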
    #[regex(r"\^[a-zA-Z0-9-_/.]+")]
    Link(&'src str),

    #[token("txn")]
    Txn,
    #[token("balance")]
    Balance,
    #[token("open")]
    Open,
    #[token("close")]
    Close,
    #[token("commodity")]
    Commodity,
    #[token("pad")]
    Pad,
    #[token("event")]
    Event,
    #[token("query")]
    Query,
    #[token("note")]
    Note,
    #[token("document")]
    Document,
    #[token("price")]
    Price,
    #[token("custom")]
    Custom,
    #[token("option")]
    Option_,
    #[token("include")]
    Include,
    #[token("plugin")]
    Plugin,
    #[token("pushtag")]
    Pushtag,
    #[token("poptag")]
    Poptag,
    #[token("pushmeta")]
    Pushmeta,
    #[token("popmeta")]
    Popmeta,
    #[token("TRUE")]
    #[token("True")]
    #[token("true")]
    True,
    #[token("FALSE")]
    #[token("False")]
    #[token("false")]
    False,
    #[token("NULL")]
    Null,

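    // Punctuation and operators.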
    #[token("{{")]
    LDoubleBrace,
    #[token("}}")]
    RDoubleBrace,
    #[token("{#")]
    LBraceHash,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("@@")]
    AtAt,
    #[token("@")]
    At,
    #[token(":")]
    Colon,
    #[token(",")]
    Comma,
    #[token("~")]
    Tilde,
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,

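    /// The `!` pending-transaction flag.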
    #[token("!")]
    Pending,

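    /// A single-character transaction flag other than `*` or `!`.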
    #[regex(r"[PSTCURM#?%&]")]
    Flag(&'src str),

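    /// A line ending (`\n` or `\r\n`).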
    #[regex(r"\r?\n")]
    Newline,

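    /// A `;` comment running to the end of the line.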
    #[regex(r";[^\n\r]*")]
    Comment(&'src str),

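    /// A `#!` shebang line.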
    #[regex(r"#![^\n\r]*")]
    Shebang(&'src str),

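    /// An Emacs-style directive line starting with `#+`.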
    #[regex(r"#\+[^\n\r]*")]
    EmacsDirective(&'src str),

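    /// A metadata key ending in `:`, e.g. `filename:`.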
    #[regex(r"[a-zA-Z][a-zA-Z0-9_-]*:")]
    MetaKey(&'src str),

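    /// Leading indentation of two or three effective spaces. Synthesized by
    /// `tokenize`, never matched by `logos` directly.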
    Indent(usize),

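    /// Leading indentation of four or more effective spaces (a tab counts
    /// as four); also synthesized by `tokenize`.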
    DeepIndent(usize),

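    /// Placeholder for input the lexer could not match.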
    Error,
}

impl Token<'_> {
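    /// Returns `true` for tokens that can flag a transaction (`*`, `!`, or
    /// a single-letter flag).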
    pub const fn is_txn_flag(&self) -> bool {
        matches!(self, Self::Star | Self::Pending | Self::Flag(_))
    }

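    /// Returns `true` for keywords that begin a directive.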
    pub const fn is_directive_keyword(&self) -> bool {
        matches!(
            self,
            Self::Txn
                | Self::Balance
                | Self::Open
                | Self::Close
                | Self::Commodity
                | Self::Pad
                | Self::Event
                | Self::Query
                | Self::Note
                | Self::Document
                | Self::Price
                | Self::Custom
                | Self::Option_
                | Self::Include
                | Self::Plugin
                | Self::Pushtag
                | Self::Poptag
                | Self::Pushmeta
                | Self::Popmeta
        )
    }
}

impl fmt::Display for Token<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Date(s) => write!(f, "{s}"),
            Self::Number(s) => write!(f, "{s}"),
            Self::String(s) => write!(f, "{s}"),
            Self::Account(s) => write!(f, "{s}"),
            Self::Currency(s) => write!(f, "{s}"),
            Self::Tag(s) => write!(f, "{s}"),
            Self::Link(s) => write!(f, "{s}"),
            Self::Txn => write!(f, "txn"),
            Self::Balance => write!(f, "balance"),
            Self::Open => write!(f, "open"),
            Self::Close => write!(f, "close"),
            Self::Commodity => write!(f, "commodity"),
            Self::Pad => write!(f, "pad"),
            Self::Event => write!(f, "event"),
            Self::Query => write!(f, "query"),
            Self::Note => write!(f, "note"),
            Self::Document => write!(f, "document"),
            Self::Price => write!(f, "price"),
            Self::Custom => write!(f, "custom"),
            Self::Option_ => write!(f, "option"),
            Self::Include => write!(f, "include"),
            Self::Plugin => write!(f, "plugin"),
            Self::Pushtag => write!(f, "pushtag"),
            Self::Poptag => write!(f, "poptag"),
            Self::Pushmeta => write!(f, "pushmeta"),
            Self::Popmeta => write!(f, "popmeta"),
            Self::True => write!(f, "TRUE"),
            Self::False => write!(f, "FALSE"),
            Self::Null => write!(f, "NULL"),
            Self::LDoubleBrace => write!(f, "{{{{"),
            Self::RDoubleBrace => write!(f, "}}}}"),
            Self::LBraceHash => write!(f, "{{#"),
            Self::LBrace => write!(f, "{{"),
            Self::RBrace => write!(f, "}}"),
            Self::LParen => write!(f, "("),
            Self::RParen => write!(f, ")"),
            Self::AtAt => write!(f, "@@"),
            Self::At => write!(f, "@"),
            Self::Colon => write!(f, ":"),
            Self::Comma => write!(f, ","),
            Self::Tilde => write!(f, "~"),
            Self::Plus => write!(f, "+"),
            Self::Minus => write!(f, "-"),
            Self::Star => write!(f, "*"),
            Self::Slash => write!(f, "/"),
            Self::Pending => write!(f, "!"),
            Self::Flag(s) => write!(f, "{s}"),
            Self::Newline => write!(f, "\\n"),
            Self::Comment(s) => write!(f, "{s}"),
            Self::Shebang(s) => write!(f, "{s}"),
            Self::EmacsDirective(s) => write!(f, "{s}"),
            Self::MetaKey(s) => write!(f, "{s}"),
            Self::Indent(n) => write!(f, "<indent:{n}>"),
            Self::DeepIndent(n) => write!(f, "<deep-indent:{n}>"),
            Self::Error => write!(f, "<error>"),
        }
    }
}

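/// Runs the lexer over `source` and returns `(Token, Span)` pairs. Because
/// `logos` skips inline spaces and tabs, this pass re-inspects the gap after
/// each newline and emits `Indent`/`DeepIndent` tokens for significant
/// leading whitespace (two or more effective spaces).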
pub fn tokenize(source: &str) -> Vec<(Token<'_>, Span)> {
    let mut tokens = Vec::new();
    let mut lexer = Token::lexer(source);
    let mut at_line_start = true;
    let mut last_newline_end = 0usize;

    while let Some(result) = lexer.next() {
        let span = lexer.span();

        match result {
            Ok(Token::Newline) => {
                tokens.push((Token::Newline, span.clone().into()));
                at_line_start = true;
                last_newline_end = span.end;
            }
            Ok(token) => {
                if at_line_start && span.start > last_newline_end {
                    // Measure the whitespace logos skipped between the start
                    // of the line and the first token on it.
                    let leading = &source[last_newline_end..span.start];
                    let mut space_count = 0;
                    let mut char_count = 0;
                    for c in leading.chars() {
                        match c {
                            ' ' => {
                                space_count += 1;
                                char_count += 1;
                            }
                            '\t' => {
                                // A tab counts as four spaces.
                                space_count += 4;
                                char_count += 1;
                            }
                            _ => break,
                        }
                    }
                    if space_count >= 2 {
                        let indent_start = last_newline_end;
                        let indent_end = last_newline_end + char_count;
                        let indent_token = if space_count >= 4 {
                            Token::DeepIndent(space_count)
                        } else {
                            Token::Indent(space_count)
                        };
                        tokens.push((
                            indent_token,
                            Span {
                                start: indent_start,
                                end: indent_end,
                            },
                        ));
                    }
                }
                at_line_start = false;
                tokens.push((token, span.into()));
            }
            Err(()) => {
                at_line_start = false;
                tokens.push((Token::Error, span.into()));
            }
        }
    }

    tokens
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tokenize_date() {
        let tokens = tokenize("2024-01-15");
        assert_eq!(tokens.len(), 1);
        assert!(matches!(tokens[0].0, Token::Date("2024-01-15")));
    }

    #[test]
    fn test_tokenize_number() {
        let tokens = tokenize("1234.56");
        assert_eq!(tokens.len(), 1);
        assert!(matches!(tokens[0].0, Token::Number("1234.56")));

        let tokens = tokenize("-1,234.56");
        assert_eq!(tokens.len(), 1);
        assert!(matches!(tokens[0].0, Token::Number("-1,234.56")));
    }

    #[test]
    fn test_tokenize_account() {
        let tokens = tokenize("Assets:Bank:Checking");
        assert_eq!(tokens.len(), 1);
        assert!(matches!(
            tokens[0].0,
            Token::Account("Assets:Bank:Checking")
        ));
    }

    #[test]
    fn test_tokenize_currency() {
        let tokens = tokenize("USD");
        assert_eq!(tokens.len(), 1);
        assert!(matches!(tokens[0].0, Token::Currency("USD")));
    }

    #[test]
    fn test_tokenize_string() {
        let tokens = tokenize(r#""Hello, World!""#);
        assert_eq!(tokens.len(), 1);
        assert!(matches!(tokens[0].0, Token::String(r#""Hello, World!""#)));
    }

    #[test]
    fn test_tokenize_keywords() {
        let tokens = tokenize("txn balance open close");
        assert_eq!(tokens.len(), 4);
        assert!(matches!(tokens[0].0, Token::Txn));
        assert!(matches!(tokens[1].0, Token::Balance));
        assert!(matches!(tokens[2].0, Token::Open));
        assert!(matches!(tokens[3].0, Token::Close));
    }

    #[test]
    fn test_tokenize_tag_and_link() {
        let tokens = tokenize("#my-tag ^my-link");
        assert_eq!(tokens.len(), 2);
        assert!(matches!(tokens[0].0, Token::Tag("#my-tag")));
        assert!(matches!(tokens[1].0, Token::Link("^my-link")));
    }

    #[test]
    fn test_tokenize_comment() {
        let tokens = tokenize("; This is a comment");
        assert_eq!(tokens.len(), 1);
        assert!(matches!(tokens[0].0, Token::Comment("; This is a comment")));
    }

    #[test]
    fn test_tokenize_indentation() {
        // Two leading spaces are required for an Indent token.
        let tokens = tokenize("txn\n  Assets:Bank 100 USD");
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Indent(_))));
    }

    #[test]
    fn test_tokenize_transaction_line() {
        let source = "2024-01-15 * \"Grocery Store\" #food\n  Expenses:Food 50.00 USD";
        let tokens = tokenize(source);

        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Date(_))));
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Star)));
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::String(_))));
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Tag(_))));
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Newline)));
        assert!(tokens
            .iter()
            .any(|(t, _)| matches!(t, Token::Indent(_) | Token::DeepIndent(_))));
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Account(_))));
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Number(_))));
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Currency(_))));
    }

    #[test]
    fn test_tokenize_metadata_key() {
        let tokens = tokenize("filename:");
        assert_eq!(tokens.len(), 1);
        assert!(matches!(tokens[0].0, Token::MetaKey("filename:")));
    }

    #[test]
    fn test_tokenize_punctuation() {
        let tokens = tokenize("{ } @ @@ , ~");
        let token_types: Vec<_> = tokens.iter().map(|(t, _)| t.clone()).collect();
        assert!(token_types.contains(&Token::LBrace));
        assert!(token_types.contains(&Token::RBrace));
        assert!(token_types.contains(&Token::At));
        assert!(token_types.contains(&Token::AtAt));
        assert!(token_types.contains(&Token::Comma));
        assert!(token_types.contains(&Token::Tilde));
    }
}