Skip to main content

only_syntax/
lex.rs

1use logos::Logos;
2use smol_str::SmolStr;
3use text_size::{TextRange, TextSize};
4
5use crate::{LexToken, SyntaxKind};
6
7#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq)]
8enum RawTokenKind {
9    #[token("shell?=")]
10    ShellFallbackKw,
11    #[token("shell")]
12    ShellKw,
13    #[token("!")]
14    Bang,
15    #[token("%")]
16    Percent,
17    #[token(":")]
18    Colon,
19    #[token("?")]
20    Question,
21    #[token("&")]
22    Amp,
23    #[token("=")]
24    Eq,
25    #[token("@")]
26    At,
27    #[token("(")]
28    LParen,
29    #[token(")")]
30    RParen,
31    #[token("[")]
32    LBracket,
33    #[token("]")]
34    RBracket,
35    #[regex(r#""([^"\n]|\\.)*""#)]
36    String,
37    #[regex(r"[A-Za-z_-][A-Za-z0-9_-]*")]
38    Ident,
39    #[regex(r"#[^\n]*", allow_greedy = true)]
40    Comment,
41    #[regex(r"[ ]+")]
42    Spaces,
43    #[regex(r"\r\n|\n|\r")]
44    Newline,
45}
46
47/// Lexes source text into tokens while preserving trivia.
48///
49/// Args:
50/// source: Raw Onlyfile source text.
51///
52/// Returns:
53/// Token stream including whitespace, comments and EOF.
54pub fn lex(source: &str) -> Vec<LexToken> {
55    let mut lexer = RawTokenKind::lexer(source);
56    let mut tokens = Vec::new();
57    let mut line_start = true;
58
59    while let Some(result) = lexer.next() {
60        let span = lexer.span();
61        let text = &source[span.clone()];
62        let start = TextSize::from(span.start as u32);
63        let end = TextSize::from(span.end as u32);
64        let kind = match result {
65            Ok(RawTokenKind::ShellFallbackKw) => SyntaxKind::ShellFallbackKw,
66            Ok(RawTokenKind::ShellKw) => SyntaxKind::ShellKw,
67            Ok(RawTokenKind::Bang) => SyntaxKind::Bang,
68            Ok(RawTokenKind::Percent) => SyntaxKind::Percent,
69            Ok(RawTokenKind::Colon) => SyntaxKind::Colon,
70            Ok(RawTokenKind::Question) => SyntaxKind::Question,
71            Ok(RawTokenKind::Amp) => SyntaxKind::Amp,
72            Ok(RawTokenKind::Eq) => SyntaxKind::Eq,
73            Ok(RawTokenKind::At) => SyntaxKind::At,
74            Ok(RawTokenKind::LParen) => SyntaxKind::LParen,
75            Ok(RawTokenKind::RParen) => SyntaxKind::RParen,
76            Ok(RawTokenKind::LBracket) => SyntaxKind::LBracket,
77            Ok(RawTokenKind::RBracket) => SyntaxKind::RBracket,
78            Ok(RawTokenKind::String) => SyntaxKind::String,
79            Ok(RawTokenKind::Ident) => SyntaxKind::Ident,
80            Ok(RawTokenKind::Comment) => SyntaxKind::Comment,
81            Ok(RawTokenKind::Newline) => SyntaxKind::Newline,
82            Ok(RawTokenKind::Spaces) if line_start => SyntaxKind::Indent,
83            Ok(RawTokenKind::Spaces) => SyntaxKind::Whitespace,
84            Err(_) => SyntaxKind::Unknown,
85        };
86
87        tokens.push(LexToken {
88            kind,
89            text: SmolStr::new(text),
90            range: TextRange::new(start, end),
91        });
92
93        line_start = kind == SyntaxKind::Newline;
94    }
95
96    let eof = TextSize::from(source.len() as u32);
97    tokens.push(LexToken {
98        kind: SyntaxKind::Eof,
99        text: SmolStr::new(""),
100        range: TextRange::new(eof, eof),
101    });
102
103    tokens
104}