1use crate::{kind::TypstSyntaxKind, language::TypstLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5 source::{Source, TextEdit},
6};
7use std::sync::LazyLock;
8
/// Shorthand for the framework lexer state specialized to Typst.
type State<'s, S> = LexerState<'s, S, TypstLanguage>;

/// Whitespace scanner config; Typst accepts Unicode whitespace, not just ASCII.
static TYPST_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// Comment scanner config: `//` line comments and `/* … */` block comments,
/// with block comments allowed to nest (as in Typst itself).
static TYPST_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true });
/// String scanner config: double-quoted strings with backslash escapes.
static TYPST_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
/// Hand-written lexer for Typst source text.
///
/// Construct with [`TypstLexer::new`]; tokenization happens through the
/// [`Lexer`] trait implementation.
#[derive(Clone)]
pub struct TypstLexer<'config> {
    // Language configuration handle. Currently unused by the scanning code
    // (hence the underscore), but kept so the lexer can become configurable
    // without an interface change.
    _config: &'config TypstLanguage,
}
19
20impl<'config> Lexer<TypstLanguage> for TypstLexer<'config> {
21 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], _cache: &'a mut impl LexerCache<TypstLanguage>) -> LexOutput<TypstLanguage> {
22 let mut state = State::new(source);
23 let result = self.run(&mut state);
24 if result.is_ok() {
25 state.add_eof();
26 }
27 state.finish(result)
28 }
29}
30
31impl<'config> TypstLexer<'config> {
    /// Creates a lexer bound to the given language configuration.
    pub fn new(config: &'config TypstLanguage) -> Self {
        Self { _config: config }
    }
35
36 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
37 while state.not_at_end() {
38 let safe_point = state.get_position();
39
40 if TYPST_WHITESPACE.scan(state, TypstSyntaxKind::Whitespace) {
41 continue;
42 }
43
44 if TYPST_COMMENT.scan(state, TypstSyntaxKind::LineComment, TypstSyntaxKind::BlockComment) {
45 continue;
46 }
47
48 if TYPST_STRING.scan(state, TypstSyntaxKind::StringLiteral) {
49 continue;
50 }
51
52 if self.lex_number_literal(state) {
53 continue;
54 }
55
56 if self.lex_identifier_or_keyword(state) {
57 continue;
58 }
59
60 if self.lex_operators(state) {
61 continue;
62 }
63
64 if self.lex_single_char_tokens(state) {
65 continue;
66 }
67
68 state.advance_if_dead_lock(safe_point);
69 }
70
71 Ok(())
72 }
73
74 fn lex_number_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
75 let start = state.get_position();
76 let text = state.rest();
77 if text.is_empty() || !text.chars().next().unwrap().is_ascii_digit() {
78 return false;
79 }
80
81 let mut pos = 0;
82 let chars: Vec<char> = text.chars().collect();
83
84 while pos < chars.len() && chars[pos].is_ascii_digit() {
86 pos += 1;
87 }
88
89 if pos < chars.len() && chars[pos] == '.' && pos + 1 < chars.len() && chars[pos + 1].is_ascii_digit() {
91 pos += 1; while pos < chars.len() && chars[pos].is_ascii_digit() {
93 pos += 1;
94 }
95 }
96
97 if pos < chars.len() && (chars[pos] == 'e' || chars[pos] == 'E') {
99 pos += 1;
100 if pos < chars.len() && (chars[pos] == '+' || chars[pos] == '-') {
101 pos += 1;
102 }
103 while pos < chars.len() && chars[pos].is_ascii_digit() {
104 pos += 1;
105 }
106 }
107
108 if pos > 0 {
109 state.advance(pos);
110 state.add_token(TypstSyntaxKind::NumericLiteral, start, state.get_position());
111 return true;
112 }
113
114 false
115 }
116
117 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
118 let start = state.get_position();
119 let text = state.rest();
120 if text.is_empty() {
121 return false;
122 }
123
124 let first_char = text.chars().next().unwrap();
125 if !first_char.is_ascii_alphabetic() && first_char != '_' {
126 return false;
127 }
128
129 let mut pos = 0;
130 let chars: Vec<char> = text.chars().collect();
131
132 pos += 1;
134
135 while pos < chars.len() && (chars[pos].is_ascii_alphanumeric() || chars[pos] == '_') {
137 pos += 1;
138 }
139
140 if pos > 0 {
141 let identifier_text = &text[..pos];
142 let kind = self.keyword_or_identifier(identifier_text);
143 state.advance(pos);
144 state.add_token(kind, start, state.get_position());
145 return true;
146 }
147
148 false
149 }
150
151 fn keyword_or_identifier(&self, text: &str) -> TypstSyntaxKind {
152 match text {
153 "let" => TypstSyntaxKind::Let,
154 "if" => TypstSyntaxKind::If,
155 "else" => TypstSyntaxKind::Else,
156 "for" => TypstSyntaxKind::For,
157 "while" => TypstSyntaxKind::While,
158 "break" => TypstSyntaxKind::Break,
159 "continue" => TypstSyntaxKind::Continue,
160 "return" => TypstSyntaxKind::Return,
161 "true" => TypstSyntaxKind::True,
162 "false" => TypstSyntaxKind::False,
163 "set" => TypstSyntaxKind::Set,
164 "show" => TypstSyntaxKind::Show,
165 "import" => TypstSyntaxKind::Import,
166 "include" => TypstSyntaxKind::Include,
167 _ => TypstSyntaxKind::Identifier,
168 }
169 }
170
171 fn lex_operators<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
172 let start = state.get_position();
173 let text = state.rest();
174 if text.is_empty() {
175 return false;
176 }
177
178 let chars: Vec<char> = text.chars().collect();
179
180 let (kind, len) = match chars[0] {
181 '=' => {
182 if chars.len() > 1 && chars[1] == '=' {
183 (TypstSyntaxKind::EqualEqual, 2)
184 }
185 else {
186 (TypstSyntaxKind::Equal, 1)
187 }
188 }
189 '!' => {
190 if chars.len() > 1 && chars[1] == '=' {
191 (TypstSyntaxKind::NotEqual, 2)
192 }
193 else {
194 (TypstSyntaxKind::Not, 1)
195 }
196 }
197 '<' => {
198 if chars.len() > 1 && chars[1] == '=' {
199 (TypstSyntaxKind::LessEqual, 2)
200 }
201 else {
202 (TypstSyntaxKind::Less, 1)
203 }
204 }
205 '>' => {
206 if chars.len() > 1 && chars[1] == '=' {
207 (TypstSyntaxKind::GreaterEqual, 2)
208 }
209 else {
210 (TypstSyntaxKind::Greater, 1)
211 }
212 }
213 '&' => {
214 if chars.len() > 1 && chars[1] == '&' {
215 (TypstSyntaxKind::And, 2)
216 }
217 else {
218 return false;
219 }
220 }
221 '|' => {
222 if chars.len() > 1 && chars[1] == '|' {
223 (TypstSyntaxKind::Or, 2)
224 }
225 else {
226 return false;
227 }
228 }
229 '+' => (TypstSyntaxKind::Plus, 1),
230 '-' => (TypstSyntaxKind::Minus, 1),
231 '*' => (TypstSyntaxKind::Star, 1),
232 '/' => (TypstSyntaxKind::Slash, 1),
233 '%' => (TypstSyntaxKind::Percent, 1),
234 _ => return false,
235 };
236
237 state.advance(len);
238 state.add_token(kind, start, state.get_position());
239 true
240 }
241
242 fn lex_single_char_tokens<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
243 let start = state.get_position();
244 let text = state.rest();
245 if text.is_empty() {
246 return false;
247 }
248
249 let ch = text.chars().next().unwrap();
250
251 let kind = match ch {
252 '(' => TypstSyntaxKind::LeftParen,
253 ')' => TypstSyntaxKind::RightParen,
254 '{' => TypstSyntaxKind::LeftBrace,
255 '}' => TypstSyntaxKind::RightBrace,
256 '[' => TypstSyntaxKind::LeftBracket,
257 ']' => TypstSyntaxKind::RightBracket,
258 ';' => TypstSyntaxKind::Semicolon,
259 ',' => TypstSyntaxKind::Comma,
260 '.' => TypstSyntaxKind::Dot,
261 ':' => TypstSyntaxKind::Colon,
262 '#' => TypstSyntaxKind::Hash,
263 '@' => TypstSyntaxKind::At,
264 '$' => TypstSyntaxKind::Dollar,
265 '_' => TypstSyntaxKind::Underscore,
266 _ => TypstSyntaxKind::Error,
267 };
268
269 state.advance(1);
270 state.add_token(kind, start, state.get_position());
271 true
272 }
273}