1use crate::{kind::FSharpSyntaxKind, language::FSharpLanguage};
2use oak_core::{
3 IncrementalCache, Lexer, LexerState, OakError,
4 lexer::{LexOutput, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<S> = LexerState<S, FSharpLanguage>;
10
11static FS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12
13#[derive(Clone)]
15pub struct FSharpLexer<'config> {
16 config: &'config FSharpLanguage,
17}
18
19impl<'config> FSharpLexer<'config> {
20 pub fn new(config: &'config FSharpLanguage) -> Self {
21 Self { config }
22 }
23
24 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
25 while state.not_at_end() {
26 if self.skip_whitespace(state) {
28 continue;
29 }
30
31 if self.skip_comment(state) {
33 continue;
34 }
35
36 if self.lex_string_literal(state) {
38 continue;
39 }
40
41 if self.lex_char_literal(state) {
43 continue;
44 }
45
46 if self.lex_number(state) {
48 continue;
49 }
50
51 if self.lex_identifier_or_keyword(state) {
53 continue;
54 }
55
56 if self.lex_operator_or_punctuation(state) {
58 continue;
59 }
60
61 let start = state.get_position();
63 if let Some(ch) = state.peek() {
64 state.advance(ch.len_utf8());
65 state.add_token(FSharpSyntaxKind::Error, start, state.get_position());
66 }
67 }
68
69 let eof_pos = state.get_position();
71 state.add_token(FSharpSyntaxKind::Eof, eof_pos, eof_pos);
72 Ok(())
73 }
74
75 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
76 match FS_WHITESPACE.scan(state.rest(), state.get_position(), FSharpSyntaxKind::Whitespace) {
77 Some(token) => {
78 state.advance_with(token);
79 return true;
80 }
81 None => {}
82 }
83 false
84 }
85
86 fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
87 let start = state.get_position();
88 let rest = state.rest();
89
90 if rest.starts_with("//") {
92 state.advance(2);
93 while let Some(ch) = state.peek() {
94 if ch == '\n' || ch == '\r' {
95 break;
96 }
97 state.advance(ch.len_utf8());
98 }
99 state.add_token(FSharpSyntaxKind::LineComment, start, state.get_position());
100 return true;
101 }
102
103 if rest.starts_with("(*") {
105 state.advance(2);
106 let mut depth = 1usize;
107 while let Some(ch) = state.peek() {
108 if ch == '(' && state.peek_next_n(1) == Some('*') {
109 state.advance(2);
110 depth += 1;
111 continue;
112 }
113 if ch == '*' && state.peek_next_n(1) == Some(')') {
114 state.advance(2);
115 depth -= 1;
116 if depth == 0 {
117 break;
118 }
119 continue;
120 }
121 state.advance(ch.len_utf8());
122 }
123 state.add_token(FSharpSyntaxKind::BlockComment, start, state.get_position());
124 return true;
125 }
126 false
127 }
128
129 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
130 let start = state.get_position();
131
132 if state.peek() == Some('@') && state.peek_next_n(1) == Some('"') {
134 state.advance(2); while let Some(ch) = state.peek() {
136 if ch == '"' {
137 state.advance(1);
138 break;
139 }
140 state.advance(ch.len_utf8());
141 }
142 state.add_token(FSharpSyntaxKind::StringLiteral, start, state.get_position());
143 return true;
144 }
145
146 if state.peek() == Some('"') {
148 state.advance(1); while let Some(ch) = state.peek() {
150 if ch == '"' {
151 state.advance(1);
152 break;
153 }
154 if ch == '\\' {
155 state.advance(1); if let Some(escaped) = state.peek() {
157 state.advance(escaped.len_utf8());
158 }
159 }
160 else {
161 state.advance(ch.len_utf8());
162 }
163 }
164 state.add_token(FSharpSyntaxKind::StringLiteral, start, state.get_position());
165 return true;
166 }
167 false
168 }
169
170 fn lex_char_literal<S: Source>(&self, state: &mut State<S>) -> bool {
171 let start = state.get_position();
172
173 if state.peek() == Some('\'') {
174 state.advance(1); if let Some(ch) = state.peek() {
176 if ch == '\\' {
177 state.advance(1); if let Some(escaped) = state.peek() {
179 state.advance(escaped.len_utf8());
180 }
181 }
182 else {
183 state.advance(ch.len_utf8());
184 }
185 }
186 if state.peek() == Some('\'') {
187 state.advance(1); }
189 state.add_token(FSharpSyntaxKind::CharLiteral, start, state.get_position());
190 return true;
191 }
192 false
193 }
194
195 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
196 if !state.current().map_or(false, |c| c.is_ascii_digit()) {
197 return false;
198 }
199
200 let start = state.get_position();
201
202 while state.current().map_or(false, |c| c.is_ascii_digit()) {
204 state.advance(1);
205 }
206
207 if state.current() == Some('.') && state.peek().map_or(false, |c| c.is_ascii_digit()) {
209 state.advance(1); while state.current().map_or(false, |c| c.is_ascii_digit()) {
211 state.advance(1);
212 }
213 state.add_token(FSharpSyntaxKind::FloatLiteral, start, state.get_position());
214 }
215 else {
216 if matches!(state.current(), Some('e') | Some('E')) {
218 state.advance(1);
219 if matches!(state.current(), Some('+') | Some('-')) {
220 state.advance(1);
221 }
222 while state.current().map_or(false, |c| c.is_ascii_digit()) {
223 state.advance(1);
224 }
225 state.add_token(FSharpSyntaxKind::FloatLiteral, start, state.get_position());
226 }
227 else {
228 if state.current().map_or(false, |c| c.is_ascii_alphabetic()) {
230 while state.current().map_or(false, |c| c.is_ascii_alphanumeric()) {
231 state.advance(1);
232 }
233 }
234 state.add_token(FSharpSyntaxKind::IntegerLiteral, start, state.get_position());
235 }
236 }
237
238 true
239 }
240
241 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
242 if !state.current().map_or(false, |c| c.is_ascii_alphabetic() || c == '_') {
243 return false;
244 }
245
246 let start = state.get_position();
247
248 while state.current().map_or(false, |c| c.is_ascii_alphanumeric() || c == '_') {
250 state.advance(1);
251 }
252
253 let text = state.get_text_from(start);
254 let kind = self.classify_identifier(&text);
255 state.add_token(kind, start, state.get_position());
256 true
257 }
258
259 fn classify_identifier(&self, text: &str) -> FSharpSyntaxKind {
260 match text {
261 "abstract" => FSharpSyntaxKind::Abstract,
263 "and" => FSharpSyntaxKind::And,
264 "as" => FSharpSyntaxKind::As,
265 "assert" => FSharpSyntaxKind::Assert,
266 "base" => FSharpSyntaxKind::Base,
267 "begin" => FSharpSyntaxKind::Begin,
268 "class" => FSharpSyntaxKind::Class,
269 "default" => FSharpSyntaxKind::Default,
270 "do" => FSharpSyntaxKind::Do,
271 "done" => FSharpSyntaxKind::Done,
272 "downcast" => FSharpSyntaxKind::Downcast,
273 "downto" => FSharpSyntaxKind::Downto,
274 "elif" => FSharpSyntaxKind::Elif,
275 "else" => FSharpSyntaxKind::Else,
276 "end" => FSharpSyntaxKind::End,
277 "exception" => FSharpSyntaxKind::Exception,
278 "extern" => FSharpSyntaxKind::Extern,
279 "false" => FSharpSyntaxKind::False,
280 "finally" => FSharpSyntaxKind::Finally,
281 "for" => FSharpSyntaxKind::For,
282 "fun" => FSharpSyntaxKind::Fun,
283 "function" => FSharpSyntaxKind::Function,
284 "global" => FSharpSyntaxKind::Global,
285 "if" => FSharpSyntaxKind::If,
286 "in" => FSharpSyntaxKind::In,
287 "inherit" => FSharpSyntaxKind::Inherit,
288 "inline" => FSharpSyntaxKind::Inline,
289 "interface" => FSharpSyntaxKind::Interface,
290 "internal" => FSharpSyntaxKind::Internal,
291 "lazy" => FSharpSyntaxKind::Lazy,
292 "let" => FSharpSyntaxKind::Let,
293 "match" => FSharpSyntaxKind::Match,
294 "member" => FSharpSyntaxKind::Member,
295 "module" => FSharpSyntaxKind::Module,
296 "mutable" => FSharpSyntaxKind::Mutable,
297 "namespace" => FSharpSyntaxKind::Namespace,
298 "new" => FSharpSyntaxKind::New,
299 "not" => FSharpSyntaxKind::Not,
300 "null" => FSharpSyntaxKind::Null,
301 "of" => FSharpSyntaxKind::Of,
302 "open" => FSharpSyntaxKind::Open,
303 "or" => FSharpSyntaxKind::Or,
304 "override" => FSharpSyntaxKind::Override,
305 "private" => FSharpSyntaxKind::Private,
306 "public" => FSharpSyntaxKind::Public,
307 "rec" => FSharpSyntaxKind::Rec,
308 "return" => FSharpSyntaxKind::Return,
309 "sig" => FSharpSyntaxKind::Sig,
310 "static" => FSharpSyntaxKind::Static,
311 "struct" => FSharpSyntaxKind::Struct,
312 "then" => FSharpSyntaxKind::Then,
313 "to" => FSharpSyntaxKind::To,
314 "true" => FSharpSyntaxKind::True,
315 "try" => FSharpSyntaxKind::Try,
316 "type" => FSharpSyntaxKind::Type,
317 "upcast" => FSharpSyntaxKind::Upcast,
318 "use" => FSharpSyntaxKind::Use,
319 "val" => FSharpSyntaxKind::Val,
320 "void" => FSharpSyntaxKind::Void,
321 "when" => FSharpSyntaxKind::When,
322 "while" => FSharpSyntaxKind::While,
323 "with" => FSharpSyntaxKind::With,
324 "yield" => FSharpSyntaxKind::Yield,
325 "async" => FSharpSyntaxKind::Async,
326 "seq" => FSharpSyntaxKind::Seq,
327 "raise" => FSharpSyntaxKind::Raise,
328 "failwith" => FSharpSyntaxKind::Failwith,
329 _ => FSharpSyntaxKind::Identifier,
330 }
331 }
332
333 fn lex_operator_or_punctuation<S: Source>(&self, state: &mut State<S>) -> bool {
334 let current = state.current();
335 if current.is_none() {
336 return false;
337 }
338
339 let start = state.get_position();
340 let c = current.unwrap();
341 let next = state.peek();
342
343 match (c, next) {
345 ('-', Some('>')) => {
346 state.advance(2);
347 state.add_token(FSharpSyntaxKind::Arrow, start, state.get_position());
348 return true;
349 }
350 (':', Some(':')) => {
351 state.advance(2);
352 state.add_token(FSharpSyntaxKind::Cons, start, state.get_position());
353 return true;
354 }
355 ('=', Some('=')) => {
356 state.advance(2);
357 state.add_token(FSharpSyntaxKind::Equal, start, state.get_position());
358 return true;
359 }
360 ('<', Some('=')) => {
361 state.advance(2);
362 state.add_token(FSharpSyntaxKind::LessEqual, start, state.get_position());
363 return true;
364 }
365 ('>', Some('=')) => {
366 state.advance(2);
367 state.add_token(FSharpSyntaxKind::GreaterEqual, start, state.get_position());
368 return true;
369 }
370 ('<', Some('>')) => {
371 state.advance(2);
372 state.add_token(FSharpSyntaxKind::NotEqual, start, state.get_position());
373 return true;
374 }
375 ('|', Some('>')) => {
376 state.advance(2);
377 state.add_token(FSharpSyntaxKind::Pipe, start, state.get_position());
378 return true;
379 }
380 _ => {}
381 }
382
383 let kind = match c {
385 '+' => FSharpSyntaxKind::Plus,
386 '-' => FSharpSyntaxKind::Minus,
387 '*' => FSharpSyntaxKind::Star,
388 '/' => FSharpSyntaxKind::Slash,
389 '%' => FSharpSyntaxKind::Percent,
390 '=' => FSharpSyntaxKind::Equal,
391 '<' => FSharpSyntaxKind::LessThan,
392 '>' => FSharpSyntaxKind::GreaterThan,
393 '&' => FSharpSyntaxKind::Ampersand,
394 '|' => FSharpSyntaxKind::Pipe,
395 '^' => FSharpSyntaxKind::Caret,
396 '!' => FSharpSyntaxKind::Not,
397 '?' => FSharpSyntaxKind::Question,
398 ':' => FSharpSyntaxKind::Colon,
399 ';' => FSharpSyntaxKind::Semicolon,
400 ',' => FSharpSyntaxKind::Comma,
401 '.' => FSharpSyntaxKind::Dot,
402 '(' => FSharpSyntaxKind::LeftParen,
403 ')' => FSharpSyntaxKind::RightParen,
404 '[' => FSharpSyntaxKind::LeftBracket,
405 ']' => FSharpSyntaxKind::RightBracket,
406 '{' => FSharpSyntaxKind::LeftBrace,
407 '}' => FSharpSyntaxKind::RightBrace,
408 _ => return false,
409 };
410
411 state.advance(1);
412 state.add_token(kind, start, state.get_position());
413 true
414 }
415}
416
417impl<'config> Lexer<FSharpLanguage> for FSharpLexer<'config> {
418 fn lex_incremental(
419 &self,
420 source: impl Source,
421 changed: usize,
422 cache: IncrementalCache<FSharpLanguage>,
423 ) -> LexOutput<FSharpLanguage> {
424 let mut state = LexerState::new_with_cache(source, changed, cache);
425 let result = self.run(&mut state);
426 state.finish(result)
427 }
428}