1use crate::{kind::FSharpSyntaxKind, language::FSharpLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{LexOutput, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, FSharpLanguage>;
10
11static FS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12
13#[derive(Clone)]
15pub struct FSharpLexer<'config> {
16 _config: &'config FSharpLanguage,
17}
18
19impl<'config> Lexer<FSharpLanguage> for FSharpLexer<'config> {
20 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<FSharpLanguage>) -> LexOutput<FSharpLanguage> {
21 let mut state = LexerState::new(source);
22 let result = self.run(&mut state);
23 if result.is_ok() {
24 state.add_eof();
25 }
26 state.finish_with_cache(result, cache)
27 }
28}
29
30impl<'config> FSharpLexer<'config> {
31 pub fn new(config: &'config FSharpLanguage) -> Self {
32 Self { _config: config }
33 }
34
35 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
36 while state.not_at_end() {
37 if self.skip_whitespace(state) {
39 continue;
40 }
41
42 if self.skip_comment(state) {
44 continue;
45 }
46
47 if self.lex_string_literal(state) {
49 continue;
50 }
51
52 if self.lex_char_literal(state) {
54 continue;
55 }
56
57 if self.lex_number(state) {
59 continue;
60 }
61
62 if self.lex_identifier_or_keyword(state) {
64 continue;
65 }
66
67 if self.lex_operator_or_punctuation(state) {
69 continue;
70 }
71
72 let start = state.get_position();
74 if let Some(ch) = state.peek() {
75 state.advance(ch.len_utf8());
76 state.add_token(FSharpSyntaxKind::Error, start, state.get_position());
77 }
78 }
79
80 Ok(())
81 }
82
83 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85 FS_WHITESPACE.scan(state, FSharpSyntaxKind::Whitespace)
86 }
87
88 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
89 let start = state.get_position();
90 let rest = state.rest();
91
92 if rest.starts_with("//") {
94 state.advance(2);
95 while let Some(ch) = state.peek() {
96 if ch == '\n' || ch == '\r' {
97 break;
98 }
99 state.advance(ch.len_utf8());
100 }
101 state.add_token(FSharpSyntaxKind::LineComment, start, state.get_position());
102 return true;
103 }
104
105 if rest.starts_with("(*") {
107 state.advance(2);
108 let mut depth = 1usize;
109 while let Some(ch) = state.peek() {
110 if ch == '(' && state.peek_next_n(1) == Some('*') {
111 state.advance(2);
112 depth += 1;
113 continue;
114 }
115 if ch == '*' && state.peek_next_n(1) == Some(')') {
116 state.advance(2);
117 depth -= 1;
118 if depth == 0 {
119 break;
120 }
121 continue;
122 }
123 state.advance(ch.len_utf8());
124 }
125 state.add_token(FSharpSyntaxKind::BlockComment, start, state.get_position());
126 return true;
127 }
128 false
129 }
130
131 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
132 let start = state.get_position();
133
134 if state.peek() == Some('@') && state.peek_next_n(1) == Some('"') {
136 state.advance(2); while let Some(ch) = state.peek() {
138 if ch == '"' {
139 state.advance(1);
140 break;
141 }
142 state.advance(ch.len_utf8());
143 }
144 state.add_token(FSharpSyntaxKind::StringLiteral, start, state.get_position());
145 return true;
146 }
147
148 if state.peek() == Some('"') {
150 state.advance(1); while let Some(ch) = state.peek() {
152 if ch == '"' {
153 state.advance(1);
154 break;
155 }
156 if ch == '\\' {
157 state.advance(1); if let Some(escaped) = state.peek() {
159 state.advance(escaped.len_utf8());
160 }
161 }
162 else {
163 state.advance(ch.len_utf8());
164 }
165 }
166 state.add_token(FSharpSyntaxKind::StringLiteral, start, state.get_position());
167 return true;
168 }
169 false
170 }
171
172 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
173 let start = state.get_position();
174
175 if state.peek() == Some('\'') {
176 state.advance(1); if let Some(ch) = state.peek() {
178 if ch == '\\' {
179 state.advance(1); if let Some(escaped) = state.peek() {
181 state.advance(escaped.len_utf8());
182 }
183 }
184 else {
185 state.advance(ch.len_utf8());
186 }
187 }
188 if state.peek() == Some('\'') {
189 state.advance(1); }
191 state.add_token(FSharpSyntaxKind::CharLiteral, start, state.get_position());
192 return true;
193 }
194 false
195 }
196
197 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
198 if !state.current().map_or(false, |c| c.is_ascii_digit()) {
199 return false;
200 }
201
202 let start = state.get_position();
203
204 while state.current().map_or(false, |c| c.is_ascii_digit()) {
206 state.advance(1);
207 }
208
209 if state.current() == Some('.') && state.peek().map_or(false, |c| c.is_ascii_digit()) {
211 state.advance(1); while state.current().map_or(false, |c| c.is_ascii_digit()) {
213 state.advance(1);
214 }
215 state.add_token(FSharpSyntaxKind::FloatLiteral, start, state.get_position());
216 }
217 else {
218 if matches!(state.current(), Some('e') | Some('E')) {
220 state.advance(1);
221 if matches!(state.current(), Some('+') | Some('-')) {
222 state.advance(1);
223 }
224 while state.current().map_or(false, |c| c.is_ascii_digit()) {
225 state.advance(1);
226 }
227 state.add_token(FSharpSyntaxKind::FloatLiteral, start, state.get_position());
228 }
229 else {
230 if state.current().map_or(false, |c| c.is_ascii_alphabetic()) {
232 while state.current().map_or(false, |c| c.is_ascii_alphanumeric()) {
233 state.advance(1);
234 }
235 }
236 state.add_token(FSharpSyntaxKind::IntegerLiteral, start, state.get_position());
237 }
238 }
239
240 true
241 }
242
243 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
244 if !state.current().map_or(false, |c| c.is_ascii_alphabetic() || c == '_') {
245 return false;
246 }
247
248 let start = state.get_position();
249
250 while state.current().map_or(false, |c| c.is_ascii_alphanumeric() || c == '_') {
252 state.advance(1);
253 }
254
255 let text = state.get_text_from(start);
256 let kind = self.classify_identifier(&text);
257 state.add_token(kind, start, state.get_position());
258 true
259 }
260
261 fn classify_identifier(&self, text: &str) -> FSharpSyntaxKind {
262 match text {
263 "abstract" => FSharpSyntaxKind::Abstract,
265 "and" => FSharpSyntaxKind::And,
266 "as" => FSharpSyntaxKind::As,
267 "assert" => FSharpSyntaxKind::Assert,
268 "base" => FSharpSyntaxKind::Base,
269 "begin" => FSharpSyntaxKind::Begin,
270 "class" => FSharpSyntaxKind::Class,
271 "default" => FSharpSyntaxKind::Default,
272 "do" => FSharpSyntaxKind::Do,
273 "done" => FSharpSyntaxKind::Done,
274 "downcast" => FSharpSyntaxKind::Downcast,
275 "downto" => FSharpSyntaxKind::Downto,
276 "elif" => FSharpSyntaxKind::Elif,
277 "else" => FSharpSyntaxKind::Else,
278 "end" => FSharpSyntaxKind::End,
279 "exception" => FSharpSyntaxKind::Exception,
280 "extern" => FSharpSyntaxKind::Extern,
281 "false" => FSharpSyntaxKind::False,
282 "finally" => FSharpSyntaxKind::Finally,
283 "for" => FSharpSyntaxKind::For,
284 "fun" => FSharpSyntaxKind::Fun,
285 "function" => FSharpSyntaxKind::Function,
286 "global" => FSharpSyntaxKind::Global,
287 "if" => FSharpSyntaxKind::If,
288 "in" => FSharpSyntaxKind::In,
289 "inherit" => FSharpSyntaxKind::Inherit,
290 "inline" => FSharpSyntaxKind::Inline,
291 "interface" => FSharpSyntaxKind::Interface,
292 "internal" => FSharpSyntaxKind::Internal,
293 "lazy" => FSharpSyntaxKind::Lazy,
294 "let" => FSharpSyntaxKind::Let,
295 "match" => FSharpSyntaxKind::Match,
296 "member" => FSharpSyntaxKind::Member,
297 "module" => FSharpSyntaxKind::Module,
298 "mutable" => FSharpSyntaxKind::Mutable,
299 "namespace" => FSharpSyntaxKind::Namespace,
300 "new" => FSharpSyntaxKind::New,
301 "not" => FSharpSyntaxKind::Not,
302 "null" => FSharpSyntaxKind::Null,
303 "of" => FSharpSyntaxKind::Of,
304 "open" => FSharpSyntaxKind::Open,
305 "or" => FSharpSyntaxKind::Or,
306 "override" => FSharpSyntaxKind::Override,
307 "private" => FSharpSyntaxKind::Private,
308 "public" => FSharpSyntaxKind::Public,
309 "rec" => FSharpSyntaxKind::Rec,
310 "return" => FSharpSyntaxKind::Return,
311 "sig" => FSharpSyntaxKind::Sig,
312 "static" => FSharpSyntaxKind::Static,
313 "struct" => FSharpSyntaxKind::Struct,
314 "then" => FSharpSyntaxKind::Then,
315 "to" => FSharpSyntaxKind::To,
316 "true" => FSharpSyntaxKind::True,
317 "try" => FSharpSyntaxKind::Try,
318 "type" => FSharpSyntaxKind::Type,
319 "upcast" => FSharpSyntaxKind::Upcast,
320 "use" => FSharpSyntaxKind::Use,
321 "val" => FSharpSyntaxKind::Val,
322 "void" => FSharpSyntaxKind::Void,
323 "when" => FSharpSyntaxKind::When,
324 "while" => FSharpSyntaxKind::While,
325 "with" => FSharpSyntaxKind::With,
326 "yield" => FSharpSyntaxKind::Yield,
327 "async" => FSharpSyntaxKind::Async,
328 "seq" => FSharpSyntaxKind::Seq,
329 "raise" => FSharpSyntaxKind::Raise,
330 "failwith" => FSharpSyntaxKind::Failwith,
331 _ => FSharpSyntaxKind::Identifier,
332 }
333 }
334
335 fn lex_operator_or_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
336 let current = state.current();
337 if current.is_none() {
338 return false;
339 }
340
341 let start = state.get_position();
342 let c = current.unwrap();
343 let next = state.peek();
344
345 match (c, next) {
347 ('-', Some('>')) => {
348 state.advance(2);
349 state.add_token(FSharpSyntaxKind::Arrow, start, state.get_position());
350 return true;
351 }
352 (':', Some(':')) => {
353 state.advance(2);
354 state.add_token(FSharpSyntaxKind::Cons, start, state.get_position());
355 return true;
356 }
357 ('=', Some('=')) => {
358 state.advance(2);
359 state.add_token(FSharpSyntaxKind::Equal, start, state.get_position());
360 return true;
361 }
362 ('<', Some('=')) => {
363 state.advance(2);
364 state.add_token(FSharpSyntaxKind::LessEqual, start, state.get_position());
365 return true;
366 }
367 ('>', Some('=')) => {
368 state.advance(2);
369 state.add_token(FSharpSyntaxKind::GreaterEqual, start, state.get_position());
370 return true;
371 }
372 ('<', Some('>')) => {
373 state.advance(2);
374 state.add_token(FSharpSyntaxKind::NotEqual, start, state.get_position());
375 return true;
376 }
377 ('|', Some('>')) => {
378 state.advance(2);
379 state.add_token(FSharpSyntaxKind::Pipe, start, state.get_position());
380 return true;
381 }
382 _ => {}
383 }
384
385 let kind = match c {
387 '+' => FSharpSyntaxKind::Plus,
388 '-' => FSharpSyntaxKind::Minus,
389 '*' => FSharpSyntaxKind::Star,
390 '/' => FSharpSyntaxKind::Slash,
391 '%' => FSharpSyntaxKind::Percent,
392 '=' => FSharpSyntaxKind::Equal,
393 '<' => FSharpSyntaxKind::LessThan,
394 '>' => FSharpSyntaxKind::GreaterThan,
395 '&' => FSharpSyntaxKind::Ampersand,
396 '|' => FSharpSyntaxKind::Pipe,
397 '^' => FSharpSyntaxKind::Caret,
398 '!' => FSharpSyntaxKind::Not,
399 '?' => FSharpSyntaxKind::Question,
400 ':' => FSharpSyntaxKind::Colon,
401 ';' => FSharpSyntaxKind::Semicolon,
402 ',' => FSharpSyntaxKind::Comma,
403 '.' => FSharpSyntaxKind::Dot,
404 '(' => FSharpSyntaxKind::LeftParen,
405 ')' => FSharpSyntaxKind::RightParen,
406 '[' => FSharpSyntaxKind::LeftBracket,
407 ']' => FSharpSyntaxKind::RightBracket,
408 '{' => FSharpSyntaxKind::LeftBrace,
409 '}' => FSharpSyntaxKind::RightBrace,
410 _ => return false,
411 };
412
413 state.advance(1);
414 state.add_token(kind, start, state.get_position());
415 true
416 }
417}