1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::FSharpLanguage, lexer::token_type::FSharpTokenType};
5use oak_core::{
6 Lexer, LexerCache, LexerState, OakError, Range, Source, TextEdit,
7 lexer::{LexOutput, WhitespaceConfig},
8};
9use std::sync::LazyLock;
10
/// Shorthand for the `oak_core` [`LexerState`] specialised to [`FSharpLanguage`],
/// generic over the source type `S` and the lifetime `'a` of the state's borrow.
pub(crate) type State<'a, S> = LexerState<'a, S, FSharpLanguage>;
12
/// Lazily initialised [`WhitespaceConfig`] with `unicode_whitespace` set to `true`.
// NOTE(review): none of the lexer methods visible in this file read this value — confirm it is still needed.
static FS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
14
/// Lexer for F# source text.
///
/// Holds a borrowed [`FSharpLanguage`] configuration for the lifetime `'config`.
#[derive(Clone)]
pub struct FSharpLexer<'config> {
    /// Language configuration supplied to [`FSharpLexer::new`].
    config: &'config FSharpLanguage,
}
20
21impl<'config> Lexer<FSharpLanguage> for FSharpLexer<'config> {
22 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<FSharpLanguage>) -> LexOutput<FSharpLanguage> {
23 let mut state = LexerState::new(source);
24 let result = self.run(&mut state);
25 if result.is_ok() {
26 state.add_eof();
27 }
28 state.finish_with_cache(result, cache)
29 }
30}
31
32impl<'config> FSharpLexer<'config> {
33 pub fn new(config: &'config FSharpLanguage) -> Self {
35 Self { config }
36 }
37
38 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
39 while state.not_at_end() {
40 if self.skip_whitespace(state) {
42 continue;
43 }
44
45 if self.skip_comment(state) {
47 continue;
48 }
49
50 if self.lex_string_literal(state) {
52 continue;
53 }
54
55 if self.lex_char_literal(state) {
57 continue;
58 }
59
60 if self.lex_number(state) {
62 continue;
63 }
64
65 if self.lex_identifier_or_keyword(state) {
67 continue;
68 }
69
70 if self.lex_operator_or_punctuation(state) {
72 continue;
73 }
74
75 let start = state.get_position();
77 if let Some(ch) = state.peek() {
78 state.advance(ch.len_utf8());
79 state.add_token(FSharpTokenType::Error, start, state.get_position())
80 }
81 }
82
83 Ok(())
84 }
85
86 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
88 let start = state.get_position();
89 if let Some(ch) = state.peek() {
90 if ch == '\n' || ch == '\r' {
91 state.advance(ch.len_utf8());
92 state.add_token(FSharpTokenType::Newline, start, state.get_position());
93 return true;
94 }
95 if ch.is_whitespace() {
96 state.advance(ch.len_utf8());
97 while let Some(next) = state.peek() {
98 if next == '\n' || next == '\r' || !next.is_whitespace() {
99 break;
100 }
101 state.advance(next.len_utf8());
102 }
103 state.add_token(FSharpTokenType::Whitespace, start, state.get_position());
104 return true;
105 }
106 }
107 false
108 }
109
110 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
111 let start = state.get_position();
112 let rest = state.rest();
113
114 if rest.starts_with("//") {
116 state.advance(2);
117 while let Some(ch) = state.peek() {
118 if ch == '\n' || ch == '\r' {
119 break;
120 }
121 state.advance(ch.len_utf8());
122 }
123 state.add_token(FSharpTokenType::LineComment, start, state.get_position());
124 return true;
125 }
126
127 if rest.starts_with("(*") {
129 state.advance(2);
130 let mut depth = 1usize;
131 while let Some(ch) = state.peek() {
132 if ch == '(' && state.peek_next_n(1) == Some('*') {
133 state.advance(2);
134 depth += 1;
135 continue;
136 }
137 if ch == '*' && state.peek_next_n(1) == Some(')') {
138 state.advance(2);
139 depth -= 1;
140 if depth == 0 {
141 break;
142 }
143 continue;
144 }
145 state.advance(ch.len_utf8());
146 }
147 state.add_token(FSharpTokenType::BlockComment, start, state.get_position());
148 return true;
149 }
150 false
151 }
152
153 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
154 let start = state.get_position();
155
156 if state.peek() == Some('@') && state.peek_next_n(1) == Some('"') {
158 state.advance(2); while let Some(ch) = state.peek() {
160 if ch == '"' {
161 state.advance(1);
162 break;
163 }
164 state.advance(ch.len_utf8());
165 }
166 state.add_token(FSharpTokenType::StringLiteral, start, state.get_position());
167 return true;
168 }
169
170 if state.peek() == Some('"') {
172 state.advance(1); while let Some(ch) = state.peek() {
174 if ch == '"' {
175 state.advance(1);
176 break;
177 }
178 if ch == '\\' {
179 state.advance(1); if let Some(escaped) = state.peek() {
181 state.advance(escaped.len_utf8());
182 }
183 }
184 else {
185 state.advance(ch.len_utf8());
186 }
187 }
188 state.add_token(FSharpTokenType::StringLiteral, start, state.get_position());
189 return true;
190 }
191 false
192 }
193
194 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
195 let start = state.get_position();
196
197 if state.peek() == Some('\'') {
198 state.advance(1); if let Some(ch) = state.peek() {
200 if ch == '\\' {
201 state.advance(1); if let Some(escaped) = state.peek() {
203 state.advance(escaped.len_utf8());
204 }
205 }
206 else {
207 state.advance(ch.len_utf8());
208 }
209 }
210 if state.peek() == Some('\'') {
211 state.advance(1); }
213 state.add_token(FSharpTokenType::CharLiteral, start, state.get_position());
214 return true;
215 }
216 false
217 }
218
219 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
220 if !state.current().map_or(false, |c| c.is_ascii_digit()) {
221 return false;
222 }
223
224 let start = state.get_position();
225
226 while state.current().map_or(false, |c| c.is_ascii_digit()) {
228 state.advance(1);
229 }
230
231 if state.current() == Some('.') && state.peek().map_or(false, |c| c.is_ascii_digit()) {
233 state.advance(1); while state.current().map_or(false, |c| c.is_ascii_digit()) {
235 state.advance(1);
236 }
237 state.add_token(FSharpTokenType::FloatLiteral, start, state.get_position());
238 }
239 else {
240 if matches!(state.current(), Some('e') | Some('E')) {
242 state.advance(1);
243 if matches!(state.current(), Some('+') | Some('-')) {
244 state.advance(1);
245 }
246 while state.current().map_or(false, |c| c.is_ascii_digit()) {
247 state.advance(1);
248 }
249 state.add_token(FSharpTokenType::FloatLiteral, start, state.get_position());
250 }
251 else {
252 if state.current().map_or(false, |c| c.is_ascii_alphabetic()) {
254 while state.current().map_or(false, |c| c.is_ascii_alphanumeric()) {
255 state.advance(1);
256 }
257 }
258 state.add_token(FSharpTokenType::IntegerLiteral, start, state.get_position());
259 }
260 }
261
262 true
263 }
264
265 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
266 if !state.current().map_or(false, |c| c.is_ascii_alphabetic() || c == '_') {
267 return false;
268 }
269
270 let start = state.get_position();
271 while state.current().map_or(false, |c| c.is_ascii_alphanumeric() || c == '_') {
272 state.advance(1);
273 }
274
275 let text = state.get_text_in((start..state.get_position()).into());
276 let kind = match text.as_ref() {
277 "abstract" => FSharpTokenType::Abstract,
278 "and" => FSharpTokenType::And,
279 "as" => FSharpTokenType::As,
280 "assert" => FSharpTokenType::Assert,
281 "base" => FSharpTokenType::Base,
282 "begin" => FSharpTokenType::Begin,
283 "class" => FSharpTokenType::Class,
284 "default" => FSharpTokenType::Default,
285 "delegate" => FSharpTokenType::Delegate,
286 "do" => FSharpTokenType::Do,
287 "done" => FSharpTokenType::Done,
288 "downcast" => FSharpTokenType::Downcast,
289 "downto" => FSharpTokenType::Downto,
290 "elif" => FSharpTokenType::Elif,
291 "else" => FSharpTokenType::Else,
292 "end" => FSharpTokenType::End,
293 "exception" => FSharpTokenType::Exception,
294 "extern" => FSharpTokenType::Extern,
295 "false" => FSharpTokenType::False,
296 "finally" => FSharpTokenType::Finally,
297 "for" => FSharpTokenType::For,
298 "fun" => FSharpTokenType::Fun,
299 "function" => FSharpTokenType::Function,
300 "if" => FSharpTokenType::If,
301 "in" => FSharpTokenType::In,
302 "inherit" => FSharpTokenType::Inherit,
303 "inline" => FSharpTokenType::Inline,
304 "interface" => FSharpTokenType::Interface,
305 "internal" => FSharpTokenType::Internal,
306 "lazy" => FSharpTokenType::Lazy,
307 "let" => FSharpTokenType::Let,
308 "match" => FSharpTokenType::Match,
309 "member" => FSharpTokenType::Member,
310 "module" => FSharpTokenType::Module,
311 "mutable" => FSharpTokenType::Mutable,
312 "namespace" => FSharpTokenType::Namespace,
313 "new" => FSharpTokenType::New,
314 "not" => FSharpTokenType::Not,
315 "null" => FSharpTokenType::Null,
316 "of" => FSharpTokenType::Of,
317 "open" => FSharpTokenType::Open,
318 "or" => FSharpTokenType::Or,
319 "override" => FSharpTokenType::Override,
320 "private" => FSharpTokenType::Private,
321 "public" => FSharpTokenType::Public,
322 "rec" => FSharpTokenType::Rec,
323 "return" => FSharpTokenType::Return,
324 "select" => FSharpTokenType::Select,
325 "static" => FSharpTokenType::Static,
326 "struct" => FSharpTokenType::Struct,
327 "then" => FSharpTokenType::Then,
328 "to" => FSharpTokenType::To,
329 "true" => FSharpTokenType::True,
330 "try" => FSharpTokenType::Try,
331 "type" => FSharpTokenType::Type,
332 "upcast" => FSharpTokenType::Upcast,
333 "use" => FSharpTokenType::Use,
334 "val" => FSharpTokenType::Val,
335 "void" => FSharpTokenType::Void,
336 "when" => FSharpTokenType::When,
337 "while" => FSharpTokenType::While,
338 "with" => FSharpTokenType::With,
339 "yield" => FSharpTokenType::Yield,
340 _ => FSharpTokenType::Identifier,
341 };
342
343 state.add_token(kind, start, state.get_position());
344 true
345 }
346
347 fn lex_operator_or_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
348 let current = state.current();
349 if current.is_none() {
350 return false;
351 }
352
353 let start = state.get_position();
354 let c = current.unwrap();
355 let next = state.peek();
356
357 match (c, next) {
359 ('-', Some('>')) => {
360 state.advance(2);
361 state.add_token(FSharpTokenType::Arrow, start, state.get_position());
362 return true;
363 }
364 (':', Some(':')) => {
365 state.advance(2);
366 state.add_token(FSharpTokenType::Cons, start, state.get_position());
367 return true;
368 }
369 ('=', Some('=')) => {
370 state.advance(2);
371 state.add_token(FSharpTokenType::Equal, start, state.get_position());
372 return true;
373 }
374 ('<', Some('=')) => {
375 state.advance(2);
376 state.add_token(FSharpTokenType::LessEqual, start, state.get_position());
377 return true;
378 }
379 ('>', Some('=')) => {
380 state.advance(2);
381 state.add_token(FSharpTokenType::GreaterEqual, start, state.get_position());
382 return true;
383 }
384 ('<', Some('>')) => {
385 state.advance(2);
386 state.add_token(FSharpTokenType::NotEqual, start, state.get_position());
387 return true;
388 }
389 ('|', Some('>')) => {
390 state.advance(2);
391 state.add_token(FSharpTokenType::Pipe, start, state.get_position());
392 return true;
393 }
394 _ => {}
395 }
396
397 let kind = match c {
399 '+' => FSharpTokenType::Plus,
400 '-' => FSharpTokenType::Minus,
401 '*' => FSharpTokenType::Star,
402 '/' => FSharpTokenType::Slash,
403 '%' => FSharpTokenType::Percent,
404 '=' => FSharpTokenType::Equal,
405 '<' => FSharpTokenType::LessThan,
406 '>' => FSharpTokenType::GreaterThan,
407 '&' => FSharpTokenType::Ampersand,
408 '|' => FSharpTokenType::Pipe,
409 '^' => FSharpTokenType::Caret,
410 '!' => FSharpTokenType::Not,
411 '?' => FSharpTokenType::Question,
412 ':' => FSharpTokenType::Colon,
413 ';' => FSharpTokenType::Semicolon,
414 ',' => FSharpTokenType::Comma,
415 '.' => FSharpTokenType::Dot,
416 '(' => FSharpTokenType::LeftParen,
417 ')' => FSharpTokenType::RightParen,
418 '[' => FSharpTokenType::LeftBracket,
419 ']' => FSharpTokenType::RightBracket,
420 '{' => FSharpTokenType::LeftBrace,
421 '}' => FSharpTokenType::RightBrace,
422 _ => return false,
423 };
424
425 state.advance(1);
426 state.add_token(kind, start, state.get_position());
427 true
428 }
429}