1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::FSharpLanguage, lexer::token_type::FSharpTokenType};
5use oak_core::{
6 Lexer, LexerCache, LexerState, OakError, Range, Source, TextEdit,
7 lexer::{LexOutput, WhitespaceConfig},
8};
9use std::sync::LazyLock;
10
/// Shorthand for the `oak_core` lexer state specialised to the F# language.
pub(crate) type State<'a, S> = LexerState<'a, S, FSharpLanguage>;

/// Lazily-built whitespace configuration with Unicode whitespace enabled.
// NOTE(review): nothing in the code shown here reads this static — confirm whether it is still needed.
static FS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
14
/// Hand-written lexer that turns F# source text into `FSharpTokenType` tokens.
///
/// The lexer only borrows its language configuration, so cloning it is cheap.
#[derive(Clone)]
pub struct FSharpLexer<'config> {
    /// Borrowed language configuration supplied to [`FSharpLexer::new`].
    // NOTE(review): the lexing routines shown in this file never read this field.
    config: &'config FSharpLanguage,
}
20
21impl<'config> Lexer<FSharpLanguage> for FSharpLexer<'config> {
22 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<FSharpLanguage>) -> LexOutput<FSharpLanguage> {
23 let mut state = LexerState::new(source);
24 let result = self.run(&mut state);
25 if result.is_ok() {
26 state.add_eof();
27 }
28 state.finish_with_cache(result, cache)
29 }
30}
31
32impl<'config> FSharpLexer<'config> {
33 pub fn new(config: &'config FSharpLanguage) -> Self {
36 Self { config }
37 }
38
39 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
40 while state.not_at_end() {
41 if self.skip_whitespace(state) {
43 continue;
44 }
45
46 if self.skip_comment(state) {
48 continue;
49 }
50
51 if self.lex_string_literal(state) {
53 continue;
54 }
55
56 if self.lex_char_literal(state) {
58 continue;
59 }
60
61 if self.lex_number(state) {
63 continue;
64 }
65
66 if self.lex_identifier_or_keyword(state) {
68 continue;
69 }
70
71 if self.lex_operator_or_punctuation(state) {
73 continue;
74 }
75
76 let start = state.get_position();
78 if let Some(ch) = state.peek() {
79 state.advance(ch.len_utf8());
80 state.add_token(FSharpTokenType::Error, start, state.get_position())
81 }
82 }
83
84 Ok(())
85 }
86
87 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
89 let start = state.get_position();
90 if let Some(ch) = state.peek() {
91 if ch == '\n' || ch == '\r' {
92 state.advance(ch.len_utf8());
93 state.add_token(FSharpTokenType::Newline, start, state.get_position());
94 return true;
95 }
96 if ch.is_whitespace() {
97 state.advance(ch.len_utf8());
98 while let Some(next) = state.peek() {
99 if next == '\n' || next == '\r' || !next.is_whitespace() {
100 break;
101 }
102 state.advance(next.len_utf8());
103 }
104 state.add_token(FSharpTokenType::Whitespace, start, state.get_position());
105 return true;
106 }
107 }
108 false
109 }
110
111 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
112 let start = state.get_position();
113 let rest = state.rest();
114
115 if rest.starts_with("//") {
117 state.advance(2);
118 while let Some(ch) = state.peek() {
119 if ch == '\n' || ch == '\r' {
120 break;
121 }
122 state.advance(ch.len_utf8());
123 }
124 state.add_token(FSharpTokenType::LineComment, start, state.get_position());
125 return true;
126 }
127
128 if rest.starts_with("(*") {
130 state.advance(2);
131 let mut depth = 1usize;
132 while let Some(ch) = state.peek() {
133 if ch == '(' && state.peek_next_n(1) == Some('*') {
134 state.advance(2);
135 depth += 1;
136 continue;
137 }
138 if ch == '*' && state.peek_next_n(1) == Some(')') {
139 state.advance(2);
140 depth -= 1;
141 if depth == 0 {
142 break;
143 }
144 continue;
145 }
146 state.advance(ch.len_utf8());
147 }
148 state.add_token(FSharpTokenType::BlockComment, start, state.get_position());
149 return true;
150 }
151 false
152 }
153
154 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
155 let start = state.get_position();
156
157 if state.peek() == Some('@') && state.peek_next_n(1) == Some('"') {
159 state.advance(2); while let Some(ch) = state.peek() {
161 if ch == '"' {
162 state.advance(1);
163 break;
164 }
165 state.advance(ch.len_utf8());
166 }
167 state.add_token(FSharpTokenType::StringLiteral, start, state.get_position());
168 return true;
169 }
170
171 if state.peek() == Some('"') {
173 state.advance(1); while let Some(ch) = state.peek() {
175 if ch == '"' {
176 state.advance(1);
177 break;
178 }
179 if ch == '\\' {
180 state.advance(1); if let Some(escaped) = state.peek() {
182 state.advance(escaped.len_utf8());
183 }
184 }
185 else {
186 state.advance(ch.len_utf8());
187 }
188 }
189 state.add_token(FSharpTokenType::StringLiteral, start, state.get_position());
190 return true;
191 }
192 false
193 }
194
195 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
196 let start = state.get_position();
197
198 if state.peek() == Some('\'') {
199 state.advance(1); if let Some(ch) = state.peek() {
201 if ch == '\\' {
202 state.advance(1); if let Some(escaped) = state.peek() {
204 state.advance(escaped.len_utf8());
205 }
206 }
207 else {
208 state.advance(ch.len_utf8());
209 }
210 }
211 if state.peek() == Some('\'') {
212 state.advance(1); }
214 state.add_token(FSharpTokenType::CharLiteral, start, state.get_position());
215 return true;
216 }
217 false
218 }
219
220 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
221 if !state.current().map_or(false, |c| c.is_ascii_digit()) {
222 return false;
223 }
224
225 let start = state.get_position();
226
227 while state.current().map_or(false, |c| c.is_ascii_digit()) {
229 state.advance(1);
230 }
231
232 if state.current() == Some('.') && state.peek().map_or(false, |c| c.is_ascii_digit()) {
234 state.advance(1); while state.current().map_or(false, |c| c.is_ascii_digit()) {
236 state.advance(1);
237 }
238 state.add_token(FSharpTokenType::FloatLiteral, start, state.get_position());
239 }
240 else {
241 if matches!(state.current(), Some('e') | Some('E')) {
243 state.advance(1);
244 if matches!(state.current(), Some('+') | Some('-')) {
245 state.advance(1);
246 }
247 while state.current().map_or(false, |c| c.is_ascii_digit()) {
248 state.advance(1);
249 }
250 state.add_token(FSharpTokenType::FloatLiteral, start, state.get_position());
251 }
252 else {
253 if state.current().map_or(false, |c| c.is_ascii_alphabetic()) {
255 while state.current().map_or(false, |c| c.is_ascii_alphanumeric()) {
256 state.advance(1);
257 }
258 }
259 state.add_token(FSharpTokenType::IntegerLiteral, start, state.get_position());
260 }
261 }
262
263 true
264 }
265
266 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
267 if !state.current().map_or(false, |c| c.is_ascii_alphabetic() || c == '_') {
268 return false;
269 }
270
271 let start = state.get_position();
272 while state.current().map_or(false, |c| c.is_ascii_alphanumeric() || c == '_') {
273 state.advance(1);
274 }
275
276 let text = state.get_text_in((start..state.get_position()).into());
277 let kind = match text.as_ref() {
278 "abstract" => FSharpTokenType::Abstract,
279 "and" => FSharpTokenType::And,
280 "as" => FSharpTokenType::As,
281 "assert" => FSharpTokenType::Assert,
282 "base" => FSharpTokenType::Base,
283 "begin" => FSharpTokenType::Begin,
284 "class" => FSharpTokenType::Class,
285 "default" => FSharpTokenType::Default,
286 "delegate" => FSharpTokenType::Delegate,
287 "do" => FSharpTokenType::Do,
288 "done" => FSharpTokenType::Done,
289 "downcast" => FSharpTokenType::Downcast,
290 "downto" => FSharpTokenType::Downto,
291 "elif" => FSharpTokenType::Elif,
292 "else" => FSharpTokenType::Else,
293 "end" => FSharpTokenType::End,
294 "exception" => FSharpTokenType::Exception,
295 "extern" => FSharpTokenType::Extern,
296 "false" => FSharpTokenType::False,
297 "finally" => FSharpTokenType::Finally,
298 "for" => FSharpTokenType::For,
299 "fun" => FSharpTokenType::Fun,
300 "function" => FSharpTokenType::Function,
301 "if" => FSharpTokenType::If,
302 "in" => FSharpTokenType::In,
303 "inherit" => FSharpTokenType::Inherit,
304 "inline" => FSharpTokenType::Inline,
305 "interface" => FSharpTokenType::Interface,
306 "internal" => FSharpTokenType::Internal,
307 "lazy" => FSharpTokenType::Lazy,
308 "let" => FSharpTokenType::Let,
309 "match" => FSharpTokenType::Match,
310 "member" => FSharpTokenType::Member,
311 "module" => FSharpTokenType::Module,
312 "mutable" => FSharpTokenType::Mutable,
313 "namespace" => FSharpTokenType::Namespace,
314 "new" => FSharpTokenType::New,
315 "not" => FSharpTokenType::Not,
316 "null" => FSharpTokenType::Null,
317 "of" => FSharpTokenType::Of,
318 "open" => FSharpTokenType::Open,
319 "or" => FSharpTokenType::Or,
320 "override" => FSharpTokenType::Override,
321 "private" => FSharpTokenType::Private,
322 "public" => FSharpTokenType::Public,
323 "rec" => FSharpTokenType::Rec,
324 "return" => FSharpTokenType::Return,
325 "select" => FSharpTokenType::Select,
326 "static" => FSharpTokenType::Static,
327 "struct" => FSharpTokenType::Struct,
328 "then" => FSharpTokenType::Then,
329 "to" => FSharpTokenType::To,
330 "true" => FSharpTokenType::True,
331 "try" => FSharpTokenType::Try,
332 "type" => FSharpTokenType::Type,
333 "upcast" => FSharpTokenType::Upcast,
334 "use" => FSharpTokenType::Use,
335 "val" => FSharpTokenType::Val,
336 "void" => FSharpTokenType::Void,
337 "when" => FSharpTokenType::When,
338 "while" => FSharpTokenType::While,
339 "with" => FSharpTokenType::With,
340 "yield" => FSharpTokenType::Yield,
341 _ => FSharpTokenType::Identifier,
342 };
343
344 state.add_token(kind, start, state.get_position());
345 true
346 }
347
348 fn lex_operator_or_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
349 let current = state.current();
350 if current.is_none() {
351 return false;
352 }
353
354 let start = state.get_position();
355 let c = current.unwrap();
356 let next = state.peek();
357
358 match (c, next) {
360 ('-', Some('>')) => {
361 state.advance(2);
362 state.add_token(FSharpTokenType::Arrow, start, state.get_position());
363 return true;
364 }
365 (':', Some(':')) => {
366 state.advance(2);
367 state.add_token(FSharpTokenType::Cons, start, state.get_position());
368 return true;
369 }
370 ('=', Some('=')) => {
371 state.advance(2);
372 state.add_token(FSharpTokenType::Equal, start, state.get_position());
373 return true;
374 }
375 ('<', Some('=')) => {
376 state.advance(2);
377 state.add_token(FSharpTokenType::LessEqual, start, state.get_position());
378 return true;
379 }
380 ('>', Some('=')) => {
381 state.advance(2);
382 state.add_token(FSharpTokenType::GreaterEqual, start, state.get_position());
383 return true;
384 }
385 ('<', Some('>')) => {
386 state.advance(2);
387 state.add_token(FSharpTokenType::NotEqual, start, state.get_position());
388 return true;
389 }
390 ('|', Some('>')) => {
391 state.advance(2);
392 state.add_token(FSharpTokenType::Pipe, start, state.get_position());
393 return true;
394 }
395 _ => {}
396 }
397
398 let kind = match c {
400 '+' => FSharpTokenType::Plus,
401 '-' => FSharpTokenType::Minus,
402 '*' => FSharpTokenType::Star,
403 '/' => FSharpTokenType::Slash,
404 '%' => FSharpTokenType::Percent,
405 '=' => FSharpTokenType::Equal,
406 '<' => FSharpTokenType::LessThan,
407 '>' => FSharpTokenType::GreaterThan,
408 '&' => FSharpTokenType::Ampersand,
409 '|' => FSharpTokenType::Pipe,
410 '^' => FSharpTokenType::Caret,
411 '!' => FSharpTokenType::Not,
412 '?' => FSharpTokenType::Question,
413 ':' => FSharpTokenType::Colon,
414 ';' => FSharpTokenType::Semicolon,
415 ',' => FSharpTokenType::Comma,
416 '.' => FSharpTokenType::Dot,
417 '(' => FSharpTokenType::LeftParen,
418 ')' => FSharpTokenType::RightParen,
419 '[' => FSharpTokenType::LeftBracket,
420 ']' => FSharpTokenType::RightBracket,
421 '{' => FSharpTokenType::LeftBrace,
422 '}' => FSharpTokenType::RightBrace,
423 _ => return false,
424 };
425
426 state.advance(1);
427 state.add_token(kind, start, state.get_position());
428 true
429 }
430}