1use crate::{kind::ElixirSyntaxKind, language::ElixirLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState,
4 errors::OakError,
5 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
6 source::Source,
7};
8use std::sync::LazyLock;
9
10type State<'s, S> = LexerState<'s, S, ElixirLanguage>;
11
12static ELIXIR_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
13static ELIXIR_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "#", block_start: "", block_end: "", nested_blocks: false });
14static ELIXIR_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
15static ELIXIR_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: None });
16
17#[derive(Clone)]
18pub struct ElixirLexer<'config> {
19 _config: &'config ElixirLanguage,
20}
21
22impl<'config> Lexer<ElixirLanguage> for ElixirLexer<'config> {
23 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<ElixirLanguage>) -> LexOutput<ElixirLanguage> {
24 let mut state = State::new(source);
25 let result = self.run(&mut state);
26 if result.is_ok() {
27 state.add_eof();
28 }
29 state.finish_with_cache(result, cache)
30 }
31}
32
33impl<'config> ElixirLexer<'config> {
34 pub fn new(config: &'config ElixirLanguage) -> Self {
35 Self { _config: config }
36 }
37
38 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
39 while state.not_at_end() {
40 let safe_point = state.get_position();
41
42 if self.skip_whitespace(state) {
43 continue;
44 }
45
46 if self.skip_comment(state) {
47 continue;
48 }
49
50 if self.lex_string_literal(state) {
51 continue;
52 }
53
54 if self.lex_char_literal(state) {
55 continue;
56 }
57
58 if self.lex_sigil(state) {
59 continue;
60 }
61
62 if self.lex_number_literal(state) {
63 continue;
64 }
65
66 if self.lex_identifier_or_keyword(state) {
67 continue;
68 }
69
70 if self.lex_atom(state) {
71 continue;
72 }
73
74 if self.lex_operators(state) {
75 continue;
76 }
77
78 state.advance_if_dead_lock(safe_point);
79 }
80
81 Ok(())
82 }
83
84 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
86 ELIXIR_WHITESPACE.scan(state, ElixirSyntaxKind::Whitespace)
87 }
88
89 fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
90 ELIXIR_COMMENT.scan(state, ElixirSyntaxKind::Comment, ElixirSyntaxKind::Comment)
91 }
92
93 fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
94 ELIXIR_STRING.scan(state, ElixirSyntaxKind::String)
95 }
96
97 fn lex_char_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
98 ELIXIR_CHAR.scan(state, ElixirSyntaxKind::Character)
99 }
100
101 fn lex_sigil<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
102 let start = state.get_position();
103
104 if state.consume_if_starts_with("~") {
105 if let Some(sigil_type) = state.peek() {
106 if sigil_type.is_alphabetic() {
107 state.advance(sigil_type.len_utf8());
108
109 if let Some(delimiter) = state.peek() {
111 let closing_delimiter = match delimiter {
112 '(' => ')',
113 '[' => ']',
114 '{' => '}',
115 '<' => '>',
116 '/' => '/',
117 '|' => '|',
118 '"' => '"',
119 '\'' => '\'',
120 _ => delimiter,
121 };
122
123 state.advance(delimiter.len_utf8());
124
125 while let Some(ch) = state.peek() {
126 if ch == closing_delimiter {
127 state.advance(ch.len_utf8());
128 break;
129 }
130 state.advance(ch.len_utf8());
131 }
132
133 state.take_while(|c| c.is_alphabetic());
135
136 state.add_token(ElixirSyntaxKind::Sigil, start, state.get_position());
137 return true;
138 }
139 }
140 }
141 }
142 false
143 }
144
145 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
146 let start = state.get_position();
147 let first = match state.peek() {
148 Some(c) => c,
149 None => return false,
150 };
151 if !first.is_ascii_digit() {
152 return false;
153 }
154 let mut is_float = false;
155 if first == '0' {
156 match state.peek_next_n(1) {
157 Some('x') | Some('X') => {
158 state.advance(2);
159 state.take_while(|c| c.is_ascii_hexdigit() || c == '_');
160 }
161 Some('b') | Some('B') => {
162 state.advance(2);
163 state.take_while(|c| c == '0' || c == '1' || c == '_');
164 }
165 Some('o') | Some('O') => {
166 state.advance(2);
167 state.take_while(|c| ('0'..='7').contains(&c) || c == '_');
168 }
169 _ => {
170 state.advance(1);
171 state.take_while(|c| c.is_ascii_digit() || c == '_');
172 }
173 }
174 }
175 else {
176 state.advance(1);
177 state.take_while(|c| c.is_ascii_digit() || c == '_');
178 }
179 if state.peek() == Some('.') {
181 let n1 = state.peek_next_n(1);
182 if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
183 is_float = true;
184 state.advance(1); state.take_while(|c| c.is_ascii_digit() || c == '_');
186 }
187 }
188 if let Some(c) = state.peek() {
190 if c == 'e' || c == 'E' {
191 let n1 = state.peek_next_n(1);
192 if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
193 is_float = true;
194 state.advance(1);
195 if let Some(sign) = state.peek() {
196 if sign == '+' || sign == '-' {
197 state.advance(1);
198 }
199 }
200 state.take_while(|d| d.is_ascii_digit() || d == '_');
201 }
202 }
203 }
204 state.take_while(|c| c.is_ascii_alphabetic());
206 let end = state.get_position();
207 state.add_token(if is_float { ElixirSyntaxKind::Float } else { ElixirSyntaxKind::Number }, start, end);
208 true
209 }
210
211 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
212 let start = state.get_position();
213
214 if let Some(ch) = state.peek() {
215 if ch.is_alphabetic() || ch == '_' {
216 state.advance(ch.len_utf8());
217 state.take_while(|next_ch| next_ch.is_alphanumeric() || next_ch == '_' || next_ch == '?' || next_ch == '!');
218
219 let text = state.get_text_in((start..state.get_position()).into());
220 let kind = match text.as_ref() {
221 "after" => ElixirSyntaxKind::After,
222 "and" => ElixirSyntaxKind::And,
223 "case" => ElixirSyntaxKind::Case,
224 "catch" => ElixirSyntaxKind::Catch,
225 "cond" => ElixirSyntaxKind::Cond,
226 "def" => ElixirSyntaxKind::Def,
227 "defp" => ElixirSyntaxKind::Defp,
228 "defmodule" => ElixirSyntaxKind::Defmodule,
229 "defstruct" => ElixirSyntaxKind::Defstruct,
230 "defprotocol" => ElixirSyntaxKind::Defprotocol,
231 "defimpl" => ElixirSyntaxKind::Defimpl,
232 "defmacro" => ElixirSyntaxKind::Defmacro,
233 "defmacrop" => ElixirSyntaxKind::Defmacrop,
234 "do" => ElixirSyntaxKind::Do,
235 "else" => ElixirSyntaxKind::Else,
236 "elsif" => ElixirSyntaxKind::Elsif,
237 "end" => ElixirSyntaxKind::End,
238 "false" => ElixirSyntaxKind::False,
239 "fn" => ElixirSyntaxKind::Fn,
240 "if" => ElixirSyntaxKind::If,
241 "in" => ElixirSyntaxKind::In,
242 "not" => ElixirSyntaxKind::Not,
243 "or" => ElixirSyntaxKind::Or,
244 "receive" => ElixirSyntaxKind::Receive,
245 "rescue" => ElixirSyntaxKind::Rescue,
246 "true" => ElixirSyntaxKind::True,
247 "try" => ElixirSyntaxKind::Try,
248 "unless" => ElixirSyntaxKind::Unless,
249 "when" => ElixirSyntaxKind::When,
250 "with" => ElixirSyntaxKind::With,
251 _ => {
252 if text.as_ref().chars().next().unwrap().is_uppercase() {
253 ElixirSyntaxKind::Variable
254 }
255 else {
256 ElixirSyntaxKind::Identifier
257 }
258 }
259 };
260
261 state.add_token(kind, start, state.get_position());
262 return true;
263 }
264 }
265 false
266 }
267
268 fn lex_atom<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
269 let start = state.get_position();
270
271 if state.consume_if_starts_with(":") {
272 if state.consume_if_starts_with("\"") {
274 while let Some(ch) = state.peek() {
275 if ch == '"' {
276 state.advance(1);
277 break;
278 }
279 if state.consume_if_starts_with("\\") {
280 if let Some(escaped) = state.peek() {
281 state.advance(escaped.len_utf8());
282 }
283 }
284 else {
285 state.advance(ch.len_utf8());
286 }
287 }
288 }
289 else if let Some(ch) = state.peek() {
290 if ch.is_alphabetic() || ch == '_' {
291 state.advance(ch.len_utf8());
292 state.take_while(|next_ch| next_ch.is_alphanumeric() || next_ch == '_' || next_ch == '?' || next_ch == '!');
293 }
294 }
295
296 state.add_token(ElixirSyntaxKind::Atom, start, state.get_position());
297 return true;
298 }
299 false
300 }
301
302 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
303 let start = state.get_position();
304
305 let ops = [
307 ("===", ElixirSyntaxKind::EqualEqualEqual),
308 ("!==", ElixirSyntaxKind::NotEqualEqual),
309 ("==", ElixirSyntaxKind::EqualEqual),
310 ("!=", ElixirSyntaxKind::NotEqual),
311 ("<=", ElixirSyntaxKind::LessEqual),
312 (">=", ElixirSyntaxKind::GreaterEqual),
313 ("++", ElixirSyntaxKind::PlusPlus),
314 ("--", ElixirSyntaxKind::MinusMinus),
315 ("**", ElixirSyntaxKind::StarStar),
316 ("<<", ElixirSyntaxKind::LeftShift),
317 (">>", ElixirSyntaxKind::RightShift),
318 ("=~", ElixirSyntaxKind::MatchOp),
319 ("|>", ElixirSyntaxKind::PipeRight),
320 ("||", ElixirSyntaxKind::PipePipe),
321 ("->", ElixirSyntaxKind::Arrow),
322 ];
323
324 for (pattern, kind) in ops {
325 if state.consume_if_starts_with(pattern) {
326 state.add_token(kind, start, state.get_position());
327 return true;
328 }
329 }
330
331 if let Some(ch) = state.peek() {
333 let kind = match ch {
334 '+' => ElixirSyntaxKind::Plus,
335 '-' => ElixirSyntaxKind::Minus,
336 '*' => ElixirSyntaxKind::Star,
337 '/' => ElixirSyntaxKind::Slash,
338 '=' => ElixirSyntaxKind::Equal,
339 '<' => ElixirSyntaxKind::Less,
340 '>' => ElixirSyntaxKind::Greater,
341 '!' => ElixirSyntaxKind::Exclamation,
342 '?' => ElixirSyntaxKind::Question,
343 '&' => ElixirSyntaxKind::Ampersand,
344 '@' => ElixirSyntaxKind::At,
345 '^' => ElixirSyntaxKind::Caret,
346 '~' => ElixirSyntaxKind::Tilde,
347 '|' => ElixirSyntaxKind::Pipe,
348 '#' => ElixirSyntaxKind::Hash,
349 '(' => ElixirSyntaxKind::LeftParen,
350 ')' => ElixirSyntaxKind::RightParen,
351 '{' => ElixirSyntaxKind::LeftBrace,
352 '}' => ElixirSyntaxKind::RightBrace,
353 '[' => ElixirSyntaxKind::LeftBracket,
354 ']' => ElixirSyntaxKind::RightBracket,
355 ',' => ElixirSyntaxKind::Comma,
356 ';' => ElixirSyntaxKind::Semicolon,
357 '.' => ElixirSyntaxKind::Dot,
358 ':' => ElixirSyntaxKind::Colon,
359 '\n' => ElixirSyntaxKind::Newline,
360 _ => return false,
361 };
362
363 state.advance(ch.len_utf8());
364 state.add_token(kind, start, state.get_position());
365 return true;
366 }
367
368 false
369 }
370}