1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::NixLanguage, lexer::token_type::NixTokenType};
6use oak_core::{
7 Source,
8 lexer::{LexOutput, Lexer, LexerCache, LexerState},
9 source::TextEdit,
10};
11
/// Shorthand for the oak-core lexer state specialized to the Nix language.
pub(crate) type State<'a, S> = LexerState<'a, S, NixLanguage>;
13
/// Tokenizer for Nix source text.
///
/// Borrows a [`NixLanguage`] configuration for its whole lifetime.
#[derive(Clone, Debug)]
pub struct NixLexer<'config> {
    /// Language configuration supplied at construction.
    /// NOTE(review): not read by the scanning code visible in this file —
    /// presumably kept for parity with other oak lexers; confirm.
    config: &'config NixLanguage,
}
19
20impl<'config> NixLexer<'config> {
21 pub fn new(config: &'config NixLanguage) -> Self {
23 Self { config }
24 }
25}
26
27impl NixLexer<'_> {
28 pub fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
30 while state.not_at_end() {
31 if self.skip_whitespace(state) {
32 continue;
33 }
34 if self.lex_newline(state) {
35 continue;
36 }
37 if self.lex_comment(state) {
38 continue;
39 }
40 if self.lex_string(state) {
41 continue;
42 }
43 if self.lex_number(state) {
44 continue;
45 }
46 if self.lex_identifier(state) {
47 continue;
48 }
49 if self.lex_operator(state) {
50 continue;
51 }
52
53 let start_pos = state.get_position();
55 if let Some(ch) = state.peek() {
56 state.advance(ch.len_utf8());
57 state.add_token(NixTokenType::Error, start_pos, state.get_position());
58 }
59 }
60 Ok(())
61 }
62}
63
impl<'config> Lexer<NixLanguage> for NixLexer<'config> {
    /// Lex `source` into a complete token stream.
    ///
    /// An EOF token is appended only when scanning succeeded; the result
    /// (ok or error) is then handed to `finish_with_cache` together with
    /// the cache.
    ///
    /// NOTE(review): `_edits` is ignored — every call re-lexes the whole
    /// input rather than relexing incrementally. Confirm whether
    /// `LexerCache` expects the edits to be applied here.
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<NixLanguage>) -> LexOutput<NixLanguage> {
        let mut state = LexerState::new(source);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof();
        }
        state.finish_with_cache(result, cache)
    }
}
74
75impl NixLexer<'_> {
76 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
78 let start_pos = state.get_position();
79
80 while let Some(ch) = state.peek() {
81 if ch == ' ' || ch == '\t' {
82 state.advance(ch.len_utf8());
83 }
84 else {
85 break;
86 }
87 }
88
89 if state.get_position() > start_pos {
90 state.add_token(NixTokenType::Whitespace, start_pos, state.get_position());
91 true
92 }
93 else {
94 false
95 }
96 }
97
98 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
100 let start_pos = state.get_position();
101
102 if let Some('\n') = state.peek() {
103 state.advance(1);
104 state.add_token(NixTokenType::Newline, start_pos, state.get_position());
105 true
106 }
107 else if let Some('\r') = state.peek() {
108 state.advance(1);
109 if let Some('\n') = state.peek() {
110 state.advance(1);
111 }
112 state.add_token(NixTokenType::Newline, start_pos, state.get_position());
113 true
114 }
115 else {
116 false
117 }
118 }
119
120 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
122 let start_pos = state.get_position();
123
124 if let Some('#') = state.peek() {
125 state.advance(1);
126
127 while let Some(ch) = state.peek() {
129 if ch == '\n' || ch == '\r' {
130 break;
131 }
132 state.advance(ch.len_utf8());
133 }
134
135 state.add_token(NixTokenType::Comment, start_pos, state.get_position());
136 true
137 }
138 else {
139 false
140 }
141 }
142
143 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
145 let start_pos = state.get_position();
146
147 if let Some('"') = state.peek() {
148 state.advance(1);
149
150 while let Some(ch) = state.peek() {
151 if ch == '"' {
152 state.advance(1);
153 break;
154 }
155 else if ch == '\\' {
156 state.advance(1);
157 if let Some(_) = state.peek() {
158 state.advance(1);
159 }
160 }
161 else {
162 state.advance(ch.len_utf8());
163 }
164 }
165
166 state.add_token(NixTokenType::String, start_pos, state.get_position());
167 true
168 }
169 else {
170 false
171 }
172 }
173
174 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
176 let start_pos = state.get_position();
177
178 if let Some(ch) = state.peek() {
179 if ch.is_ascii_digit() {
180 state.advance(1);
181 while let Some(ch) = state.peek() {
182 if ch.is_ascii_digit() || ch == '.' {
183 state.advance(1);
184 }
185 else {
186 break;
187 }
188 }
189 state.add_token(NixTokenType::Number, start_pos, state.get_position());
190 return true;
191 }
192 }
193 false
194 }
195
196 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
198 let start_pos = state.get_position();
199
200 if let Some(ch) = state.peek() {
201 if ch.is_alphabetic() || ch == '_' {
202 state.advance(ch.len_utf8());
203 while let Some(ch) = state.peek() {
204 if ch.is_alphanumeric() || ch == '_' || ch == '-' || ch == '\'' {
205 state.advance(ch.len_utf8());
206 }
207 else {
208 break;
209 }
210 }
211
212 let text = state.get_text_in((start_pos..state.get_position()).into());
213 let kind = match &*text {
214 "let" => NixTokenType::Let,
215 "in" => NixTokenType::In,
216 "if" => NixTokenType::If,
217 "then" => NixTokenType::Then,
218 "else" => NixTokenType::Else,
219 "with" => NixTokenType::With,
220 "inherit" => NixTokenType::Inherit,
221 "rec" => NixTokenType::Rec,
222 "import" => NixTokenType::Import,
223 "true" | "false" => NixTokenType::Boolean,
224 "null" => NixTokenType::Null,
225 _ => NixTokenType::Identifier,
226 };
227
228 state.add_token(kind, start_pos, state.get_position());
229 true
230 }
231 else {
232 false
233 }
234 }
235 else {
236 false
237 }
238 }
239
240 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
242 let start_pos = state.get_position();
243
244 if let Some(ch) = state.peek() {
245 let kind = match ch {
246 '+' => {
247 state.advance(1);
248 if let Some('+') = state.peek() {
249 state.advance(1);
250 NixTokenType::Concatenation
251 }
252 else {
253 NixTokenType::Plus
254 }
255 }
256 '-' => {
257 state.advance(1);
258 if let Some('>') = state.peek() {
259 state.advance(1);
260 NixTokenType::Implication
261 }
262 else {
263 NixTokenType::Minus
264 }
265 }
266 '*' => {
267 state.advance(1);
268 NixTokenType::Star
269 }
270 '/' => {
271 state.advance(1);
272 if let Some('/') = state.peek() {
273 state.advance(1);
274 NixTokenType::Update
275 }
276 else {
277 NixTokenType::Slash
278 }
279 }
280 '%' => {
281 state.advance(1);
282 NixTokenType::Percent
283 }
284 '=' => {
285 state.advance(1);
286 if let Some('=') = state.peek() {
287 state.advance(1);
288 NixTokenType::Equal
289 }
290 else {
291 NixTokenType::Assign
292 }
293 }
294 '!' => {
295 state.advance(1);
296 if let Some('=') = state.peek() {
297 state.advance(1);
298 NixTokenType::NotEqual
299 }
300 else {
301 return false;
302 }
303 }
304 '<' => {
305 state.advance(1);
306 if let Some('=') = state.peek() {
307 state.advance(1);
308 NixTokenType::LessEqual
309 }
310 else {
311 NixTokenType::Less
312 }
313 }
314 '>' => {
315 state.advance(1);
316 if let Some('=') = state.peek() {
317 state.advance(1);
318 NixTokenType::GreaterEqual
319 }
320 else {
321 NixTokenType::Greater
322 }
323 }
324 '&' => {
325 state.advance(1);
326 if let Some('&') = state.peek() {
327 state.advance(1);
328 NixTokenType::LogicalAnd
329 }
330 else {
331 return false;
332 }
333 }
334 '|' => {
335 state.advance(1);
336 if let Some('|') = state.peek() {
337 state.advance(1);
338 NixTokenType::LogicalOr
339 }
340 else {
341 return false;
342 }
343 }
344 '?' => {
345 state.advance(1);
346 NixTokenType::Question
347 }
348 '(' => {
349 state.advance(1);
350 NixTokenType::LeftParen
351 }
352 ')' => {
353 state.advance(1);
354 NixTokenType::RightParen
355 }
356 '{' => {
357 state.advance(1);
358 NixTokenType::LeftBrace
359 }
360 '}' => {
361 state.advance(1);
362 NixTokenType::RightBrace
363 }
364 '[' => {
365 state.advance(1);
366 NixTokenType::LeftBracket
367 }
368 ']' => {
369 state.advance(1);
370 NixTokenType::RightBracket
371 }
372 ';' => {
373 state.advance(1);
374 NixTokenType::Semicolon
375 }
376 ':' => {
377 state.advance(1);
378 NixTokenType::Colon
379 }
380 ',' => {
381 state.advance(1);
382 NixTokenType::Comma
383 }
384 '.' => {
385 state.advance(1);
386 NixTokenType::Dot
387 }
388 '@' => {
389 state.advance(1);
390 NixTokenType::At
391 }
392 '$' => {
393 state.advance(1);
394 NixTokenType::Dollar
395 }
396 '#' => {
397 state.advance(1);
398 NixTokenType::Hash
399 }
400 _ => return false,
401 };
402
403 state.add_token(kind, start_pos, state.get_position());
404 true
405 }
406 else {
407 false
408 }
409 }
410}