use crate::{kind::NginxSyntaxKind, language::NginxLanguage};
use oak_core::{Lexer, LexerCache, LexerState, lexer::LexOutput, source::Source};

type State<'a, S> = LexerState<'a, S, NginxLanguage>;

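/// A hand-written, single-pass lexer for nginx configuration files.
///
/// A minimal usage sketch; `NginxLanguage::default()` and the `source`/`cache`
/// values are assumptions for illustration, not verified oak_core API:
///
/// ```ignore
/// let config = NginxLanguage::default();
/// let lexer = NginxLexer::new(&config);
/// let output = lexer.lex(source, &[], &mut cache);
/// ```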
#[derive(Clone, Debug)]
pub struct NginxLexer<'config> {
    _config: &'config NginxLanguage,
}

impl<'config> NginxLexer<'config> {
    pub fn new(config: &'config NginxLanguage) -> Self {
        Self { _config: config }
    }

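    /// Consumes a run of spaces and tabs, emitting a single `Whitespace`
    /// token. Newlines are deliberately excluded so that `lex_newline` can
    /// keep line structure visible to the parser.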
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        while let Some(ch) = state.peek() {
            if ch == ' ' || ch == '\t' {
                state.advance(ch.len_utf8());
            }
            else {
                break;
            }
        }

        if state.get_position() > start_pos {
            state.add_token(NginxSyntaxKind::Whitespace, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

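    /// Lexes one line terminator: `\n`, `\r\n`, or a bare `\r`, emitting a
    /// single `Newline` token in every case.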
    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('\n') = state.peek() {
            state.advance(1);
            state.add_token(NginxSyntaxKind::Newline, start_pos, state.get_position());
            true
        }
        else if let Some('\r') = state.peek() {
            state.advance(1);
            if let Some('\n') = state.peek() {
                state.advance(1);
            }
            state.add_token(NginxSyntaxKind::Newline, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

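    /// Lexes a `#` comment extending to, but not including, the end of the
    /// line, so the trailing newline still produces its own token.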
    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('#') = state.peek() {
            state.advance(1);

            while let Some(ch) = state.peek() {
                if ch == '\n' || ch == '\r' {
                    break;
                }
                state.advance(ch.len_utf8());
            }

            state.add_token(NginxSyntaxKind::CommentToken, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

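    /// Lexes a single- or double-quoted string, honoring backslash escapes.
    /// An unterminated string is still emitted as a `String` token that runs
    /// to the end of the input.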
    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(quote) = state.peek() {
            if quote != '"' && quote != '\'' {
                return false;
            }

            state.advance(1);
            while let Some(ch) = state.peek() {
                if ch == quote {
                    state.advance(1);
                    break;
                }
                else if ch == '\\' {
                    state.advance(1);
                    if let Some(c) = state.peek() {
                        state.advance(c.len_utf8());
                    }
                }
                else {
                    state.advance(ch.len_utf8());
                }
            }

            state.add_token(NginxSyntaxKind::String, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

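    /// Lexes an integer or decimal number. A trailing alphabetic run is folded
    /// into the same `Number` token so nginx size and time values such as
    /// `10m` or `30s` lex as one unit.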
    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            if !ch.is_ascii_digit() {
                return false;
            }

            while let Some(ch) = state.peek() {
                if ch.is_ascii_digit() {
                    state.advance(ch.len_utf8());
                }
                else {
                    break;
                }
            }

            if let Some('.') = state.peek() {
                if let Some(next_ch) = state.peek_next_n(1) {
                    if next_ch.is_ascii_digit() {
                        state.advance(1);
                        while let Some(ch) = state.peek() {
                            if ch.is_ascii_digit() {
                                state.advance(ch.len_utf8());
                            }
                            else {
                                break;
                            }
                        }
                    }
                }
            }

            if let Some(ch) = state.peek() {
                if ch.is_ascii_alphabetic() {
                    while let Some(ch) = state.peek() {
                        if ch.is_ascii_alphabetic() {
                            state.advance(ch.len_utf8());
                        }
                        else {
                            break;
                        }
                    }
                }
            }

            state.add_token(NginxSyntaxKind::Number, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

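    /// Lexes a filesystem-style path beginning with `/`, accepting
    /// alphanumerics plus `/`, `.`, `-`, `_`, and `*`.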
    fn lex_path<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('/') = state.peek() {
            state.advance(1);

            while let Some(ch) = state.peek() {
                if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' || ch == '*' {
                    state.advance(ch.len_utf8());
                }
                else {
                    break;
                }
            }

            state.add_token(NginxSyntaxKind::Path, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

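    /// Lexes an `http://` or `https://` URL, consuming the scheme and then a
    /// conservative set of host, path, and query characters.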
    fn lex_url<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if state.starts_with("http://") || state.starts_with("https://") {
            let scheme_len = if state.starts_with("https://") { 8 } else { 7 };
            state.advance(scheme_len);

            while let Some(ch) = state.peek() {
                if ch.is_ascii_alphanumeric() || ch == '.' || ch == '/' || ch == ':' || ch == '-' || ch == '_' || ch == '?' || ch == '&' || ch == '=' {
                    state.advance(ch.len_utf8());
                }
                else {
                    break;
                }
            }

            state.add_token(NginxSyntaxKind::Url, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

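    /// Lexes an identifier (`[A-Za-z_][A-Za-z0-9_]*`), then promotes the
    /// well-known nginx directive names below to their dedicated keyword
    /// kinds; anything else stays an `Identifier`.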
    fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            if !ch.is_ascii_alphabetic() && ch != '_' {
                return false;
            }

            while let Some(ch) = state.peek() {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    state.advance(ch.len_utf8());
                }
                else {
                    break;
                }
            }

            let end_pos = state.get_position();
            let text = state.source().get_text_in(oak_core::Range { start: start_pos, end: end_pos });
            let token_kind = match text.as_ref() {
                "server" => NginxSyntaxKind::ServerKeyword,
                "location" => NginxSyntaxKind::LocationKeyword,
                "upstream" => NginxSyntaxKind::UpstreamKeyword,
                "http" => NginxSyntaxKind::HttpKeyword,
                "events" => NginxSyntaxKind::EventsKeyword,
                "listen" => NginxSyntaxKind::ListenKeyword,
                "server_name" => NginxSyntaxKind::ServerNameKeyword,
                "root" => NginxSyntaxKind::RootKeyword,
                "index" => NginxSyntaxKind::IndexKeyword,
                "proxy_pass" => NginxSyntaxKind::ProxyPassKeyword,
                _ => NginxSyntaxKind::Identifier,
            };

            state.add_token(token_kind, start_pos, end_pos);
            true
        }
        else {
            false
        }
    }

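    /// Lexes the single-character delimiters `{`, `}`, and `;`.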
    fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            let token_kind = match ch {
                '{' => NginxSyntaxKind::LeftBrace,
                '}' => NginxSyntaxKind::RightBrace,
                ';' => NginxSyntaxKind::Semicolon,
                _ => return false,
            };

            state.advance(ch.len_utf8());
            state.add_token(token_kind, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

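    /// The dispatch loop: tries each sub-lexer in priority order. Note that
    /// `lex_url` must run before `lex_identifier`, since a URL's scheme would
    /// otherwise lex as the `http` keyword. Any character no sub-lexer
    /// accepts is skipped via `advance_if_dead_lock` (guaranteeing forward
    /// progress) and reported as an `Error` token.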
    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
        while state.not_at_end() {
            let start_pos = state.get_position();

            if self.skip_whitespace(state) {
                continue;
            }

            if self.lex_newline(state) {
                continue;
            }

            if self.lex_comment(state) {
                continue;
            }

            if self.lex_string(state) {
                continue;
            }

            if self.lex_url(state) {
                continue;
            }

            if self.lex_path(state) {
                continue;
            }

            if self.lex_number(state) {
                continue;
            }

            if self.lex_identifier(state) {
                continue;
            }

            if self.lex_delimiter(state) {
                continue;
            }

            state.advance_if_dead_lock(start_pos);
            if state.get_position() > start_pos {
                state.add_token(NginxSyntaxKind::Error, start_pos, state.get_position());
            }
        }
        Ok(())
    }
}

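/// The oak_core `Lexer` entry point. Note that `_edits` is ignored, so every
/// call relexes the full source rather than lexing incrementally.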
impl<'config> Lexer<NginxLanguage> for NginxLexer<'config> {
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<NginxLanguage>) -> LexOutput<NginxLanguage> {
        let mut state = LexerState::new(source);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof();
        }
        state.finish_with_cache(result, cache)
    }
}