1use crate::{kind::NginxSyntaxKind, language::NginxLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, lexer::LexOutput, source::Source};
3
/// Shorthand for the shared lexer state specialized to the nginx language.
type State<S> = LexerState<S, NginxLanguage>;
5
/// Hand-written tokenizer for nginx configuration files.
///
/// Produces whitespace, newline, comment, string, number, path, URL,
/// keyword/identifier, and delimiter tokens via [`Lexer::lex_incremental`].
#[derive(Clone)]
pub struct NginxLexer<'config> {
    // Language configuration handle. NOTE(review): not read by any routine
    // visible in this file — presumably kept for config-driven behavior; confirm.
    config: &'config NginxLanguage,
}
10
11impl<'config> NginxLexer<'config> {
12 pub fn new(config: &'config NginxLanguage) -> Self {
13 Self { config }
14 }
15
16 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
18 let start_pos = state.get_position();
19
20 while let Some(ch) = state.peek() {
21 if ch == ' ' || ch == '\t' {
22 state.advance(ch.len_utf8());
23 }
24 else {
25 break;
26 }
27 }
28
29 if state.get_position() > start_pos {
30 state.add_token(NginxSyntaxKind::Whitespace, start_pos, state.get_position());
31 true
32 }
33 else {
34 false
35 }
36 }
37
38 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
40 let start_pos = state.get_position();
41
42 if let Some('\n') = state.peek() {
43 state.advance(1);
44 state.add_token(NginxSyntaxKind::Newline, start_pos, state.get_position());
45 true
46 }
47 else if let Some('\r') = state.peek() {
48 state.advance(1);
49 if let Some('\n') = state.peek() {
50 state.advance(1);
51 }
52 state.add_token(NginxSyntaxKind::Newline, start_pos, state.get_position());
53 true
54 }
55 else {
56 false
57 }
58 }
59
60 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
62 let start_pos = state.get_position();
63
64 if let Some('#') = state.peek() {
65 state.advance(1);
66
67 while let Some(ch) = state.peek() {
69 if ch == '\n' || ch == '\r' {
70 break;
71 }
72 state.advance(ch.len_utf8());
73 }
74
75 state.add_token(NginxSyntaxKind::CommentToken, start_pos, state.get_position());
76 true
77 }
78 else {
79 false
80 }
81 }
82
83 fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
85 let start_pos = state.get_position();
86
87 if let Some(quote) = state.peek() {
88 if quote != '"' && quote != '\'' {
89 return false;
90 }
91
92 state.advance(1); while let Some(ch) = state.peek() {
94 if ch == quote {
95 state.advance(1); break;
97 }
98 else if ch == '\\' {
99 state.advance(1); if state.peek().is_some() {
101 state.advance(state.peek().unwrap().len_utf8());
102 }
103 }
104 else {
105 state.advance(ch.len_utf8());
106 }
107 }
108
109 state.add_token(NginxSyntaxKind::String, start_pos, state.get_position());
110 true
111 }
112 else {
113 false
114 }
115 }
116
117 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
119 let start_pos = state.get_position();
120
121 if let Some(ch) = state.peek() {
122 if !ch.is_ascii_digit() {
123 return false;
124 }
125
126 while let Some(ch) = state.peek() {
128 if ch.is_ascii_digit() {
129 state.advance(ch.len_utf8());
130 }
131 else {
132 break;
133 }
134 }
135
136 if let Some('.') = state.peek() {
138 if let Some(next_ch) = state.peek_next_n(1) {
139 if next_ch.is_ascii_digit() {
140 state.advance(1); while let Some(ch) = state.peek() {
142 if ch.is_ascii_digit() {
143 state.advance(ch.len_utf8());
144 }
145 else {
146 break;
147 }
148 }
149 }
150 }
151 }
152
153 if let Some(ch) = state.peek() {
155 if ch.is_ascii_alphabetic() {
156 while let Some(ch) = state.peek() {
157 if ch.is_ascii_alphabetic() {
158 state.advance(ch.len_utf8());
159 }
160 else {
161 break;
162 }
163 }
164 }
165 }
166
167 state.add_token(NginxSyntaxKind::Number, start_pos, state.get_position());
168 true
169 }
170 else {
171 false
172 }
173 }
174
175 fn lex_path<S: Source>(&self, state: &mut State<S>) -> bool {
177 let start_pos = state.get_position();
178
179 if let Some('/') = state.peek() {
180 state.advance(1);
181
182 while let Some(ch) = state.peek() {
183 if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' || ch == '*' {
184 state.advance(ch.len_utf8());
185 }
186 else {
187 break;
188 }
189 }
190
191 state.add_token(NginxSyntaxKind::Path, start_pos, state.get_position());
192 true
193 }
194 else {
195 false
196 }
197 }
198
199 fn lex_url<S: Source>(&self, state: &mut State<S>) -> bool {
201 let start_pos = state.get_position();
202
203 let text = state.get_text_from(state.get_position());
205 if text.starts_with("http://") || text.starts_with("https://") {
206 let scheme_len = if text.starts_with("https://") { 8 } else { 7 };
207 state.advance(scheme_len);
208
209 while let Some(ch) = state.peek() {
210 if ch.is_ascii_alphanumeric()
211 || ch == '.'
212 || ch == '/'
213 || ch == ':'
214 || ch == '-'
215 || ch == '_'
216 || ch == '?'
217 || ch == '&'
218 || ch == '='
219 {
220 state.advance(ch.len_utf8());
221 }
222 else {
223 break;
224 }
225 }
226
227 state.add_token(NginxSyntaxKind::Url, start_pos, state.get_position());
228 true
229 }
230 else {
231 false
232 }
233 }
234
235 fn lex_identifier<S: Source>(&self, state: &mut State<S>) -> bool {
237 let start_pos = state.get_position();
238
239 if let Some(ch) = state.peek() {
240 if !ch.is_ascii_alphabetic() && ch != '_' {
241 return false;
242 }
243
244 while let Some(ch) = state.peek() {
246 if ch.is_ascii_alphanumeric() || ch == '_' {
247 state.advance(ch.len_utf8());
248 }
249 else {
250 break;
251 }
252 }
253
254 let text = state.get_text_in((start_pos..state.get_position()).into());
256 let token_kind = match text {
257 "server" => NginxSyntaxKind::ServerKeyword,
258 "location" => NginxSyntaxKind::LocationKeyword,
259 "upstream" => NginxSyntaxKind::UpstreamKeyword,
260 "http" => NginxSyntaxKind::HttpKeyword,
261 "events" => NginxSyntaxKind::EventsKeyword,
262 "listen" => NginxSyntaxKind::ListenKeyword,
263 "server_name" => NginxSyntaxKind::ServerNameKeyword,
264 "root" => NginxSyntaxKind::RootKeyword,
265 "index" => NginxSyntaxKind::IndexKeyword,
266 "proxy_pass" => NginxSyntaxKind::ProxyPassKeyword,
267 _ => NginxSyntaxKind::Identifier,
268 };
269
270 state.add_token(token_kind, start_pos, state.get_position());
271 true
272 }
273 else {
274 false
275 }
276 }
277
278 fn lex_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
280 let start_pos = state.get_position();
281
282 if let Some(ch) = state.peek() {
283 let token_kind = match ch {
284 '{' => NginxSyntaxKind::LeftBrace,
285 '}' => NginxSyntaxKind::RightBrace,
286 ';' => NginxSyntaxKind::Semicolon,
287 _ => return false,
288 };
289
290 state.advance(ch.len_utf8());
291 state.add_token(token_kind, start_pos, state.get_position());
292 true
293 }
294 else {
295 false
296 }
297 }
298}
299
300impl<'config> Lexer<NginxLanguage> for NginxLexer<'config> {
301 fn lex_incremental(
302 &self,
303 source: impl Source,
304 changed: usize,
305 cache: IncrementalCache<NginxLanguage>,
306 ) -> LexOutput<NginxLanguage> {
307 let mut state = LexerState::new_with_cache(source, changed, cache);
308
309 while state.not_at_end() {
310 if self.skip_whitespace(&mut state) {
312 continue;
313 }
314
315 if self.lex_newline(&mut state) {
316 continue;
317 }
318
319 if self.lex_comment(&mut state) {
320 continue;
321 }
322
323 if self.lex_string(&mut state) {
324 continue;
325 }
326
327 if self.lex_url(&mut state) {
328 continue;
329 }
330
331 if self.lex_path(&mut state) {
332 continue;
333 }
334
335 if self.lex_number(&mut state) {
336 continue;
337 }
338
339 if self.lex_identifier(&mut state) {
340 continue;
341 }
342
343 if self.lex_delimiter(&mut state) {
344 continue;
345 }
346
347 let start_pos = state.get_position();
349 if let Some(ch) = state.peek() {
350 state.advance(ch.len_utf8());
351 state.add_token(NginxSyntaxKind::Error, start_pos, state.get_position());
352 }
353 }
354
355 let eof_pos = state.get_position();
357 state.add_token(NginxSyntaxKind::Eof, eof_pos, eof_pos);
358
359 state.finish(Ok(()))
360 }
361}