1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::NginxLanguage, lexer::token_type::NginxTokenType};
6use oak_core::{Lexer, LexerCache, LexerState, lexer::LexOutput, source::Source};
7
/// Shorthand for the oak lexer state specialized to [`NginxLanguage`].
pub(crate) type State<'a, S> = LexerState<'a, S, NginxLanguage>;
9
/// Hand-written lexer for nginx configuration files.
///
/// Produced by [`NginxLexer::new`] from a borrowed language configuration and
/// driven through the [`Lexer`] trait implementation below.
#[derive(Clone, Debug)]
pub struct NginxLexer<'config> {
    /// Shared language configuration borrowed for the lexer's lifetime.
    config: &'config NginxLanguage,
}
15
16impl<'config> NginxLexer<'config> {
17 pub fn new(config: &'config NginxLanguage) -> Self {
19 Self { config }
20 }
21
22 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
24 let start_pos = state.get_position();
25
26 while let Some(ch) = state.peek() {
27 if ch == ' ' || ch == '\t' {
28 state.advance(ch.len_utf8());
29 }
30 else {
31 break;
32 }
33 }
34
35 if state.get_position() > start_pos {
36 state.add_token(NginxTokenType::Whitespace, start_pos, state.get_position());
37 true
38 }
39 else {
40 false
41 }
42 }
43
44 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
46 let start_pos = state.get_position();
47
48 if let Some('\n') = state.peek() {
49 state.advance(1);
50 state.add_token(NginxTokenType::Newline, start_pos, state.get_position());
51 true
52 }
53 else if let Some('\r') = state.peek() {
54 state.advance(1);
55 if let Some('\n') = state.peek() {
56 state.advance(1);
57 }
58 state.add_token(NginxTokenType::Newline, start_pos, state.get_position());
59 true
60 }
61 else {
62 false
63 }
64 }
65
66 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
68 let start_pos = state.get_position();
69
70 if let Some('#') = state.peek() {
71 state.advance(1);
72
73 while let Some(ch) = state.peek() {
75 if ch == '\n' || ch == '\r' {
76 break;
77 }
78 state.advance(ch.len_utf8());
79 }
80
81 state.add_token(NginxTokenType::CommentToken, start_pos, state.get_position());
82 true
83 }
84 else {
85 false
86 }
87 }
88
89 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
91 let start_pos = state.get_position();
92
93 if let Some(quote) = state.peek() {
94 if quote != '"' && quote != '\'' {
95 return false;
96 }
97
98 state.advance(1); while let Some(ch) = state.peek() {
100 if ch == quote {
101 state.advance(1); break;
103 }
104 else if ch == '\\' {
105 state.advance(1); if let Some(c) = state.peek() {
107 state.advance(c.len_utf8());
108 }
109 }
110 else {
111 state.advance(ch.len_utf8());
112 }
113 }
114
115 state.add_token(NginxTokenType::String, start_pos, state.get_position());
116 true
117 }
118 else {
119 false
120 }
121 }
122
123 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
125 let start_pos = state.get_position();
126
127 if let Some(ch) = state.peek() {
128 if !ch.is_ascii_digit() {
129 return false;
130 }
131
132 while let Some(ch) = state.peek() {
134 if ch.is_ascii_digit() {
135 state.advance(ch.len_utf8());
136 }
137 else {
138 break;
139 }
140 }
141
142 if let Some('.') = state.peek() {
144 if let Some(next_ch) = state.peek_next_n(1) {
145 if next_ch.is_ascii_digit() {
146 state.advance(1); while let Some(ch) = state.peek() {
148 if ch.is_ascii_digit() {
149 state.advance(ch.len_utf8());
150 }
151 else {
152 break;
153 }
154 }
155 }
156 }
157 }
158
159 if let Some(ch) = state.peek() {
161 if ch.is_ascii_alphabetic() {
162 while let Some(ch) = state.peek() {
163 if ch.is_ascii_alphabetic() {
164 state.advance(ch.len_utf8());
165 }
166 else {
167 break;
168 }
169 }
170 }
171 }
172
173 state.add_token(NginxTokenType::Number, start_pos, state.get_position());
174 true
175 }
176 else {
177 false
178 }
179 }
180
181 fn lex_path<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
183 let start_pos = state.get_position();
184
185 if let Some('/') = state.peek() {
186 state.advance(1);
187
188 while let Some(ch) = state.peek() {
189 if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' || ch == '*' {
190 state.advance(ch.len_utf8());
191 }
192 else {
193 break;
194 }
195 }
196
197 state.add_token(NginxTokenType::Path, start_pos, state.get_position());
198 true
199 }
200 else {
201 false
202 }
203 }
204
205 fn lex_url<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
207 let start_pos = state.get_position();
208
209 if state.starts_with("http://") || state.starts_with("https://") {
211 let scheme_len = if state.starts_with("https://") { 8 } else { 7 };
212 state.advance(scheme_len);
213
214 while let Some(ch) = state.peek() {
215 if ch.is_ascii_alphanumeric() || ch == '.' || ch == '/' || ch == ':' || ch == '-' || ch == '_' || ch == '?' || ch == '&' || ch == '=' {
216 state.advance(ch.len_utf8());
217 }
218 else {
219 break;
220 }
221 }
222
223 state.add_token(NginxTokenType::Url, start_pos, state.get_position());
224 true
225 }
226 else {
227 false
228 }
229 }
230
231 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
233 let start_pos = state.get_position();
234
235 if let Some(ch) = state.peek() {
236 if !ch.is_ascii_alphanumeric() && ch != '_' && ch != '$' {
237 return false;
238 }
239
240 while let Some(ch) = state.peek() {
242 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
243 state.advance(ch.len_utf8());
244 }
245 else {
246 break;
247 }
248 }
249
250 let end_pos = state.get_position();
252 let text = state.source().get_text_in(oak_core::Range { start: start_pos, end: end_pos });
253 let token_kind = match text.as_ref() {
254 "server" => NginxTokenType::ServerKeyword,
255 "location" => NginxTokenType::LocationKeyword,
256 "upstream" => NginxTokenType::UpstreamKeyword,
257 "http" => NginxTokenType::HttpKeyword,
258 "events" => NginxTokenType::EventsKeyword,
259 "listen" => NginxTokenType::ListenKeyword,
260 "server_name" => NginxTokenType::ServerNameKeyword,
261 "root" => NginxTokenType::RootKeyword,
262 "index" => NginxTokenType::IndexKeyword,
263 "proxy_pass" => NginxTokenType::ProxyPassKeyword,
264 _ => NginxTokenType::Identifier,
265 };
266
267 state.add_token(token_kind, start_pos, end_pos);
268 true
269 }
270 else {
271 false
272 }
273 }
274
275 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
277 let start_pos = state.get_position();
278
279 if let Some(ch) = state.peek() {
280 let token_kind = match ch {
281 '{' => NginxTokenType::LeftBrace,
282 '}' => NginxTokenType::RightBrace,
283 ';' => NginxTokenType::Semicolon,
284 _ => return false,
285 };
286
287 state.advance(ch.len_utf8());
288 state.add_token(token_kind, start_pos, state.get_position());
289 true
290 }
291 else {
292 false
293 }
294 }
295
296 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
297 while state.not_at_end() {
298 let start_pos = state.get_position();
299
300 if self.skip_whitespace(state) {
302 continue;
303 }
304
305 if self.lex_newline(state) {
306 continue;
307 }
308
309 if self.lex_comment(state) {
310 continue;
311 }
312
313 if self.lex_string(state) {
314 continue;
315 }
316
317 if self.lex_url(state) {
318 continue;
319 }
320
321 if self.lex_path(state) {
322 continue;
323 }
324
325 if self.lex_number(state) {
326 continue;
327 }
328
329 if self.lex_identifier(state) {
330 continue;
331 }
332
333 if self.lex_delimiter(state) {
334 continue;
335 }
336
337 state.advance_if_dead_lock(start_pos);
339 if state.get_position() > start_pos {
340 state.add_token(NginxTokenType::Error, start_pos, state.get_position())
341 }
342 }
343 Ok(())
344 }
345}
346
347impl<'config> Lexer<NginxLanguage> for NginxLexer<'config> {
348 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<NginxLanguage>) -> LexOutput<NginxLanguage> {
349 let mut state = LexerState::new(source);
350 let result = self.run(&mut state);
351 if result.is_ok() {
352 state.add_eof()
353 }
354 state.finish_with_cache(result, cache)
355 }
356}