#![doc = include_str!("readme.md")]
pub mod token_type;

use crate::{language::NginxLanguage, lexer::token_type::NginxTokenType};
use oak_core::{Lexer, LexerCache, LexerState, lexer::LexOutput, source::Source};

type State<'a, S> = LexerState<'a, S, NginxLanguage>;

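/// Lexer that turns nginx configuration source text into [`NginxTokenType`] tokens.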
#[derive(Clone, Debug)]
pub struct NginxLexer<'config> {
    _config: &'config NginxLanguage,
}

impl<'config> NginxLexer<'config> {
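    /// Creates a new lexer that borrows the given language configuration.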
    pub fn new(config: &'config NginxLanguage) -> Self {
        Self { _config: config }
    }

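    /// Consumes a run of spaces and tabs, emitting a single `Whitespace` token.
    /// Returns `true` if any whitespace was consumed.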
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        while let Some(ch) = state.peek() {
            if ch == ' ' || ch == '\t' {
                state.advance(ch.len_utf8());
            }
            else {
                break;
            }
        }

        if state.get_position() > start_pos {
            state.add_token(NginxTokenType::Whitespace, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

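    /// Lexes a line break (`\n`, `\r\n`, or a lone `\r`) into a `Newline` token.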
    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('\n') = state.peek() {
            state.advance(1);
            state.add_token(NginxTokenType::Newline, start_pos, state.get_position());
            true
        }
        else if let Some('\r') = state.peek() {
            state.advance(1);
            if let Some('\n') = state.peek() {
                state.advance(1);
            }
            state.add_token(NginxTokenType::Newline, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

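    /// Lexes a `#` comment running to the end of the line; the trailing newline is not consumed.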
    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('#') = state.peek() {
            state.advance(1);

            while let Some(ch) = state.peek() {
                if ch == '\n' || ch == '\r' {
                    break;
                }
                state.advance(ch.len_utf8());
            }

            state.add_token(NginxTokenType::CommentToken, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

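    /// Lexes a single- or double-quoted string, honoring backslash escapes.
    /// An unterminated string still produces a `String` token running to the end of the input.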
    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(quote) = state.peek() {
            if quote != '"' && quote != '\'' {
                return false;
            }

            state.advance(1);
            while let Some(ch) = state.peek() {
                if ch == quote {
                    state.advance(1);
                    break;
                }
                else if ch == '\\' {
                    state.advance(1);
                    if let Some(c) = state.peek() {
                        state.advance(c.len_utf8());
                    }
                }
                else {
                    state.advance(ch.len_utf8());
                }
            }

            state.add_token(NginxTokenType::String, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

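    /// Lexes a number: digits, an optional fractional part, and an optional alphabetic
    /// suffix (e.g. size or time units such as `10m` or `30s`).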
    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            if !ch.is_ascii_digit() {
                return false;
            }

            while let Some(ch) = state.peek() {
                if ch.is_ascii_digit() {
                    state.advance(ch.len_utf8());
                }
                else {
                    break;
                }
            }

            if let Some('.') = state.peek() {
                if let Some(next_ch) = state.peek_next_n(1) {
                    if next_ch.is_ascii_digit() {
                        state.advance(1);
                        while let Some(ch) = state.peek() {
                            if ch.is_ascii_digit() {
                                state.advance(ch.len_utf8());
                            }
                            else {
                                break;
                            }
                        }
                    }
                }
            }

            if let Some(ch) = state.peek() {
                if ch.is_ascii_alphabetic() {
                    while let Some(ch) = state.peek() {
                        if ch.is_ascii_alphabetic() {
                            state.advance(ch.len_utf8());
                        }
                        else {
                            break;
                        }
                    }
                }
            }

            state.add_token(NginxTokenType::Number, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

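    /// Lexes a path token starting with `/`, continuing over alphanumerics and `/ . - _ *`.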
    fn lex_path<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('/') = state.peek() {
            state.advance(1);

            while let Some(ch) = state.peek() {
                if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' || ch == '*' {
                    state.advance(ch.len_utf8());
                }
                else {
                    break;
                }
            }

            state.add_token(NginxTokenType::Path, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

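    /// Lexes a URL beginning with `http://` or `https://` into a `Url` token.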
    fn lex_url<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if state.starts_with("http://") || state.starts_with("https://") {
            let scheme_len = if state.starts_with("https://") { 8 } else { 7 };
            state.advance(scheme_len);

            while let Some(ch) = state.peek() {
                if ch.is_ascii_alphanumeric() || ch == '.' || ch == '/' || ch == ':' || ch == '-' || ch == '_' || ch == '?' || ch == '&' || ch == '=' {
                    state.advance(ch.len_utf8());
                }
                else {
                    break;
                }
            }

            state.add_token(NginxTokenType::Url, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

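    /// Lexes an identifier (alphanumerics, `_`, `$`) and classifies well-known nginx
    /// directive and block names (`server`, `location`, `proxy_pass`, ...) as keyword tokens.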
    fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            if !ch.is_ascii_alphanumeric() && ch != '_' && ch != '$' {
                return false;
            }

            while let Some(ch) = state.peek() {
                if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
                    state.advance(ch.len_utf8());
                }
                else {
                    break;
                }
            }

            let end_pos = state.get_position();
            let text = state.source().get_text_in(oak_core::Range { start: start_pos, end: end_pos });
            let token_kind = match text.as_ref() {
                "server" => NginxTokenType::ServerKeyword,
                "location" => NginxTokenType::LocationKeyword,
                "upstream" => NginxTokenType::UpstreamKeyword,
                "http" => NginxTokenType::HttpKeyword,
                "events" => NginxTokenType::EventsKeyword,
                "listen" => NginxTokenType::ListenKeyword,
                "server_name" => NginxTokenType::ServerNameKeyword,
                "root" => NginxTokenType::RootKeyword,
                "index" => NginxTokenType::IndexKeyword,
                "proxy_pass" => NginxTokenType::ProxyPassKeyword,
                _ => NginxTokenType::Identifier,
            };

            state.add_token(token_kind, start_pos, end_pos);
            true
        }
        else {
            false
        }
    }

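    /// Lexes the structural delimiters `{`, `}`, and `;`.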
    fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            let token_kind = match ch {
                '{' => NginxTokenType::LeftBrace,
                '}' => NginxTokenType::RightBrace,
                ';' => NginxTokenType::Semicolon,
                _ => return false,
            };

            state.advance(ch.len_utf8());
            state.add_token(token_kind, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

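    /// Main lexing loop: tries each sub-lexer in priority order and, when nothing matches,
    /// forces the position forward and records an `Error` token so the loop always terminates.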
    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
        while state.not_at_end() {
            let start_pos = state.get_position();

            if self.skip_whitespace(state) {
                continue;
            }

            if self.lex_newline(state) {
                continue;
            }

            if self.lex_comment(state) {
                continue;
            }

            if self.lex_string(state) {
                continue;
            }

            if self.lex_url(state) {
                continue;
            }

            if self.lex_path(state) {
                continue;
            }

            if self.lex_number(state) {
                continue;
            }

            if self.lex_identifier(state) {
                continue;
            }

            if self.lex_delimiter(state) {
                continue;
            }

            state.advance_if_dead_lock(start_pos);
            if state.get_position() > start_pos {
                state.add_token(NginxTokenType::Error, start_pos, state.get_position());
            }
        }
        Ok(())
    }
}

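/// Full lex of `source`: the `_edits` parameter is currently unused, so every call re-lexes
/// the whole input, appends an EOF token on success, and finalizes through the provided cache.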
impl<'config> Lexer<NginxLanguage> for NginxLexer<'config> {
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<NginxLanguage>) -> LexOutput<NginxLanguage> {
        let mut state = LexerState::new(source);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof();
        }
        state.finish_with_cache(result, cache)
    }
}