1use crate::{kind::ProtobufSyntaxKind, language::ProtobufLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::LexOutput,
5 source::{Source, TextEdit},
6};
7
/// Shorthand for the framework lexer state specialised to the Protobuf language.
type State<'a, S> = LexerState<'a, S, ProtobufLanguage>;
9
/// Hand-written lexer for `.proto` source files.
///
/// Holds a reference to the language configuration; the configuration is
/// currently unused by the lexing routines (hence the `_config` name) but is
/// kept so the constructor signature stays stable once options are needed.
#[derive(Clone)]
pub struct ProtobufLexer<'config> {
    // Reserved for future use — no lexing decision reads it yet.
    _config: &'config ProtobufLanguage,
}
14
15impl<'config> ProtobufLexer<'config> {
16 pub fn new(config: &'config ProtobufLanguage) -> Self {
17 Self { _config: config }
18 }
19
20 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
21 while state.not_at_end() {
22 let safe_point = state.get_position();
23
24 if self.skip_whitespace(state) {
25 continue;
26 }
27
28 if self.lex_newline(state) {
29 continue;
30 }
31
32 if self.lex_comment(state) {
33 continue;
34 }
35
36 if self.lex_string_literal(state) {
37 continue;
38 }
39
40 if self.lex_number_literal(state) {
41 continue;
42 }
43
44 if self.lex_identifier_or_keyword(state) {
45 continue;
46 }
47
48 if self.lex_operators_and_delimiters(state) {
49 continue;
50 }
51
52 if let Some(ch) = state.peek() {
54 let start_pos = state.get_position();
55 state.advance(ch.len_utf8());
56 state.add_token(ProtobufSyntaxKind::Error, start_pos, state.get_position());
57 }
58 else {
59 break;
61 }
62
63 state.advance_if_dead_lock(safe_point);
64 }
65
66 let pos = state.get_position();
68 state.add_token(ProtobufSyntaxKind::Eof, pos, pos);
69
70 Ok(())
71 }
72
73 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
74 let start_pos = state.get_position();
75
76 while let Some(ch) = state.peek() {
77 if ch == ' ' || ch == '\t' {
78 state.advance(ch.len_utf8());
79 }
80 else {
81 break;
82 }
83 }
84
85 if state.get_position() > start_pos {
86 state.add_token(ProtobufSyntaxKind::Whitespace, start_pos, state.get_position());
87 true
88 }
89 else {
90 false
91 }
92 }
93
94 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
95 let start_pos = state.get_position();
96
97 if let Some('\n') = state.peek() {
98 state.advance(1);
99 state.add_token(ProtobufSyntaxKind::Newline, start_pos, state.get_position());
100 true
101 }
102 else if let Some('\r') = state.peek() {
103 state.advance(1);
104 if let Some('\n') = state.peek() {
105 state.advance(1);
106 }
107 state.add_token(ProtobufSyntaxKind::Newline, start_pos, state.get_position());
108 true
109 }
110 else {
111 false
112 }
113 }
114
115 fn lex_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
116 let start_pos = state.get_position();
117
118 if let Some('/') = state.peek() {
119 state.advance(1);
120 if let Some('/') = state.peek() {
121 state.advance(1);
122 while let Some(ch) = state.peek() {
124 if ch == '\n' || ch == '\r' {
125 break;
126 }
127 state.advance(ch.len_utf8());
128 }
129 state.add_token(ProtobufSyntaxKind::Comment, start_pos, state.get_position());
130 true
131 }
132 else if let Some('*') = state.peek() {
133 state.advance(1);
134 while let Some(ch) = state.peek() {
136 if ch == '*' {
137 state.advance(1);
138 if let Some('/') = state.peek() {
139 state.advance(1);
140 break;
141 }
142 }
143 else {
144 state.advance(ch.len_utf8());
145 }
146 }
147 state.add_token(ProtobufSyntaxKind::Comment, start_pos, state.get_position());
148 true
149 }
150 else {
151 state.set_position(start_pos);
153 false
154 }
155 }
156 else {
157 false
158 }
159 }
160
161 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
162 let start_pos = state.get_position();
163
164 if let Some(quote_char) = state.peek() {
165 if quote_char == '"' || quote_char == '\'' {
166 state.advance(1); let mut escaped = false;
169 while let Some(ch) = state.peek() {
170 if escaped {
171 escaped = false;
172 state.advance(ch.len_utf8());
173 }
174 else if ch == '\\' {
175 escaped = true;
176 state.advance(1);
177 }
178 else if ch == quote_char {
179 state.advance(1); break;
181 }
182 else if ch == '\n' || ch == '\r' {
183 break;
185 }
186 else {
187 state.advance(ch.len_utf8());
188 }
189 }
190
191 state.add_token(ProtobufSyntaxKind::StringLiteral, start_pos, state.get_position());
192 true
193 }
194 else {
195 false
196 }
197 }
198 else {
199 false
200 }
201 }
202
203 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
204 if let Some(ch) = state.peek() {
205 if ch.is_ascii_digit() || (ch == '-' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit())) {
206 let start_pos = state.get_position();
207
208 if ch == '-' {
210 state.advance(1);
211 }
212
213 while let Some(ch) = state.peek() {
215 if ch.is_ascii_digit() {
216 state.advance(1);
217 }
218 else {
219 break;
220 }
221 }
222
223 if let Some('.') = state.peek() {
225 if state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
226 state.advance(1);
227 while let Some(ch) = state.peek() {
229 if ch.is_ascii_digit() {
230 state.advance(1);
231 }
232 else {
233 break;
234 }
235 }
236 }
237 }
238
239 if let Some(ch) = state.peek() {
241 if ch == 'e' || ch == 'E' {
242 state.advance(1);
243 if let Some(ch) = state.peek() {
244 if ch == '+' || ch == '-' {
245 state.advance(1);
246 }
247 }
248 while let Some(ch) = state.peek() {
249 if ch.is_ascii_digit() {
250 state.advance(1);
251 }
252 else {
253 break;
254 }
255 }
256 }
257 }
258
259 state.add_token(ProtobufSyntaxKind::NumberLiteral, start_pos, state.get_position());
260 true
261 }
262 else {
263 false
264 }
265 }
266 else {
267 false
268 }
269 }
270
271 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
272 if let Some(ch) = state.peek() {
273 if ch.is_ascii_alphabetic() || ch == '_' {
274 let start_pos = state.get_position();
275 let mut text = String::new();
276
277 while let Some(ch) = state.peek() {
279 if ch.is_alphanumeric() || ch == '_' {
280 text.push(ch);
281 state.advance(ch.len_utf8());
282 }
283 else {
284 break;
285 }
286 }
287
288 let kind = match text.as_str() {
290 "kind" => ProtobufSyntaxKind::Syntax,
291 "package" => ProtobufSyntaxKind::Package,
292 "import" => ProtobufSyntaxKind::Import,
293 "option" => ProtobufSyntaxKind::Option,
294 "message" => ProtobufSyntaxKind::Message,
295 "enum" => ProtobufSyntaxKind::Enum,
296 "service" => ProtobufSyntaxKind::Service,
297 "rpc" => ProtobufSyntaxKind::Rpc,
298 "returns" => ProtobufSyntaxKind::Returns,
299 "stream" => ProtobufSyntaxKind::Stream,
300 "repeated" => ProtobufSyntaxKind::Repeated,
301 "optional" => ProtobufSyntaxKind::Optional,
302 "required" => ProtobufSyntaxKind::Required,
303 "oneof" => ProtobufSyntaxKind::Oneof,
304 "map" => ProtobufSyntaxKind::Map,
305 "reserved" => ProtobufSyntaxKind::Reserved,
306 "extensions" => ProtobufSyntaxKind::Extensions,
307 "extend" => ProtobufSyntaxKind::Extend,
308 "group" => ProtobufSyntaxKind::Group,
309 "public" => ProtobufSyntaxKind::Public,
310 "weak" => ProtobufSyntaxKind::Weak,
311 "double" => ProtobufSyntaxKind::Double,
313 "float" => ProtobufSyntaxKind::Float,
314 "int32" => ProtobufSyntaxKind::Int32,
315 "int64" => ProtobufSyntaxKind::Int64,
316 "uint32" => ProtobufSyntaxKind::Uint32,
317 "uint64" => ProtobufSyntaxKind::Uint64,
318 "sint32" => ProtobufSyntaxKind::Sint32,
319 "sint64" => ProtobufSyntaxKind::Sint64,
320 "fixed32" => ProtobufSyntaxKind::Fixed32,
321 "fixed64" => ProtobufSyntaxKind::Fixed64,
322 "sfixed32" => ProtobufSyntaxKind::Sfixed32,
323 "sfixed64" => ProtobufSyntaxKind::Sfixed64,
324 "bool" => ProtobufSyntaxKind::Bool,
325 "string" => ProtobufSyntaxKind::String,
326 "bytes" => ProtobufSyntaxKind::Bytes,
327 "true" | "false" => ProtobufSyntaxKind::BooleanLiteral,
329 _ => ProtobufSyntaxKind::Identifier,
330 };
331
332 state.add_token(kind, start_pos, state.get_position());
333 true
334 }
335 else {
336 false
337 }
338 }
339 else {
340 false
341 }
342 }
343
344 fn lex_operators_and_delimiters<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
345 if let Some(ch) = state.peek() {
346 let start_pos = state.get_position();
347
348 let kind = match ch {
349 '=' => {
350 state.advance(1);
351 ProtobufSyntaxKind::Assign
352 }
353 ';' => {
354 state.advance(1);
355 ProtobufSyntaxKind::Semicolon
356 }
357 ',' => {
358 state.advance(1);
359 ProtobufSyntaxKind::Comma
360 }
361 '.' => {
362 state.advance(1);
363 ProtobufSyntaxKind::Dot
364 }
365 '(' => {
366 state.advance(1);
367 ProtobufSyntaxKind::LeftParen
368 }
369 ')' => {
370 state.advance(1);
371 ProtobufSyntaxKind::RightParen
372 }
373 '[' => {
374 state.advance(1);
375 ProtobufSyntaxKind::LeftBracket
376 }
377 ']' => {
378 state.advance(1);
379 ProtobufSyntaxKind::RightBracket
380 }
381 '{' => {
382 state.advance(1);
383 ProtobufSyntaxKind::LeftBrace
384 }
385 '}' => {
386 state.advance(1);
387 ProtobufSyntaxKind::RightBrace
388 }
389 '<' => {
390 state.advance(1);
391 ProtobufSyntaxKind::LeftAngle
392 }
393 '>' => {
394 state.advance(1);
395 ProtobufSyntaxKind::RightAngle
396 }
397 _ => return false,
398 };
399
400 state.add_token(kind, start_pos, state.get_position());
401 true
402 }
403 else {
404 false
405 }
406 }
407}
408
409impl<'config> Lexer<ProtobufLanguage> for ProtobufLexer<'config> {
410 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<ProtobufLanguage>) -> LexOutput<ProtobufLanguage> {
411 let mut state = State::new(source);
412 let result = self.run(&mut state);
413 state.finish_with_cache(result, cache)
414 }
415}