1use std::borrow::Cow::Borrowed;
3use std::borrow::{Borrow, Cow};
4use std::cell::Cell;
5
6use std::rc::Rc;
7
8use crate::char_stream::CharStream;
9use crate::error_listener::{ConsoleErrorListener, ErrorListener};
10use crate::errors::ANTLRError;
11use crate::int_stream::IntStream;
12use crate::lexer_atn_simulator::{ILexerATNSimulator, LexerATNSimulator};
13
14use crate::recognizer::{Actions, Recognizer};
15use crate::rule_context::EmptyRuleNode;
16use crate::token::TOKEN_INVALID_TYPE;
17use crate::token_factory::TokenFactory;
18use crate::token_source::TokenSource;
19use std::ops::{Deref, DerefMut};
20
/// Trait implemented by ANTLR lexers.
///
/// Combines token production ([`TokenSource`]) with recognizer services
/// ([`Recognizer`]) and exposes the mutable per-token state that embedded
/// grammar actions manipulate (channel, token type, mode).
pub trait Lexer<'input, 'arena, Input, TF>:
    TokenSource<'input, 'arena, TF> + Recognizer<'input, 'arena>
where
    'input: 'arena,
    Input: CharStream<'input>,
    TF: TokenFactory<'input, 'arena> + 'arena,
{
    /// Returns a mutable reference to the underlying character stream.
    fn input(&mut self) -> &mut Input;

    /// Sets the channel the token currently being matched will be emitted on.
    fn set_channel(&mut self, v: i32);

    /// Pushes the current mode onto the mode stack and switches to mode `m`.
    fn push_mode(&mut self, m: usize);

    /// Pops the most recently saved mode off the mode stack and makes it
    /// current; returns `None` when the stack is empty.
    fn pop_mode(&mut self) -> Option<usize>;

    /// Sets the type of the token currently being matched.
    fn set_type(&mut self, t: i32);

    /// Switches the lexer to mode `m` without touching the mode stack.
    fn set_mode(&mut self, m: usize);

    /// Tells the lexer to keep matching: the current match is carried over
    /// into the next token (implemented by setting the type to `LEXER_MORE`).
    fn more(&mut self);

    /// Tells the lexer to discard the current match and continue with the
    /// next token (implemented by setting the type to `LEXER_SKIP`).
    fn skip(&mut self);

    #[doc(hidden)]
    fn reset(&mut self);

    #[doc(hidden)]
    fn get_interpreter(&self) -> Option<&LexerATNSimulator>;
}
65
/// Trait that a generated lexer's user-extension type implements to plug
/// custom hooks and static grammar metadata into [`BaseLexer`].
pub trait LexerRecog<'input, 'arena, TF, R>: Actions<'input, 'arena, R> + Sized + 'static
where
    'input: 'arena,
    TF: TokenFactory<'input, 'arena> + 'arena,
    R: Recognizer<'input, 'arena>,
{
    /// Hook invoked just before a token is created by the lexer's `emit`;
    /// the default implementation does nothing.
    fn before_emit(_lexer: &mut R) {}

    /// Returns the grammar's rule names, indexed by rule number.
    fn get_rule_names(&self) -> &'static [&'static str];

    /// Returns the literal names of the grammar's tokens, indexed by token
    /// type; `None` where a token has no literal form.
    fn get_literal_names(&self) -> &[Option<&str>];

    /// Returns the symbolic names of the grammar's tokens, indexed by token type.
    fn get_symbolic_names(&self) -> &[Option<&str>];

    /// Returns the name of the grammar file this lexer was generated from.
    fn get_grammar_file_name(&self) -> &'static str;

    /// Builds a fresh ATN simulator for this lexer; used by
    /// `BaseLexer::new_base_lexer` to create the interpreter.
    fn get_atn_simulator(&self) -> LexerATNSimulator;
}
88
/// Default lexer implementation shared by generated lexers.
///
/// `Ext` is the grammar-specific extension type (see [`LexerRecog`]),
/// `Input` the character stream being tokenized, and `TF` the factory
/// used to build the emitted tokens.
#[allow(missing_docs)]
pub struct BaseLexer<'input, 'arena, Ext, Input, TF>
where
    'input: 'arena,
    Ext: LexerRecog<'input, 'arena, TF, Self> + 'static,
    Input: CharStream<'input>,
    TF: TokenFactory<'input, 'arena> + 'arena,
{
    /// ATN interpreter driving the match; temporarily `take`n during
    /// `match_token` so the lexer itself can be passed to it mutably.
    pub interpreter: Option<Box<LexerATNSimulator>>,
    /// Character stream being tokenized; `None` when no stream is attached.
    pub input: Option<Input>,
    // User-supplied extension carrying grammar-specific behavior.
    recog: Ext,

    // Factory used to create the tokens this lexer emits.
    factory: TF,

    // Listeners notified on token recognition errors.
    error_listeners: Vec<Box<dyn ErrorListener<'input, 'arena, Self>>>,

    /// Input index at which the token currently being matched started.
    pub token_start_char_index: isize,
    /// Line on which the current token started.
    pub token_start_line: u32,
    /// Column at which the current token started.
    pub token_start_column: i32,
    // Current line/column, shared with the interpreter via `Rc` (see
    // `new_base_lexer`, which hands the same `Rc` to the interpreter).
    current_pos: Rc<LexerPosition>,
    /// Type of the token being matched; `TOKEN_INVALID_TYPE` until decided.
    pub token_type: i32,
    /// Most recently emitted token; taken by `next_token` before returning.
    pub token: Option<&'arena mut TF::Tok>,
    // Set once the input reports EOF; `next_token` then emits an EOF token.
    hit_eof: bool,
    /// Channel the current token will be emitted on.
    pub channel: i32,
    /// Stack of modes saved by `push_mode`.
    pub mode_stack: Vec<usize>,
    /// Current lexer mode.
    pub mode: usize,
    /// Explicit override for the current token's text, set via `set_text`;
    /// when `None`, the text is sliced from the input in `get_text`.
    pub text: Option<String>,
}
128
/// Line/column position shared between the lexer and its ATN interpreter:
/// both hold an `Rc` to the same instance and update it through `Cell`s.
#[derive(Debug)]
pub(crate) struct LexerPosition {
    /// Current line number (initialized to 1 in `new_base_lexer`).
    pub(crate) line: Cell<u32>,
    /// Current character position within the line (initialized to 0).
    pub(crate) char_position_in_line: Cell<i32>,
}
134
135impl<'input, 'arena, Ext, Input, TF> Deref for BaseLexer<'input, 'arena, Ext, Input, TF>
136where
137 Ext: LexerRecog<'input, 'arena, TF, Self> + 'static,
138 Input: CharStream<'input>,
139 TF: TokenFactory<'input, 'arena> + 'arena,
140{
141 type Target = Ext;
142 fn deref(&self) -> &Self::Target {
143 &self.recog
144 }
145}
146
147impl<'input, 'arena, Ext, Input, TF> DerefMut for BaseLexer<'input, 'arena, Ext, Input, TF>
148where
149 Ext: LexerRecog<'input, 'arena, TF, Self> + 'static,
150 Input: CharStream<'input>,
151 TF: TokenFactory<'input, 'arena> + 'arena,
152{
153 fn deref_mut(&mut self) -> &mut Self::Target {
154 &mut self.recog
155 }
156}
157
158impl<'input, 'arena, Ext, Input, TF> Recognizer<'input, 'arena>
159 for BaseLexer<'input, 'arena, Ext, Input, TF>
160where
161 'input: 'arena,
162 Ext: LexerRecog<'input, 'arena, TF, Self> + 'static,
163 Input: CharStream<'input>,
164 TF: TokenFactory<'input, 'arena> + 'arena,
165{
166 type Node = EmptyRuleNode<'input, 'arena>;
167
168 fn sempred(
169 &mut self,
170 localctx: Option<&'arena EmptyRuleNode<'input, 'arena>>,
171 rule_index: i32,
172 action_index: i32,
173 ) -> bool {
174 Ext::sempred(localctx, rule_index, action_index, self)
175 }
176
177 fn action(
178 &mut self,
179 localctx: Option<&'arena EmptyRuleNode<'input, 'arena>>,
180 rule_index: i32,
181 action_index: i32,
182 ) {
183 Ext::action(localctx, rule_index, action_index, self)
184 }
185}
186
/// Default (initial) lexer mode.
pub const LEXER_DEFAULT_MODE: usize = 0;
/// Pseudo token type set by `Lexer::more`: keep accumulating the current
/// match into the next token.
pub const LEXER_MORE: i32 = -2;
/// Pseudo token type set by `Lexer::skip`: discard the current match.
pub const LEXER_SKIP: i32 = -3;

#[doc(inline)]
pub use super::token::TOKEN_DEFAULT_CHANNEL as LEXER_DEFAULT_TOKEN_CHANNEL;

#[doc(inline)]
pub use super::token::TOKEN_HIDDEN_CHANNEL as LEXER_HIDDEN;

// Smallest and largest code-point values the lexer can match
// (0 through U+10FFFF, the top of the Unicode range).
pub(crate) const LEXER_MIN_CHAR_VALUE: i32 = 0x0000;
pub(crate) const LEXER_MAX_CHAR_VALUE: i32 = 0x10FFFF;
205
impl<'input, 'arena, Ext, Input, TF> BaseLexer<'input, 'arena, Ext, Input, TF>
where
    'input: 'arena,
    Ext: LexerRecog<'input, 'arena, TF, Self> + 'static,
    Input: CharStream<'input>,
    TF: TokenFactory<'input, 'arena> + 'arena,
{
    // Stashes a freshly created token; `next_token` takes it out.
    fn emit_token(&mut self, token: &'arena mut TF::Tok) {
        self.token = Some(token);
    }

    // Creates the current token from the lexer's per-token state
    // (span token_start_char_index..=char_index-1) after running the
    // extension's `before_emit` hook.
    fn emit(&mut self) {
        Ext::before_emit(self);
        let stop = self.get_char_index() - 1;
        let token = self.factory.create(
            Some(self.input.as_mut().unwrap()),
            self.token_type,
            // `take` consumes any explicit text override set via `set_text`.
            self.text.take(),
            self.channel,
            self.token_start_char_index,
            stop,
            self.token_start_line,
            self.token_start_column,
        );
        self.emit_token(token);
    }

    // Creates a zero-length EOF token on the default channel at the
    // current input position (stop = start - 1).
    fn emit_eof(&mut self) {
        let token = self.factory.create(
            None::<&mut Input>,
            super::int_stream::EOF,
            None,
            LEXER_DEFAULT_TOKEN_CHANNEL,
            self.get_char_index(),
            self.get_char_index() - 1,
            self.get_line(),
            self.get_char_position_in_line(),
        );
        self.emit_token(token)
    }

    /// Current index in the input stream.
    ///
    /// # Panics
    /// Panics if no input stream is attached.
    pub fn get_char_index(&self) -> isize {
        self.input.as_ref().unwrap().index()
    }

    /// Text of the token currently being matched: the explicit override set
    /// via [`set_text`](Self::set_text) when present, otherwise the input
    /// slice from the token start up to the current position.
    pub fn get_text<'a>(&'a self) -> Cow<'a, str>
    where
        'input: 'a,
    {
        self.text
            .as_ref()
            .map(|it| Borrowed(it.borrow()))
            .unwrap_or_else(|| {
                self.input
                    .as_ref()
                    .unwrap()
                    .get_text(self.token_start_char_index, self.get_char_index() - 1)
            })
    }

    /// Overrides the text of the token currently being matched.
    pub fn set_text(&mut self, _text: impl Into<String>) {
        self.text = Some(_text.into());
    }

    /// Registers an additional error listener.
    pub fn add_error_listener(&mut self, listener: Box<dyn ErrorListener<'input, 'arena, Self>>) {
        self.error_listeners.push(listener);
    }

    /// Removes all registered error listeners (including the console
    /// listener installed by `new_base_lexer`).
    pub fn remove_error_listeners(&mut self) {
        self.error_listeners.clear();
    }

    /// Creates a lexer over `input` with the given extension and token
    /// factory. Installs a [`ConsoleErrorListener`] by default and shares
    /// the position cell (`current_pos`) with the freshly built interpreter.
    pub fn new_base_lexer(input: Input, recog: Ext, factory: TF) -> Self {
        let mut lexer = Self {
            interpreter: Some(Box::new(recog.get_atn_simulator())),
            input: Some(input),
            recog,
            factory,
            error_listeners: vec![Box::new(ConsoleErrorListener {})],
            token_start_char_index: 0,
            token_start_line: 0,
            token_start_column: 0,
            current_pos: Rc::new(LexerPosition {
                line: Cell::new(1),
                char_position_in_line: Cell::new(0),
            }),
            token_type: super::token::TOKEN_INVALID_TYPE,
            text: None,
            token: None,
            hit_eof: false,
            channel: super::token::TOKEN_DEFAULT_CHANNEL,
            mode_stack: Vec::new(),
            mode: self::LEXER_DEFAULT_MODE,
        };
        // Interpreter and lexer must observe the same line/column state.
        let pos = lexer.current_pos.clone();
        lexer.interpreter.as_mut().unwrap().current_pos = pos;
        lexer
    }
}
317
318impl<'input, 'arena, L, Input, TF> TokenSource<'input, 'arena, TF>
319 for BaseLexer<'input, 'arena, L, Input, TF>
320where
321 'input: 'arena,
322 L: LexerRecog<'input, 'arena, TF, Self> + 'static,
323 Input: CharStream<'input>,
324 TF: TokenFactory<'input, 'arena> + 'arena,
325{
326 #[inline]
327 #[allow(unused_labels)]
328 fn next_token(&mut self) -> &'arena mut TF::Tok {
329 assert!(self.input.is_some());
330
331 let _marker = self.input().mark();
332 'outer: loop {
333 if self.hit_eof {
334 self.emit_eof();
335 break;
336 }
337 self.token = None;
338 self.channel = LEXER_DEFAULT_TOKEN_CHANNEL;
339 self.token_start_column = self
340 .interpreter
341 .as_ref()
342 .unwrap()
343 .get_char_position_in_line();
344 self.token_start_line = self.interpreter.as_ref().unwrap().get_line();
345 self.text = None;
346 let index = self.input().index();
347 self.token_start_char_index = index;
348
349 'inner: loop {
350 self.token_type = TOKEN_INVALID_TYPE;
351 let mut interpreter = self.interpreter.take().unwrap();
353 let result = interpreter.match_token(self.mode, self);
355 self.interpreter = Some(interpreter);
356
357 let ttype = result.unwrap_or_else(|err| {
358 notify_listeners(self, &err);
360 self.interpreter
361 .as_mut()
362 .unwrap()
363 .recover(err, self.input.as_mut().unwrap());
364 LEXER_SKIP
365 });
366 if self.input().la(1) == super::int_stream::EOF {
369 self.hit_eof = true;
370 }
371
372 if self.token_type == TOKEN_INVALID_TYPE {
373 self.token_type = ttype;
374 }
375
376 if self.token_type == LEXER_SKIP {
377 continue 'outer;
378 }
379
380 if self.token_type != LEXER_MORE {
381 break;
382 }
383 }
384
385 if self.token.is_none() {
386 self.emit();
387 break;
388 }
389 }
390 self.input().release(_marker);
391 self.token.take().unwrap()
392 }
393
394 fn get_line(&self) -> u32 {
395 self.current_pos.line.get()
396 }
397
398 fn get_char_position_in_line(&self) -> i32 {
399 self.current_pos.char_position_in_line.get()
400 }
401
402 fn get_input_stream(&mut self) -> Option<&mut dyn IntStream> {
403 self.input.as_mut().map(|x| x as _)
404 }
405
406 fn get_source_name(&self) -> String {
407 self.input
408 .as_ref()
409 .map(|it| it.get_source_name())
410 .unwrap_or("<none>".to_string())
411 }
412
413 fn get_token_factory(&self) -> &TF {
418 &self.factory
419 }
420
421 fn get_dfa_string(&self) -> String {
422 self.get_interpreter()
423 .unwrap()
424 .get_dfa_for_mode(LEXER_DEFAULT_MODE)
425 .to_lexer_string()
426 }
427}
428
429#[cold]
430#[inline(never)]
431fn notify_listeners<'input, 'arena, L, Input, TF>(
432 lexer: &mut BaseLexer<'input, 'arena, L, Input, TF>,
433 e: &ANTLRError,
434) where
435 'input: 'arena,
436 L: LexerRecog<'input, 'arena, TF, BaseLexer<'input, 'arena, L, Input, TF>> + 'static,
437 Input: CharStream<'input>,
438 TF: TokenFactory<'input, 'arena> + 'arena,
439{
440 let inner = lexer
441 .input
442 .as_ref()
443 .unwrap()
444 .get_text(lexer.token_start_char_index, lexer.get_char_index());
445 let text = format!("token recognition error at: '{}'", inner);
446 for listener in lexer.error_listeners.iter() {
447 listener.syntax_error(
448 lexer,
449 None,
450 lexer.token_start_line,
451 lexer.token_start_column,
452 &text,
453 Some(e),
454 )
455 }
456}
457
458impl<'input, 'arena, L, Input, TF> Lexer<'input, 'arena, Input, TF>
459 for BaseLexer<'input, 'arena, L, Input, TF>
460where
461 'input: 'arena,
462 L: LexerRecog<'input, 'arena, TF, Self> + 'static,
463 Input: CharStream<'input>,
464 TF: TokenFactory<'input, 'arena> + 'arena,
465{
466 fn input(&mut self) -> &mut Input {
467 self.input.as_mut().unwrap()
468 }
469
470 fn set_channel(&mut self, v: i32) {
471 self.channel = v;
472 }
473
474 fn push_mode(&mut self, m: usize) {
475 self.mode_stack.push(self.mode);
476 self.mode = m;
477 }
478
479 fn pop_mode(&mut self) -> Option<usize> {
480 self.mode_stack.pop().inspect(|&mode| {
481 self.mode = mode;
482 })
483 }
484
485 fn set_type(&mut self, t: i32) {
486 self.token_type = t;
487 }
488
489 fn set_mode(&mut self, m: usize) {
490 self.mode = m;
491 }
492
493 fn more(&mut self) {
494 self.set_type(LEXER_MORE)
495 }
496
497 fn skip(&mut self) {
498 self.set_type(LEXER_SKIP)
499 }
500
501 fn reset(&mut self) {
502 unimplemented!()
503 }
504
505 fn get_interpreter(&self) -> Option<&LexerATNSimulator> {
506 self.interpreter.as_deref()
507 }
508}