1use std::borrow::Cow::Borrowed;
3use std::borrow::{Borrow, Cow};
4use std::cell::{Cell, RefCell};
5
6use std::rc::Rc;
7
8use crate::char_stream::{CharStream, InputData};
9use crate::error_listener::{ConsoleErrorListener, ErrorListener};
10use crate::errors::ANTLRError;
11use crate::int_stream::IntStream;
12use crate::lexer_atn_simulator::{ILexerATNSimulator, LexerATNSimulator};
13use crate::parser::ParserNodeType;
14
15use crate::recognizer::{Actions, Recognizer};
16use crate::rule_context::EmptyContextType;
17use crate::token::TOKEN_INVALID_TYPE;
18use crate::token_factory::{CommonTokenFactory, TokenAware, TokenFactory};
19use crate::token_source::TokenSource;
20use std::ops::{Deref, DerefMut};
21
/// Core lexer interface implemented by [`BaseLexer`] and required by the
/// ATN simulator while it matches tokens.
pub trait Lexer<'input>:
    TokenSource<'input>
    + Recognizer<'input, Node = EmptyContextType<'input, <Self as TokenAware<'input>>::TF>>
{
    /// Concrete character stream type this lexer reads from.
    type Input: IntStream;
    /// Returns a mutable reference to the underlying input stream.
    fn input(&mut self) -> &mut Self::Input;
    /// Sets the channel the token currently being matched will be emitted on.
    fn set_channel(&mut self, v: isize);

    /// Saves the current mode on the mode stack and switches to mode `m`.
    fn push_mode(&mut self, m: usize);

    /// Restores the most recently pushed mode; returns it, or `None` if
    /// the mode stack was empty.
    fn pop_mode(&mut self) -> Option<usize>;

    /// Overrides the type of the token currently being matched.
    fn set_type(&mut self, t: isize);

    /// Switches to mode `m` without touching the mode stack.
    fn set_mode(&mut self, m: usize);

    /// Marks the current token as incomplete so matching continues
    /// (sets the `LEXER_MORE` sentinel type).
    fn more(&mut self);

    /// Marks the current token to be discarded (sets the `LEXER_SKIP`
    /// sentinel type).
    fn skip(&mut self);

    #[doc(hidden)]
    fn reset(&mut self);

    #[doc(hidden)]
    fn get_interpreter(&self) -> Option<&LexerATNSimulator>;
}
65
/// Hook trait for user code embedded in generated lexers; extends the
/// generated `Actions` with a callback fired just before a token is built.
pub trait LexerRecog<'a, T: Recognizer<'a>>: Actions<'a, T> + Sized + 'static {
    /// Called by `BaseLexer::emit` right before the token is created.
    fn before_emit(_lexer: &mut T) {}
}
73
#[allow(missing_docs)]
/// Default lexer implementation: owns the input stream, the ATN simulator,
/// and all per-token bookkeeping used by `next_token`.
pub struct BaseLexer<
    'input,
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input> = CommonTokenFactory,
> {
    // ATN simulator driving recognition; `None` only while temporarily
    // moved out during `next_token` so it can borrow the lexer mutably.
    pub interpreter: Option<Box<LexerATNSimulator>>,
    // Character input stream; `None` once taken.
    pub input: Option<Input>,
    recog: T,

    factory: &'input TF,

    error_listeners: RefCell<Vec<Box<dyn ErrorListener<'input, Self>>>>,

    // Input index at which the token currently being matched starts.
    pub token_start_char_index: isize,
    // Line on which the current token starts.
    pub token_start_line: isize,
    // Column at which the current token starts.
    pub token_start_column: isize,
    // Live line/column position, shared (via `Rc`) with the ATN simulator.
    current_pos: Rc<LexerPosition>,
    // Type assigned to the token currently being matched.
    pub token_type: isize,
    // Token produced by the last emit, pending pickup by `next_token`.
    pub token: Option<TF::Tok>,
    hit_eof: bool,
    // Channel the current token will be emitted on.
    pub channel: isize,
    // Modes saved by `push_mode`, restored by `pop_mode`.
    pub mode_stack: Vec<usize>,
    // Currently active lexer mode.
    pub mode: usize,
    // Explicitly-set token text overriding the raw input slice, if any.
    pub text: Option<<TF::Data as ToOwned>::Owned>,
}
112
/// Mutable line/column cursor shared between the lexer and its ATN
/// simulator (via `Rc`), so updates made by either side are visible to both.
#[derive(Debug)]
pub(crate) struct LexerPosition {
    // Current line number (initialized to 1 by `new_base_lexer`).
    pub(crate) line: Cell<isize>,
    // Current character position within the line (initialized to 0).
    pub(crate) char_position_in_line: Cell<isize>,
}
118
// Delegates to the embedded custom recognizer so user-defined members of
// `T` can be called directly on the lexer instance.
impl<'input, T, Input, TF> Deref for BaseLexer<'input, T, Input, TF>
where
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input>,
{
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &self.recog
    }
}
131
// Mutable counterpart of the `Deref` delegation to the custom recognizer.
impl<'input, T, Input, TF> DerefMut for BaseLexer<'input, T, Input, TF>
where
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input>,
{
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.recog
    }
}
142
// Forwards predicate and action evaluation to the generated `Actions`
// implementation supplied by the user recognizer type `T`.
impl<'input, T, Input, TF> Recognizer<'input> for BaseLexer<'input, T, Input, TF>
where
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input>,
{
    type Node = EmptyContextType<'input, TF>;

    /// Evaluates the semantic predicate `action_index` of rule `rule_index`;
    /// pure delegation to the generated `Actions` impl.
    fn sempred(
        &mut self,
        _localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
        rule_index: isize,
        action_index: isize,
    ) -> bool {
        <T as Actions<'input, Self>>::sempred(_localctx, rule_index, action_index, self)
    }

    /// Executes the embedded lexer action `action_index` of rule
    /// `rule_index`; pure delegation to the generated `Actions` impl.
    fn action(
        &mut self,
        _localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
        rule_index: isize,
        action_index: isize,
    ) {
        <T as Actions<'input, Self>>::action(_localctx, rule_index, action_index, self)
    }
}
169
/// Mode every lexer starts in.
pub const LEXER_DEFAULT_MODE: usize = 0;
/// Sentinel token type: keep matching, the current token is not finished.
pub const LEXER_MORE: isize = -2;
/// Sentinel token type: discard the current token and continue lexing.
pub const LEXER_SKIP: isize = -3;

#[doc(inline)]
pub use super::token::TOKEN_DEFAULT_CHANNEL as LEXER_DEFAULT_TOKEN_CHANNEL;

#[doc(inline)]
pub use super::token::TOKEN_HIDDEN_CHANNEL as LEXER_HIDDEN;

// Smallest code point a lexer can match.
pub(crate) const LEXER_MIN_CHAR_VALUE: isize = 0x0000;
// Largest valid Unicode scalar value (U+10FFFF).
pub(crate) const LEXER_MAX_CHAR_VALUE: isize = 0x10FFFF;
188
189impl<'input, T, Input, TF> BaseLexer<'input, T, Input, TF>
190where
191 T: LexerRecog<'input, Self> + 'static,
192 Input: CharStream<TF::From>,
193 TF: TokenFactory<'input>,
194{
195 fn emit_token(&mut self, token: TF::Tok) {
196 self.token = Some(token);
197 }
198
199 fn emit(&mut self) {
200 <T as LexerRecog<Self>>::before_emit(self);
201 let stop = self.get_char_index() - 1;
202 let token = self.factory.create(
203 Some(self.input.as_mut().unwrap()),
204 self.token_type,
205 self.text.take(),
206 self.channel,
207 self.token_start_char_index,
208 stop,
209 self.token_start_line,
210 self.token_start_column,
211 );
212 self.emit_token(token);
213 }
214
215 fn emit_eof(&mut self) {
216 let token = self.factory.create(
217 None::<&mut Input>,
218 super::int_stream::EOF,
219 None,
220 LEXER_DEFAULT_TOKEN_CHANNEL,
221 self.get_char_index(),
222 self.get_char_index() - 1,
223 self.get_line(),
224 self.get_char_position_in_line(),
225 );
226 self.emit_token(token)
227 }
228
229 pub fn get_char_index(&self) -> isize {
231 self.input.as_ref().unwrap().index()
232 }
233
234 pub fn get_text<'a>(&'a self) -> Cow<'a, TF::Data>
236 where
237 'input: 'a,
238 {
239 self.text
240 .as_ref()
241 .map(|it| Borrowed(it.borrow()))
242 .unwrap_or_else(|| {
244 let text = self
245 .input
246 .as_ref()
247 .unwrap()
248 .get_text(self.token_start_char_index, self.get_char_index() - 1);
249 TF::get_data(text)
250 })
251 }
252
253 pub fn set_text(&mut self, _text: <TF::Data as ToOwned>::Owned) {
255 self.text = Some(_text);
256 }
257
258 pub fn add_error_listener(&mut self, listener: Box<dyn ErrorListener<'input, Self>>) {
264 self.error_listeners.borrow_mut().push(listener);
265 }
266
267 pub fn remove_error_listeners(&mut self) {
269 self.error_listeners.borrow_mut().clear();
270 }
271
272 pub fn new_base_lexer(
274 input: Input,
275 interpreter: LexerATNSimulator,
276 recog: T,
277 factory: &'input TF,
278 ) -> Self {
279 let mut lexer = Self {
280 interpreter: Some(Box::new(interpreter)),
281 input: Some(input),
282 recog,
283 factory,
284 error_listeners: RefCell::new(vec![Box::new(ConsoleErrorListener {})]),
285 token_start_char_index: 0,
286 token_start_line: 0,
287 token_start_column: 0,
288 current_pos: Rc::new(LexerPosition {
289 line: Cell::new(1),
290 char_position_in_line: Cell::new(0),
291 }),
292 token_type: super::token::TOKEN_INVALID_TYPE,
293 text: None,
294 token: None,
295 hit_eof: false,
296 channel: super::token::TOKEN_DEFAULT_CHANNEL,
297 mode_stack: Vec::new(),
299 mode: self::LEXER_DEFAULT_MODE,
300 };
301 let pos = lexer.current_pos.clone();
302 lexer.interpreter.as_mut().unwrap().current_pos = pos;
303 lexer
304 }
305}
306
// Associates the lexer with its token factory type for trait resolution.
impl<'input, T, Input, TF> TokenAware<'input> for BaseLexer<'input, T, Input, TF>
where
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input>,
{
    type TF = TF;
}
315
316impl<'input, T, Input, TF> TokenSource<'input> for BaseLexer<'input, T, Input, TF>
317where
318 T: LexerRecog<'input, Self> + 'static,
319 Input: CharStream<TF::From>,
320 TF: TokenFactory<'input>,
321{
322 type TF = TF;
323 #[inline]
324 #[allow(unused_labels)]
325 fn next_token(&mut self) -> <Self::TF as TokenFactory<'input>>::Tok {
326 assert!(self.input.is_some());
327
328 let _marker = self.input().mark();
329 'outer: loop {
330 if self.hit_eof {
331 self.emit_eof();
332 break;
333 }
334 self.token = None;
335 self.channel = LEXER_DEFAULT_TOKEN_CHANNEL;
336 self.token_start_column = self
337 .interpreter
338 .as_ref()
339 .unwrap()
340 .get_char_position_in_line();
341 self.token_start_line = self.interpreter.as_ref().unwrap().get_line();
342 self.text = None;
343 let index = self.input().index();
344 self.token_start_char_index = index;
345
346 'inner: loop {
347 self.token_type = TOKEN_INVALID_TYPE;
348 let mut interpreter = self.interpreter.take().unwrap();
350 let result = interpreter.match_token(self.mode, self);
352 self.interpreter = Some(interpreter);
353
354 let ttype = result.unwrap_or_else(|err| {
355 notify_listeners(&mut self.error_listeners.borrow_mut(), &err, self);
357 self.interpreter
358 .as_mut()
359 .unwrap()
360 .recover(err, self.input.as_mut().unwrap());
361 LEXER_SKIP
362 });
363 if self.input().la(1) == super::int_stream::EOF {
366 self.hit_eof = true;
367 }
368
369 if self.token_type == TOKEN_INVALID_TYPE {
370 self.token_type = ttype;
371 }
372
373 if self.token_type == LEXER_SKIP {
374 continue 'outer;
375 }
376
377 if self.token_type != LEXER_MORE {
378 break;
379 }
380 }
381
382 if self.token.is_none() {
383 self.emit();
384 break;
385 }
386 }
387 self.input().release(_marker);
388 self.token.take().unwrap()
389 }
390
391 fn get_line(&self) -> isize {
392 self.current_pos.line.get()
393 }
394
395 fn get_char_position_in_line(&self) -> isize {
396 self.current_pos.char_position_in_line.get()
397 }
398
399 fn get_input_stream(&mut self) -> Option<&mut dyn IntStream> {
400 match &mut self.input {
401 None => None,
402 Some(x) => Some(x as _),
403 }
404 }
405
406 fn get_source_name(&self) -> String {
407 self.input
408 .as_ref()
409 .map(|it| it.get_source_name())
410 .unwrap_or("<none>".to_string())
411 }
412
413 fn get_token_factory(&self) -> &'input TF {
418 self.factory
419 }
420}
421
422#[cold]
423#[inline(never)]
424fn notify_listeners<'input, T, Input, TF>(
425 liseners: &mut Vec<Box<dyn ErrorListener<'input, BaseLexer<'input, T, Input, TF>>>>,
426 e: &ANTLRError,
427 lexer: &BaseLexer<'input, T, Input, TF>,
428) where
429 T: LexerRecog<'input, BaseLexer<'input, T, Input, TF>> + 'static,
430 Input: CharStream<TF::From>,
431 TF: TokenFactory<'input>,
432{
433 let inner = lexer
434 .input
435 .as_ref()
436 .unwrap()
437 .get_text(lexer.token_start_char_index, lexer.get_char_index());
438 let text = format!(
439 "token recognition error at: '{}'",
440 TF::get_data(inner).to_display()
441 );
442 for listener in liseners.iter_mut() {
443 listener.syntax_error(
444 lexer,
445 None,
446 lexer.token_start_line,
447 lexer.token_start_column,
448 &text,
449 Some(e),
450 )
451 }
452}
453
454impl<'input, T, Input, TF> Lexer<'input> for BaseLexer<'input, T, Input, TF>
455where
456 T: LexerRecog<'input, Self> + 'static,
457 Input: CharStream<TF::From>,
458 TF: TokenFactory<'input>,
459{
460 type Input = Input;
461
462 fn input(&mut self) -> &mut Self::Input {
463 self.input.as_mut().unwrap()
464 }
465
466 fn set_channel(&mut self, v: isize) {
467 self.channel = v;
468 }
469
470 fn push_mode(&mut self, m: usize) {
471 self.mode_stack.push(self.mode);
472 self.mode = m;
473 }
474
475 fn pop_mode(&mut self) -> Option<usize> {
476 self.mode_stack.pop().map(|mode| {
477 self.mode = mode;
478 mode
479 })
480 }
481
482 fn set_type(&mut self, t: isize) {
483 self.token_type = t;
484 }
485
486 fn set_mode(&mut self, m: usize) {
487 self.mode = m;
488 }
489
490 fn more(&mut self) {
491 self.set_type(LEXER_MORE)
492 }
493
494 fn skip(&mut self) {
495 self.set_type(LEXER_SKIP)
496 }
497
498 fn reset(&mut self) {
499 unimplemented!()
500 }
501
502 fn get_interpreter(&self) -> Option<&LexerATNSimulator> {
503 self.interpreter.as_deref()
504 }
505}