1use std::borrow::Cow::Borrowed;
3use std::borrow::{Borrow, Cow};
4use std::cell::{Cell, RefCell};
5
6use std::rc::Rc;
7
8use crate::char_stream::{CharStream, InputData};
9use crate::error_listener::{ConsoleErrorListener, ErrorListener};
10use crate::errors::ANTLRError;
11use crate::int_stream::IntStream;
12use crate::lexer_atn_simulator::{ILexerATNSimulator, LexerATNSimulator};
13use crate::parser::ParserNodeType;
14
15use crate::recognizer::{Actions, Recognizer};
16use crate::rule_context::EmptyContextType;
17use crate::token::TOKEN_INVALID_TYPE;
18use crate::token_factory::{CommonTokenFactory, TokenAware, TokenFactory};
19use crate::token_source::TokenSource;
20use std::ops::{Deref, DerefMut};
21
/// Interface of an ANTLR lexer: a `TokenSource` with lexer-specific
/// mutable state (mode stack, channel, pending token type) that embedded
/// lexer actions manipulate while a token is being matched.
pub trait Lexer<'input>:
    TokenSource<'input>
    + Recognizer<'input, Node = EmptyContextType<'input, <Self as TokenAware<'input>>::TF>>
{
    /// Concrete input stream type this lexer reads symbols from.
    type Input: IntStream;

    /// Mutable access to the underlying input stream.
    fn input(&mut self) -> &mut Self::Input;

    /// Sets the channel the token currently being matched will be emitted on.
    fn set_channel(&mut self, v: isize);

    /// Saves the current mode on the mode stack and switches to mode `m`.
    fn push_mode(&mut self, m: usize);

    /// Restores the most recently pushed mode; returns it, or `None` if the
    /// mode stack is empty.
    fn pop_mode(&mut self) -> Option<usize>;

    /// Overrides the type of the token currently being matched.
    fn set_type(&mut self, t: isize);

    /// Switches to mode `m` without touching the mode stack.
    fn set_mode(&mut self, m: usize);

    /// Requests that matching continue, accumulating more text into the
    /// current token (sets the `LEXER_MORE` sentinel type).
    fn more(&mut self);

    /// Requests that the current token be discarded (sets the `LEXER_SKIP`
    /// sentinel type).
    fn skip(&mut self);

    #[doc(hidden)]
    fn reset(&mut self);

    #[doc(hidden)]
    fn get_interpreter(&self) -> Option<&LexerATNSimulator>;
}
65
/// Hooks implemented by generated lexers on top of their embedded `Actions`.
pub trait LexerRecog<'a, T: Recognizer<'a>>: Actions<'a, T> + Sized + 'static {
    /// Called by `BaseLexer::emit` just before a token is built, letting the
    /// generated lexer adjust state first. Default is a no-op.
    fn before_emit(_lexer: &mut T) {}
}
73
/// Core lexer state: wraps an input stream, an ATN simulator, a token
/// factory and the generated lexer's own actions/state (`recog`).
#[allow(missing_docs)]
pub struct BaseLexer<
    'input,
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input> = CommonTokenFactory,
> {
    /// ATN simulator that drives matching. Temporarily `take()`n out while
    /// it mutably borrows the lexer (see `next_token`).
    pub interpreter: Option<Box<LexerATNSimulator>>,
    /// Character stream being tokenized.
    pub input: Option<Input>,
    // Generated-lexer state and actions, exposed via Deref/DerefMut.
    recog: T,

    // Factory used by `emit`/`emit_eof` to build tokens.
    factory: &'input TF,

    // Error listeners notified on recognition errors. RefCell so they can
    // be borrowed mutably while the lexer itself is borrowed immutably.
    error_listeners: RefCell<Vec<Box<dyn ErrorListener<'input, Self>>>>,

    /// Char index at which the current token started.
    pub token_start_char_index: isize,
    /// Line on which the current token started.
    pub token_start_line: isize,
    /// Column at which the current token started.
    pub token_start_column: isize,
    // Live line/column position, shared with the ATN simulator via Rc.
    current_pos: Rc<LexerPosition>,
    /// Type of the token currently being matched.
    pub token_type: isize,
    /// Token pending emission, if one has been built.
    pub token: Option<TF::Tok>,
    // Set once lookahead reports EOF; next_token then emits an EOF token.
    hit_eof: bool,
    /// Channel the current token will be emitted on.
    pub channel: isize,
    // Saved modes for push_mode/pop_mode.
    mode_stack: Vec<usize>,
    /// Currently active lexer mode.
    pub mode: usize,
    /// Explicit override of the current token's text, set via `set_text`.
    pub text: Option<<TF::Data as ToOwned>::Owned>,
}
111
/// Current line/column of the lexer. Shared via `Rc` with the ATN
/// simulator; `Cell` permits updates through shared references.
#[derive(Debug)]
pub(crate) struct LexerPosition {
    pub(crate) line: Cell<isize>,
    pub(crate) char_position_in_line: Cell<isize>,
}
117
// Deref to the generated lexer state so its methods can be called
// directly on the base lexer.
impl<'input, T, Input, TF> Deref for BaseLexer<'input, T, Input, TF>
where
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input>,
{
    type Target = T;

    fn deref(&self) -> &Self::Target { &self.recog }
}
128
// Mutable counterpart of the Deref impl above.
impl<'input, T, Input, TF> DerefMut for BaseLexer<'input, T, Input, TF>
where
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input>,
{
    fn deref_mut(&mut self) -> &mut Self::Target { &mut self.recog }
}
137
impl<'input, T, Input, TF> Recognizer<'input> for BaseLexer<'input, T, Input, TF>
where
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input>,
{
    type Node = EmptyContextType<'input, TF>;

    /// Delegates semantic-predicate evaluation to the generated lexer's
    /// `Actions` implementation.
    fn sempred(
        &mut self,
        _localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
        rule_index: isize,
        action_index: isize,
    ) -> bool {
        <T as Actions<'input, Self>>::sempred(_localctx, rule_index, action_index, self)
    }

    /// Delegates embedded-action execution to the generated lexer's
    /// `Actions` implementation.
    fn action(
        &mut self,
        _localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
        rule_index: isize,
        action_index: isize,
    ) {
        <T as Actions<'input, Self>>::action(_localctx, rule_index, action_index, self)
    }
}
164
/// Initial lexer mode.
pub const LEXER_DEFAULT_MODE: usize = 0;
/// Sentinel token type set by `Lexer::more`: keep matching and extend the
/// current token.
pub const LEXER_MORE: isize = -2;
/// Sentinel token type set by `Lexer::skip` (and used on recognition
/// errors): discard the current token.
pub const LEXER_SKIP: isize = -3;

#[doc(inline)]
pub use super::token::TOKEN_DEFAULT_CHANNEL as LEXER_DEFAULT_TOKEN_CHANNEL;

#[doc(inline)]
pub use super::token::TOKEN_HIDDEN_CHANNEL as LEXER_HIDDEN;

/// Smallest input symbol value the lexer handles.
pub(crate) const LEXER_MIN_CHAR_VALUE: isize = 0x0000;
/// Largest input symbol value (maximum Unicode code point).
pub(crate) const LEXER_MAX_CHAR_VALUE: isize = 0x10FFFF;
183
impl<'input, T, Input, TF> BaseLexer<'input, T, Input, TF>
where
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input>,
{
    // Stores a fully built token so `next_token` can return it.
    fn emit_token(&mut self, token: TF::Tok) { self.token = Some(token); }

    /// Builds a token from the current lexer state via the token factory
    /// and stores it as the pending token.
    fn emit(&mut self) {
        // Let the generated lexer adjust state first.
        <T as LexerRecog<Self>>::before_emit(self);
        // Stop index is inclusive: one before the current position.
        let stop = self.get_char_index() - 1;
        let token = self.factory.create(
            Some(self.input.as_mut().unwrap()),
            self.token_type,
            self.text.take(), // consume any explicit text override
            self.channel,
            self.token_start_char_index,
            stop,
            self.token_start_line,
            self.token_start_column,
        );
        self.emit_token(token);
    }

    /// Builds and stores an EOF token at the current position
    /// (zero-length span: start is past the last consumed char).
    fn emit_eof(&mut self) {
        let token = self.factory.create(
            None::<&mut Input>,
            super::int_stream::EOF,
            None,
            LEXER_DEFAULT_TOKEN_CHANNEL,
            self.get_char_index(),
            self.get_char_index() - 1,
            self.get_line(),
            self.get_char_position_in_line(),
        );
        self.emit_token(token)
    }

    /// Current index into the input stream.
    pub fn get_char_index(&self) -> isize { self.input.as_ref().unwrap().index() }

    /// Text of the token currently being matched: the explicit override set
    /// via `set_text` if present, otherwise the input slice from the token
    /// start up to (and including) the previous char.
    pub fn get_text<'a>(&'a self) -> Cow<'a, TF::Data>
    where
        'input: 'a,
    {
        self.text
            .as_ref()
            .map(|it| Borrowed(it.borrow()))
            .unwrap_or_else(|| {
                let text = self
                    .input
                    .as_ref()
                    .unwrap()
                    .get_text(self.token_start_char_index, self.get_char_index() - 1);
                TF::get_data(text)
            })
    }

    /// Overrides the text of the token currently being matched.
    pub fn set_text(&mut self, _text: <TF::Data as ToOwned>::Owned) { self.text = Some(_text); }

    /// Registers an additional error listener.
    pub fn add_error_listener(&mut self, listener: Box<dyn ErrorListener<'input, Self>>) {
        self.error_listeners.borrow_mut().push(listener);
    }

    /// Removes all error listeners (including the default console listener).
    pub fn remove_error_listeners(&mut self) { self.error_listeners.borrow_mut().clear(); }

    /// Creates a lexer over `input`, wiring the shared line/column position
    /// into the ATN simulator. Starts in the default mode with a console
    /// error listener installed.
    pub fn new_base_lexer(
        input: Input,
        interpreter: LexerATNSimulator,
        recog: T,
        factory: &'input TF,
    ) -> Self {
        let mut lexer = Self {
            interpreter: Some(Box::new(interpreter)),
            input: Some(input),
            recog,
            factory,
            error_listeners: RefCell::new(vec![Box::new(ConsoleErrorListener {})]),
            token_start_char_index: 0,
            token_start_line: 0,
            token_start_column: 0,
            current_pos: Rc::new(LexerPosition {
                line: Cell::new(1),
                char_position_in_line: Cell::new(0),
            }),
            token_type: super::token::TOKEN_INVALID_TYPE,
            text: None,
            token: None,
            hit_eof: false,
            channel: super::token::TOKEN_DEFAULT_CHANNEL,
            mode_stack: Vec::new(),
            mode: self::LEXER_DEFAULT_MODE,
        };
        // Both the lexer and the simulator observe/update the same position.
        let pos = lexer.current_pos.clone();
        lexer.interpreter.as_mut().unwrap().current_pos = pos;
        lexer
    }
}
293
// Ties the lexer to its token factory type.
impl<'input, T, Input, TF> TokenAware<'input> for BaseLexer<'input, T, Input, TF>
where
    T: LexerRecog<'input, Self> + 'static,
    Input: CharStream<TF::From>,
    TF: TokenFactory<'input>,
{
    type TF = TF;
}
302
303impl<'input, T, Input, TF> TokenSource<'input> for BaseLexer<'input, T, Input, TF>
304where
305 T: LexerRecog<'input, Self> + 'static,
306 Input: CharStream<TF::From>,
307 TF: TokenFactory<'input>,
308{
309 type TF = TF;
310 #[inline]
311 #[allow(unused_labels)]
312 fn next_token(&mut self) -> <Self::TF as TokenFactory<'input>>::Tok {
313 assert!(self.input.is_some());
314
315 let _marker = self.input().mark();
316 'outer: loop {
317 if self.hit_eof {
318 self.emit_eof();
319 break;
320 }
321 self.token = None;
322 self.channel = LEXER_DEFAULT_TOKEN_CHANNEL;
323 self.token_start_column = self
324 .interpreter
325 .as_ref()
326 .unwrap()
327 .get_char_position_in_line();
328 self.token_start_line = self.interpreter.as_ref().unwrap().get_line();
329 self.text = None;
330 let index = self.input().index();
331 self.token_start_char_index = index;
332
333 'inner: loop {
334 self.token_type = TOKEN_INVALID_TYPE;
335 let mut interpreter = self.interpreter.take().unwrap();
337 let result = interpreter.match_token(self.mode, self);
339 self.interpreter = Some(interpreter);
340
341 let ttype = result.unwrap_or_else(|err| {
342 notify_listeners(&mut self.error_listeners.borrow_mut(), &err, self);
344 self.interpreter
345 .as_mut()
346 .unwrap()
347 .recover(err, self.input.as_mut().unwrap());
348 LEXER_SKIP
349 });
350 if self.input().la(1) == super::int_stream::EOF {
353 self.hit_eof = true;
354 }
355
356 if self.token_type == TOKEN_INVALID_TYPE {
357 self.token_type = ttype;
358 }
359
360 if self.token_type == LEXER_SKIP {
361 continue 'outer;
362 }
363
364 if self.token_type != LEXER_MORE {
365 break;
366 }
367 }
368
369 if self.token.is_none() {
370 self.emit();
371 break;
372 }
373 }
374 self.input().release(_marker);
375 self.token.take().unwrap()
376 }
377
378 fn get_line(&self) -> isize { self.current_pos.line.get() }
379
380 fn get_char_position_in_line(&self) -> isize { self.current_pos.char_position_in_line.get() }
381
382 fn get_input_stream(&mut self) -> Option<&mut dyn IntStream> {
383 match &mut self.input {
384 None => None,
385 Some(x) => Some(x as _),
386 }
387 }
388
389 fn get_source_name(&self) -> String {
390 self.input
391 .as_ref()
392 .map(|it| it.get_source_name())
393 .unwrap_or("<none>".to_string())
394 }
395
396 fn get_token_factory(&self) -> &'input TF { self.factory }
401}
402
403#[cold]
404#[inline(never)]
405fn notify_listeners<'input, T, Input, TF>(
406 liseners: &mut Vec<Box<dyn ErrorListener<'input, BaseLexer<'input, T, Input, TF>>>>,
407 e: &ANTLRError,
408 lexer: &BaseLexer<'input, T, Input, TF>,
409) where
410 T: LexerRecog<'input, BaseLexer<'input, T, Input, TF>> + 'static,
411 Input: CharStream<TF::From>,
412 TF: TokenFactory<'input>,
413{
414 let inner = lexer
415 .input
416 .as_ref()
417 .unwrap()
418 .get_text(lexer.token_start_char_index, lexer.get_char_index());
419 let text = format!(
420 "token recognition error at: '{}'",
421 TF::get_data(inner).to_display()
422 );
423 for listener in liseners.iter_mut() {
424 listener.syntax_error(
425 lexer,
426 None,
427 lexer.token_start_line,
428 lexer.token_start_column,
429 &text,
430 Some(e),
431 )
432 }
433}
434
435impl<'input, T, Input, TF> Lexer<'input> for BaseLexer<'input, T, Input, TF>
436where
437 T: LexerRecog<'input, Self> + 'static,
438 Input: CharStream<TF::From>,
439 TF: TokenFactory<'input>,
440{
441 type Input = Input;
442
443 fn input(&mut self) -> &mut Self::Input { self.input.as_mut().unwrap() }
444
445 fn set_channel(&mut self, v: isize) { self.channel = v; }
446
447 fn push_mode(&mut self, m: usize) {
448 self.mode_stack.push(self.mode);
449 self.mode = m;
450 }
451
452 fn pop_mode(&mut self) -> Option<usize> {
453 self.mode_stack.pop().map(|mode| {
454 self.mode = mode;
455 mode
456 })
457 }
458
459 fn set_type(&mut self, t: isize) { self.token_type = t; }
460
461 fn set_mode(&mut self, m: usize) { self.mode = m; }
462
463 fn more(&mut self) { self.set_type(LEXER_MORE) }
464
465 fn skip(&mut self) { self.set_type(LEXER_SKIP) }
466
467 fn reset(&mut self) { unimplemented!() }
468
469 fn get_interpreter(&self) -> Option<&LexerATNSimulator> { self.interpreter.as_deref() }
470}