1use std::fmt::{Display, Formatter};
4use crate::fixed_sym_table::{FixedSymTable, SymInfoTable};
5use crate::{AltId, TokenId, VarId};
6use crate::lexer::{Pos, PosSpan};
7use crate::log::Logger;
8use crate::alt::Alternative;
9
10pub(crate) mod tests;
11
12#[derive(Clone, Copy, Default, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
15pub enum Symbol {
16 T(TokenId), NT(VarId), #[default] Empty, End }
21
22impl Symbol {
23 pub fn is_end(&self) -> bool {
24 matches!(self, Symbol::End)
25 }
26
27 pub fn is_empty(&self) -> bool {
28 matches!(self, Symbol::Empty)
29 }
30
31 pub fn is_t(&self) -> bool {
32 matches!(self, Symbol::T(_))
33 }
34
35 pub fn is_nt(&self) -> bool {
36 matches!(self, Symbol::NT(_))
37 }
38
39 pub fn to_str<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
40 symbol_table.map(|t| t.get_str(self)).unwrap_or(self.to_string())
41 }
42
43 pub fn to_str_quote<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
46 symbol_table.map(|t| t.get_name_quote(self)).unwrap_or(format!("{}", self.to_string()))
47 }
48
49 pub fn to_str_name<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
50 symbol_table.map(|t| t.get_name(self)).unwrap_or(format!("{}", self.to_string()))
51 }
52
53 pub fn to_str_ext<T: SymInfoTable>(&self, symbol_table: Option<&T>, ext: &String) -> String {
55 let mut result = self.to_str(symbol_table);
56 if let Some(t) = symbol_table {
57 if t.is_symbol_t_data(self) {
58 result.push_str(&format!("({ext})"));
59 }
60 }
61 result
62 }
63
64 pub fn to_macro_item(&self) -> String {
66 match self {
67 Symbol::Empty => "e".to_string(),
68 Symbol::T(x) => format!("t {x}"),
69 Symbol::NT(x) => format!("nt {x}"),
70 Symbol::End => "end".to_string(),
71 }
72 }
73}
74
75impl Display for Symbol {
76 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
77 match self {
78 Symbol::Empty => write!(f, "ε"),
79 Symbol::T(id) => write!(f, ":{id}"),
80 Symbol::NT(id) => write!(f, "{id}"),
81 Symbol::End => write!(f, "$"),
82 }
83 }
84}
85
86#[derive(Clone, Copy, PartialEq, Debug)]
87pub enum OpCode {
88 Empty, T(TokenId), NT(VarId), Loop(VarId), Exit(VarId), End }
95
96
97impl Display for OpCode {
98 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
99 match self {
100 OpCode::Empty => write!(f, "ε"),
101 OpCode::T(t) => write!(f, ":{t}"),
102 OpCode::NT(v) => write!(f, "►{v}"),
103 OpCode::Loop(v) => write!(f, "●{v}"),
104 OpCode::Exit(v) => write!(f, "◄{v}"),
105 OpCode::End => write!(f, "$"),
106 }
107 }
108}
109
110impl OpCode {
111 pub fn is_loop(&self) -> bool {
112 matches!(self, OpCode::Loop(_))
113 }
114
115 pub fn is_empty(&self) -> bool {
116 matches!(self, OpCode::Empty)
117 }
118
119 pub fn has_span(&self) -> bool {
120 matches!(self, OpCode::T(_) | OpCode::NT(_))
121 }
122
123 pub fn matches(&self, s: Symbol) -> bool {
124 match self {
125 OpCode::Empty => s == Symbol::Empty,
126 OpCode::T(t) => s == Symbol::T(*t),
127 OpCode::NT(v) => s == Symbol::NT(*v),
128 OpCode::End => s == Symbol::End,
129 OpCode::Loop(_) => false,
130 OpCode::Exit(_) => false,
131 }
132 }
133
134 pub fn to_str<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
135 if let Some(t) = symbol_table {
136 match self {
137 OpCode::Empty => "ε".to_string(),
138 OpCode::T(v) => format!("{}{}", t.get_t_str(*v), if t.is_token_data(*v) { "!" } else { "" }),
139 OpCode::NT(v) => format!("►{}", t.get_nt_name(*v)),
140 OpCode::Loop(v) => format!("●{}", t.get_nt_name(*v)),
141 OpCode::Exit(f) => format!("◄{f}"),
142 OpCode::End => "$".to_string(),
143 }
144 } else {
145 self.to_string()
146 }
147 }
148
149 pub fn to_str_quote<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
150 if let Some(t) = symbol_table {
151 match self {
152 OpCode::T(v) => format!("{}{}", Symbol::T(*v).to_str_quote(symbol_table), if t.is_token_data(*v) { "!" } else { "" }),
153 _ => self.to_str(symbol_table)
154 }
155 } else {
156 self.to_string()
157 }
158 }
159
160 pub fn to_str_ext<T: SymInfoTable>(&self, symbol_table: Option<&T>, ext: &String) -> String {
161 let mut result = self.to_str(symbol_table);
162 if let Some(t) = symbol_table {
163 if let OpCode::T(tok) = self {
164 if t.is_symbol_t_data(&Symbol::T(*tok)) {
165 result.push_str(&format!("({ext})"));
166 }
167 }
168 }
169 result
170 }
171}
172
173impl From<Symbol> for OpCode {
174 fn from(value: Symbol) -> Self {
175 match value {
176 Symbol::Empty => OpCode::Empty,
177 Symbol::T(t) => OpCode::T(t),
178 Symbol::NT(v) => OpCode::NT(v),
179 Symbol::End => OpCode::End,
180 }
181 }
182}
183
184#[cfg(feature = "test_utils")]
185impl OpCode {
186 pub fn to_macro_item(&self) -> String {
187 match self {
188 OpCode::Empty => "e".to_string(),
189 OpCode::T(t) => format!("t {t}"),
190 OpCode::NT(v) => format!("nt {v}"),
191 OpCode::Loop(v) => format!("loop {v}"),
192 OpCode::Exit(v) => format!("exit {v}"),
193 OpCode::End => "end".to_string(),
194 }
195 }
196}
197
198#[derive(PartialEq, Debug)]
201pub enum Call { Enter, Loop, Exit, End }
202
203pub trait ListenerWrapper {
204 fn switch(&mut self, _call: Call, _nt: VarId, _alt_id: AltId, _t_data: Option<Vec<String>>) {}
206 fn check_abort_request(&self) -> bool { false }
209 fn abort(&mut self) {}
211 fn get_mut_log(&mut self) -> &mut impl Logger;
213 fn push_span(&mut self, _span: PosSpan) {}
215 fn is_stack_empty(&self) -> bool { true }
217 fn is_stack_t_empty(&self) -> bool { true }
219 fn is_stack_span_empty(&self) -> bool { true }
221}
222
223pub type ParserToken = (TokenId, String, PosSpan);
226
227#[derive(PartialEq, Debug)]
228pub enum ParserError {
229 SyntaxError,
230 TooManyErrors,
231 Irrecoverable,
232 ExtraSymbol,
233 UnexpectedEOS,
234 UnexpectedError,
235 EncounteredErrors,
236 AbortRequest,
237}
238
239impl Display for ParserError {
240 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
241 write!(f, "{}", match self {
242 ParserError::SyntaxError => "syntax error",
243 ParserError::TooManyErrors => "too many errors while trying to recover",
244 ParserError::Irrecoverable => "irrecoverable syntax error",
245 ParserError::ExtraSymbol => "extra symbol after end of parsing",
246 ParserError::UnexpectedEOS => "unexpected end of stream",
247 ParserError::UnexpectedError => "unexpected error",
248 ParserError::EncounteredErrors => "encountered errors",
249 ParserError::AbortRequest => "abort request",
250 })
251 }
252}
253
254pub struct Parser<'a> {
255 num_nt: usize,
256 num_t: usize,
257 alt_var: &'a [VarId],
258 alts: Vec<Alternative>,
259 opcodes: Vec<Vec<OpCode>>,
260 table: &'a [AltId],
261 symbol_table: FixedSymTable,
262 start: VarId,
263 try_recover: bool, }
265
266impl<'a> Parser<'a> {
267 pub const MAX_NBR_RECOVERS: u32 = 5;
269 pub const MAX_NBR_LEXER_ERRORS: u32 = 3;
270
271 pub fn new(
272 num_nt: usize,
273 num_t: usize,
274 alt_var: &'a [VarId],
275 alts: Vec<Alternative>,
276 opcodes: Vec<Vec<OpCode>>,
277 table: &'a [AltId],
278 symbol_table: FixedSymTable,
279 start: VarId,
280 ) -> Self {
281 Parser { num_nt, num_t, alt_var, alts, opcodes, table, symbol_table, start, try_recover: true }
282 }
283
284 pub fn get_symbol_table(&self) -> Option<&FixedSymTable> {
285 Some(&self.symbol_table)
286 }
287
288 pub fn set_start(&mut self, start: VarId) {
289 assert!(self.num_nt > start as usize);
290 self.start = start;
291 }
292
293 pub fn set_try_recover(&mut self, try_recover: bool) {
294 self.try_recover = try_recover;
295 }
296
297 fn simulate(&self, stream_sym: Symbol, mut stack: Vec<OpCode>, mut stack_sym: OpCode) -> bool {
300 const VERBOSE: bool = false;
301 let error_skip_alt_id = self.alt_var.len() as AltId;
302 let end_var_id = (self.num_t - 1) as VarId;
303 if VERBOSE { print!(" next symbol could be: {}?", stream_sym.to_str(self.get_symbol_table())); }
304
305 let ok = loop {
306 match (stack_sym, stream_sym) {
307 (OpCode::NT(var), _) | (OpCode::Loop(var), _) => {
308 let sr = if let Symbol::T(sr) = stream_sym { sr } else { end_var_id };
309 let alt_id = self.table[var as usize * self.num_t + sr as usize];
310 if alt_id >= error_skip_alt_id {
311 break false;
312 }
313 stack.extend(self.opcodes[alt_id as usize].clone());
314 stack_sym = stack.pop().unwrap();
315 }
316 (OpCode::Exit(_), _) => {
317 stack_sym = stack.pop().unwrap();
318 }
319 (OpCode::T(sk), Symbol::T(sr)) => {
320 break sk == sr;
321 }
322 (OpCode::End, Symbol::End) => {
323 break true;
324 }
325 (_, _) => {
326 break false;
327 }
328 }
329 };
330 if VERBOSE { println!(" {}", if ok { "yes" } else { "no" }); }
331 ok
332 }
333
334 pub fn parse_stream<I, L>(&mut self, wrapper: &mut L, mut stream: I) -> Result<(), ParserError>
341 where I: Iterator<Item=ParserToken>,
342 L: ListenerWrapper,
343 {
344 const VERBOSE: bool = false;
345 let sym_table: Option<&FixedSymTable> = Some(&self.symbol_table);
346 let mut stack = Vec::<OpCode>::new();
347 let mut stack_t = Vec::<String>::new();
348 let error_skip_alt_id = self.alt_var.len() as AltId;
349 let error_pop_alt_id = error_skip_alt_id + 1;
350 if VERBOSE { println!("skip = {error_skip_alt_id}, pop = {error_pop_alt_id}"); }
351 let mut recover_mode = false;
352 let mut nbr_recovers = 0;
353 let mut nbr_lexer_errors = 0;
354 let end_var_id = (self.num_t - 1) as VarId;
355 stack.push(OpCode::End);
356 stack.push(OpCode::NT(self.start));
357 let mut stack_sym = stack.pop().unwrap();
358 let mut stream_n = 0;
359 let mut stream_pos = None;
360 let mut stream_span = PosSpan::empty();
361 let mut stream_sym = Symbol::default(); let mut stream_str = String::default(); let mut advance_stream = true;
364 loop {
365 if advance_stream {
366 stream_n += 1;
367 (stream_sym, stream_str) = stream.next().map(|(t, s, span)| {
368 stream_pos = Some(span.first_forced());
369 stream_span = span;
370 (Symbol::T(t), s)
371 }).unwrap_or_else(|| {
372 if let Some((_t, s, _span)) = stream.next() {
374 (Symbol::Empty, s)
375 } else {
376 (Symbol::End, String::new())
377 }
378 });
379 advance_stream = false;
380 }
381 if VERBOSE {
382 println!("{:-<40}", "");
383 println!("input ({stream_n}{}): {} stack_t: [{}] stack: [{}] current: {}",
384 if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() },
385 stream_sym.to_str_ext(sym_table, &stream_str),
386 stack_t.join(", "),
387 stack.iter().map(|s| s.to_str(sym_table)).collect::<Vec<_>>().join(" "),
388 stack_sym.to_str(sym_table));
389 }
390 match (stack_sym, stream_sym) {
391 (_, Symbol::Empty) => {
392 if VERBOSE { println!("lexer error: {stream_str}"); }
394 wrapper.get_mut_log().add_error(format!("lexical error: {stream_str}"));
395 nbr_lexer_errors += 1;
396 if nbr_lexer_errors >= Self::MAX_NBR_LEXER_ERRORS {
397 wrapper.get_mut_log().add_note(format!("too many lexical errors ({nbr_lexer_errors}), giving up"));
398 wrapper.abort();
399 return Err(ParserError::TooManyErrors);
400 }
401 advance_stream = true;
402 }
403 (OpCode::NT(var), _) | (OpCode::Loop(var), _) => {
404 let sr = if let Symbol::T(sr) = stream_sym { sr } else { end_var_id };
405 let alt_id = self.table[var as usize * self.num_t + sr as usize];
406 if VERBOSE {
407 println!("- table[{var}, {sr}] = {alt_id}: {} -> {}",
408 Symbol::NT(var).to_str(self.get_symbol_table()),
409 if alt_id >= error_skip_alt_id {
410 "ERROR".to_string()
411 } else {
412 if let Some(a) = self.alts.get(alt_id as usize) {
413 a.to_str(sym_table)
414 } else {
415 "(alternative)".to_string()
416 }
417 });
418 }
419 if !recover_mode && alt_id >= error_skip_alt_id {
420 let expected = (0..self.num_t as VarId).filter(|t| self.table[var as usize * self.num_t + *t as usize] < error_skip_alt_id)
421 .filter(|t| self.simulate(Symbol::T(*t), stack.clone(), stack_sym))
422 .into_iter().map(|t| format!("'{}'", if t < end_var_id { Symbol::T(t).to_str(self.get_symbol_table()) } else { "<EOF>".to_string() }))
423 .collect::<Vec<_>>().join(", ");
424 let stream_sym_txt = if stream_sym.is_end() { "end of stream".to_string() } else { format!("input '{}'", stream_sym.to_str(sym_table)) };
425 let msg = format!("syntax error: found {stream_sym_txt} instead of {expected} while parsing '{}'{}",
426 stack_sym.to_str(sym_table),
427 if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() });
428 if self.try_recover {
429 wrapper.get_mut_log().add_error(msg);
430 if nbr_recovers >= Self::MAX_NBR_RECOVERS {
431 wrapper.get_mut_log().add_note(format!("too many errors ({nbr_recovers}), giving up"));
432 wrapper.abort();
433 return Err(ParserError::TooManyErrors);
434 }
435 nbr_recovers += 1;
436 recover_mode = true;
437 } else {
438 wrapper.get_mut_log().add_error(msg);
439 wrapper.abort();
440 return Err(ParserError::SyntaxError);
441 }
442 }
443 if recover_mode {
444 if VERBOSE { println!("!NT {} <-> {}, alt_id = {alt_id}", stack_sym.to_str(self.get_symbol_table()), stream_sym.to_str(self.get_symbol_table())); }
445 if alt_id == error_skip_alt_id {
446 if stream_sym == Symbol::End {
447 let msg = "irrecoverable error, reached end of stream".to_string();
448 if VERBOSE { println!("(recovering) {msg}"); }
449 wrapper.get_mut_log().add_note(msg);
450 wrapper.abort();
451 return Err(ParserError::Irrecoverable);
452 }
453 if VERBOSE { println!("(recovering) skipping token {}", stream_sym.to_str(self.get_symbol_table())); }
454 advance_stream = true;
455 } else if alt_id == error_pop_alt_id {
456 if VERBOSE { println!("(recovering) popping {}", stack_sym.to_str(self.get_symbol_table())); }
457 stack_sym = stack.pop().unwrap();
458 } else {
459 if alt_id < error_skip_alt_id {
460 recover_mode = false;
461 let pos_str = if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() };
462 wrapper.get_mut_log().add_note(format!("resynchronized on '{}'{pos_str}",
463 stream_sym.to_str(self.get_symbol_table())));
464 if VERBOSE { println!("(recovering) resynchronized{pos_str}"); }
465 } else {
466 panic!("illegal alt_id {alt_id}")
467 }
468 }
469 }
470 if !recover_mode {
471 let call = if stack_sym.is_loop() { Call::Loop } else { Call::Enter };
472 let t_data = std::mem::take(&mut stack_t);
473 if VERBOSE {
474 let f_str = if let Some(f) = &self.alts.get(alt_id as usize) {
475 f.to_str(sym_table)
476 } else {
477 "(alternative)".to_string()
478 };
479 println!(
480 "- to stack: [{}]",
481 self.opcodes[alt_id as usize].iter().filter(|s| !s.is_empty()).map(|s| s.to_str(sym_table))
482 .collect::<Vec<_>>().join(" "));
483 println!(
484 "- {} {} -> {f_str} ({}): [{}]",
485 if stack_sym.is_loop() { "LOOP" } else { "ENTER" },
486 Symbol::NT(self.alt_var[alt_id as usize]).to_str(sym_table), t_data.len(), t_data.iter()
487 .cloned().collect::<Vec<_>>().join(" "));
488 }
489 if nbr_recovers == 0 {
490 wrapper.switch(call, var, alt_id, Some(t_data));
491 }
492 stack.extend(self.opcodes[alt_id as usize].clone());
493 stack_sym = stack.pop().unwrap();
494 }
495 }
496 (OpCode::Exit(alt_id), _) => {
497 let var = self.alt_var[alt_id as usize];
498 let t_data = std::mem::take(&mut stack_t);
499 if VERBOSE {
500 println!(
501 "- EXIT {} syn ({}): [{}]",
502 Symbol::NT(var).to_str(sym_table), t_data.len(), t_data.iter()
503 .cloned().collect::<Vec<_>>().join(" "));
504 }
505 if nbr_recovers == 0 {
506 wrapper.switch(Call::Exit, var, alt_id, Some(t_data));
507 }
508 stack_sym = stack.pop().unwrap();
509 }
510 (OpCode::T(sk), Symbol::T(sr)) => {
511 if !recover_mode && sk != sr {
512 let msg = format!("syntax error: found input '{}' instead of '{}'{}", stream_sym.to_str(sym_table), stack_sym.to_str(sym_table),
513 if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() });
514 if self.try_recover {
515 wrapper.get_mut_log().add_error(msg);
516 if nbr_recovers >= Self::MAX_NBR_RECOVERS {
517 wrapper.get_mut_log().add_note(format!("too many errors ({nbr_recovers}), giving up"));
518 wrapper.abort();
519 return Err(ParserError::TooManyErrors);
520 }
521 nbr_recovers += 1;
522 recover_mode = true;
523 } else {
524 wrapper.get_mut_log().add_error(msg);
525 wrapper.abort();
526 return Err(ParserError::SyntaxError);
527 }
528 }
529 if recover_mode {
530 if VERBOSE { println!("!T {} <-> {}", stack_sym.to_str(self.get_symbol_table()), stream_sym.to_str(self.get_symbol_table())); }
531 if sk == sr {
532 recover_mode = false;
533 let pos_str = if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() };
534 wrapper.get_mut_log().add_note(format!("resynchronized on '{}'{pos_str}",
535 stream_sym.to_str(self.get_symbol_table())));
536 if VERBOSE { println!("(recovering) resynchronized{pos_str}"); }
537 } else {
538 if VERBOSE { println!("(recovering) popping {}", stack_sym.to_str(self.get_symbol_table())); }
539 stack_sym = stack.pop().unwrap();
540 }
541 }
542 if !recover_mode {
543 if VERBOSE { println!("- MATCH {}", stream_sym.to_str(sym_table)); }
544 if self.symbol_table.is_token_data(sk) {
545 stack_t.push(std::mem::take(&mut stream_str)); }
547 stack_sym = stack.pop().unwrap();
548 wrapper.push_span(stream_span.take());
549 advance_stream = true;
550 }
551 }
552 (OpCode::End, Symbol::End) => {
553 if nbr_recovers == 0 {
554 wrapper.switch(Call::End, 0, 0, None);
555 }
556 break;
557 }
558 (OpCode::End, _) => {
559 wrapper.get_mut_log().add_error(format!("syntax error: found extra symbol '{}' after end of parsing", stream_sym.to_str(sym_table)));
560 wrapper.abort();
561 return Err(ParserError::ExtraSymbol);
562 }
563 (_, Symbol::End) => {
564 wrapper.get_mut_log().add_error(format!("syntax error: found end of stream instead of '{}'", stack_sym.to_str(sym_table)));
565 wrapper.abort();
566 return Err(ParserError::UnexpectedEOS);
567 }
568 (_, _) => {
569 wrapper.get_mut_log().add_error(format!("unexpected syntax error: input '{}' while expecting '{}'{}",
570 stream_sym.to_str(sym_table), stack_sym.to_str(sym_table),
571 if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() }));
572 wrapper.abort();
573 return Err(ParserError::UnexpectedError);
574 }
575 }
576 if wrapper.check_abort_request() {
577 wrapper.abort();
578 return Err(ParserError::AbortRequest);
579 }
580 }
581 assert!(stack_t.is_empty(), "stack_t: {}", stack_t.join(", "));
582 assert!(stack.is_empty(), "stack: {}", stack.iter().map(|s| s.to_str(sym_table)).collect::<Vec<_>>().join(", "));
583 if nbr_recovers == 0 {
584 assert!(wrapper.is_stack_empty(), "symbol stack isn't empty");
585 assert!(wrapper.is_stack_t_empty(), "text stack isn't empty");
586 assert!(wrapper.is_stack_span_empty(), "span stack isn't empty");
587 Ok(())
588 } else {
589 wrapper.abort();
591 Err(ParserError::EncounteredErrors)
592 }
593 }
594}
595
596#[cfg(feature = "test_utils")]
597impl<'a> Parser<'a> {
598 pub fn get_alt_var(&self) -> &[VarId] {
599 self.alt_var
600 }
601
602 pub fn get_alts(&self) -> &Vec<Alternative> {
603 &self.alts
604 }
605
606 pub fn get_opcodes(&self) -> &Vec<Vec<OpCode>> {
607 &self.opcodes
608 }
609}