1use litrs::StringLit;
2use proc_macro2::{Delimiter, TokenTree};
3use std::collections::{HashMap, HashSet};
4use std::mem;
5use syn::parse::{Parse, ParseStream};
6use syn::Error;
7
8pub mod lexer;
9pub mod parser;
10
11pub fn parse_rules(tokens: &Vec<TokenTree>) -> Result<(HashMap<String, BNFRule>, bool), Error> {
12 let mut non_terminal_symbol_name = String::new();
13 let mut buffered_tokens = Vec::<TokenTree>::new();
14 let mut rule_map = HashMap::<String, BNFRule>::new();
15
16 let mut i = 0;
17 let mut token = &tokens[0];
18
19 let mut non_duplicate_number = NonDuplicateNumber::new();
20 let mut unnamed_pattern_map = HashMap::new();
21
22 let mut generate_code = true;
23 if let TokenTree::Punct(punctuation) = tokens.first().unwrap() {
24 if punctuation.as_char() != '#' {
25 return Err(Error::new(token.span(), "Invalid syntax."));
26 }
27
28 if let TokenTree::Group(group) = next(tokens, &mut i)? {
29 if group.delimiter() != Delimiter::Bracket {
30 return Err(Error::new(token.span(), "Invalid syntax."));
31 }
32
33 let mut i = 0;
34 let tokens = group.stream().into_iter().collect::<Vec<_>>();
35
36 check_current_identifier(&tokens, &mut i, "generate_code")?;
37 check_next_punct(&tokens, &mut i, '=')?;
38
39 if let TokenTree::Ident(identifier) = next(&tokens, &mut i)? {
40 generate_code = match identifier.to_string().as_str() {
41 "true" => true,
42 "false" => false,
43 _ => return Err(Error::new(token.span(), "Invalid syntax.")),
44 }
45 }
46 }
47
48 i += 1;
49 token = &tokens[i];
50 }
51
52 let mut is_first_rule_token = true;
53
54 loop {
55 buffered_tokens.push(token.clone());
56
57 if is_first_rule_token {
58 buffered_tokens.pop();
59
60 non_terminal_symbol_name = match token {
61 TokenTree::Ident(ident) => ident.to_string(),
62 _ => {
63 return Err(Error::new(
64 token.span(),
65 "Invalid non terminal symbol name.",
66 ))
67 }
68 };
69
70 check_next_punct(&tokens, &mut i, ':')?;
71 check_next_punct(&tokens, &mut i, ':')?;
72 check_next_punct(&tokens, &mut i, '=')?;
73
74 token = next(&tokens, &mut i)?;
75 } else {
76 match check_next_punct(&tokens, &mut i, ':') {
77 Ok(_) => {
78 buffered_tokens.pop();
79
80 parse_rule(
81 &mut rule_map,
82 &mut non_terminal_symbol_name,
83 &buffered_tokens,
84 &mut non_duplicate_number,
85 &mut unnamed_pattern_map,
86 )?;
87
88 buffered_tokens.clear();
89
90 non_terminal_symbol_name = match token {
91 TokenTree::Ident(ident) => ident.to_string(),
92 _ => {
93 return Err(Error::new(
94 token.span(),
95 "Invalid non terminal symbol name.",
96 ))
97 }
98 };
99
100 check_next_punct(&tokens, &mut i, ':')?;
101 check_next_punct(&tokens, &mut i, '=')?;
102
103 token = next(&tokens, &mut i)?;
104 }
105 Err(_) => {
106 token = &tokens[i];
107 }
108 }
109 }
110
111 is_first_rule_token = false;
112
113 if i + 1 == tokens.len() {
114 buffered_tokens.push(token.clone());
115 parse_rule(
116 &mut rule_map,
117 &mut non_terminal_symbol_name,
118 &buffered_tokens,
119 &mut non_duplicate_number,
120 &mut unnamed_pattern_map,
121 )?;
122 break;
123 }
124 }
125
126 return Ok((rule_map, generate_code));
127}
128
129fn check_next_punct(tokens: &Vec<TokenTree>, i: &mut usize, char: char) -> Result<(), Error> {
130 let token = next(tokens, i)?;
131
132 return match token {
133 TokenTree::Punct(punct) => {
134 if punct.as_char() == char {
135 Ok(())
136 } else {
137 Err(Error::new(punct.span(), "Invalid syntax."))
138 }
139 }
140 _ => Err(Error::new(token.span(), "Invalid syntax.")),
141 };
142}
143
144fn check_current_identifier(
145 tokens: &Vec<TokenTree>,
146 i: &mut usize,
147 identifier_str: &str,
148) -> Result<(), Error> {
149 let token = current(tokens, i)?;
150
151 return match token {
152 TokenTree::Ident(identifier) => {
153 if identifier.to_string().as_str() == identifier_str {
154 Ok(())
155 } else {
156 Err(Error::new(
157 identifier.span(),
158 format!("Invalid syntax.{}", identifier.to_string()),
159 ))
160 }
161 }
162 _ => Err(Error::new(token.span(), "Invalid syntax.1")),
163 };
164}
165
166fn next<'a>(tokens: &'a Vec<TokenTree>, i: &mut usize) -> Result<&'a TokenTree, Error> {
167 *i += 1;
168 if *i == tokens.len() {
169 return Err(Error::new(
170 tokens[tokens.len() - 1].span(),
171 "Unexpected EOF.",
172 ));
173 }
174 return Ok(&tokens[*i]);
175}
176
177fn current<'a>(tokens: &'a Vec<TokenTree>, i: &usize) -> Result<&'a TokenTree, Error> {
178 if *i == tokens.len() {
179 return Err(Error::new(
180 tokens[tokens.len() - 1].span(),
181 "Unexpected EOF.",
182 ));
183 }
184 return Ok(&tokens[*i]);
185}
186
187fn parse_rule(
188 rule_map: &mut HashMap<String, BNFRule>,
189 non_terminal_symbol_name: &mut String,
190 tokens: &Vec<TokenTree>,
191 non_duplicate_number: &mut NonDuplicateNumber,
192 unnamed_pattern_map: &mut HashMap<Vec<Vec<BNFSymbol>>, String>,
193) -> Result<(), Error> {
194 let mut rule = BNFRule::new(non_terminal_symbol_name.clone());
195
196 let mut pattern = Vec::<BNFSymbol>::new();
197 let or_patterns = &mut rule.or_patterns;
198
199 let mut index = 0;
200 loop {
201 if index >= tokens.len() {
202 break;
203 }
204 let token = &tokens[index];
205
206 match token {
207 TokenTree::Punct(punct) => {
208 if punct.as_char() != '|' {
209 return Err(Error::new(punct.span(), "Invalid punctuation."));
210 }
211 if pattern.is_empty() {
212 pattern.push(BNFSymbol::Null);
213 }
214
215 let mut pattern_temp = Vec::new();
216 mem::swap(&mut pattern_temp, &mut pattern);
217
218 or_patterns.push(pattern_temp);
219 }
220 TokenTree::Ident(ident) => {
221 if ident.to_string() == "fn" {
222 let next_index = index + 1;
223 if next_index >= tokens.len() {
224 return Err(Error::new(ident.span(), "A function must be specified."));
225 }
226 let next_token = &tokens[next_index];
227 if let TokenTree::Group(next_token) = next_token {
228 if next_token.delimiter() != Delimiter::Parenthesis {
229 return Err(Error::new(next_token.span(), "Invalid delimiter."));
230 }
231 let ident_chars = next_token.to_string().chars().collect::<Vec<char>>();
232 if ident_chars.len() <= 2 {
233 return Err(Error::new(
234 next_token.span(),
235 "A function must be specified.",
236 ));
237 }
238 let func_string = ident_chars[1..(ident_chars.len() - 1)]
239 .iter()
240 .collect::<String>();
241 pattern.push(BNFSymbol::TerminalSymbolFunction(func_string));
242
243 index = next_index;
244 } else {
245 return Err(Error::new(ident.span(), "A function must be specified."));
246 }
247 } else {
248 pattern.push(BNFSymbol::NonTerminalSymbolName(ident.to_string()));
249 }
250 }
251 TokenTree::Group(group) => {
252 let delimiter = group.delimiter();
253 let symbols = group.stream().into_iter().collect::<Vec<TokenTree>>();
254 let mut new_symbol_name = non_duplicate_number.as_symbol_name();
255
256 match delimiter {
257 Delimiter::Parenthesis => {
258 parse_rule(
259 rule_map,
260 &mut new_symbol_name,
261 &symbols,
262 non_duplicate_number,
263 unnamed_pattern_map,
264 )?;
265 }
266 Delimiter::Brace => {
267 let mut new_pattern_name = non_duplicate_number.as_symbol_name();
269 parse_rule(
270 rule_map,
271 &mut new_pattern_name,
272 &symbols,
273 non_duplicate_number,
274 unnamed_pattern_map,
275 )?;
276
277 let mut rule = BNFRule::new(new_symbol_name.clone());
278 let or_patterns = &mut rule.or_patterns;
279
280 or_patterns.push(vec![BNFSymbol::Null]);
281 or_patterns.push(vec![
282 BNFSymbol::NonTerminalSymbolName(new_pattern_name),
283 BNFSymbol::NonTerminalSymbolName(new_symbol_name.clone()),
284 ]);
285
286 rule_map.insert(rule.non_terminal_symbol_name.clone(), rule);
287 }
288 Delimiter::Bracket => {
289 let mut new_pattern_name = non_duplicate_number.as_symbol_name();
291 parse_rule(
292 rule_map,
293 &mut new_pattern_name,
294 &symbols,
295 non_duplicate_number,
296 unnamed_pattern_map,
297 )?;
298
299 let mut rule = BNFRule::new(new_symbol_name.clone());
300 let or_patterns = &mut rule.or_patterns;
301
302 or_patterns.push(vec![BNFSymbol::NonTerminalSymbolName(new_pattern_name)]);
303 or_patterns.push(vec![BNFSymbol::Null]);
304
305 rule_map.insert(rule.non_terminal_symbol_name.clone(), rule);
306 }
307 _ => {
308 return Err(Error::new(group.span(), "Invalid delimiter."));
309 }
310 }
311
312 pattern.push(BNFSymbol::NonTerminalSymbolName(new_symbol_name.clone()));
313 }
314 TokenTree::Literal(literal) => {
315 let string = match StringLit::try_from(literal) {
316 Ok(string) => string.value().to_string(),
317 Err(err) => {
318 return Err(Error::new(
319 literal.span(),
320 format!("Invalid terminal symbol. {}", err.to_string()),
321 ))
322 }
323 };
324
325 if literal.to_string().starts_with("r") {
326 pattern.push(BNFSymbol::TerminalSymbolRegex(string));
327 } else {
328 pattern.push(BNFSymbol::TerminalSymbolString(string));
329 }
330 }
331 }
332
333 index += 1;
334 }
335
336 if !pattern.is_empty() {
337 or_patterns.push(pattern);
338 }
339
340 if non_terminal_symbol_name.starts_with(' ') {
342 match unnamed_pattern_map.get(or_patterns) {
343 Some(temp_name) => {
344 *non_terminal_symbol_name = temp_name.clone();
345 }
346 _ => {
347 unnamed_pattern_map.insert(or_patterns.clone(), non_terminal_symbol_name.clone());
348
349 rule.non_terminal_symbol_name = non_terminal_symbol_name.clone();
350 rule_map.insert(non_terminal_symbol_name.clone(), rule);
351 }
352 }
353 } else {
354 rule.non_terminal_symbol_name = non_terminal_symbol_name.clone();
355 rule_map.insert(non_terminal_symbol_name.clone(), rule);
356 }
357
358 return Ok(());
359}
360
361#[derive()]
362pub struct TokenParser {
363 pub symbols: Vec<TokenTree>,
364}
365
366impl Parse for TokenParser {
367 fn parse(input: ParseStream) -> syn::Result<Self> {
368 let mut symbols = Vec::<TokenTree>::new();
369
370 while !input.is_empty() {
371 let token_tree = TokenTree::parse(input).unwrap();
372
373 symbols.push(token_tree);
374 }
375
376 return Ok(TokenParser { symbols });
377 }
378}
379
380#[derive(Debug)]
381pub struct BNFRule {
382 pub non_terminal_symbol_name: String,
383 pub or_patterns: Vec<Vec<BNFSymbol>>,
384 pub first_set: HashSet<BNFSymbol>,
385 pub is_nullable: bool,
386}
387
388impl BNFRule {
389 pub fn new(non_terminal_symbol_name: String) -> Self {
390 return Self {
391 non_terminal_symbol_name,
392 or_patterns: Vec::new(),
393 first_set: HashSet::new(),
394 is_nullable: false,
395 };
396 }
397}
398
399#[derive(Debug, Eq, Clone, Hash, PartialEq)]
400pub enum BNFSymbol {
401 NonTerminalSymbolName(String),
402 TerminalSymbolString(String),
403 TerminalSymbolRegex(String),
404 TerminalSymbolFunction(String),
405 Null,
406 EOF,
407}
408
409impl BNFSymbol {
410 pub fn is_terminal_symbol(&self) -> bool {
411 return match self {
412 BNFSymbol::NonTerminalSymbolName(_) => false,
413 _ => true,
414 };
415 }
416
417 pub fn get_symbol_name(&self) -> &str {
418 return match self {
419 BNFSymbol::TerminalSymbolFunction(name) => name.as_str(),
420 BNFSymbol::NonTerminalSymbolName(name) => name.as_str(),
421 BNFSymbol::TerminalSymbolString(name) => name.as_str(),
422 BNFSymbol::TerminalSymbolRegex(name) => name.as_str(),
423 BNFSymbol::Null => "Null",
424 BNFSymbol::EOF => "EOF",
425 };
426 }
427}
428
429pub struct NonDuplicateNumber {
430 number: usize,
431}
432
433impl NonDuplicateNumber {
434 pub fn new() -> Self {
435 return Self { number: 0 };
436 }
437
438 pub fn next(&mut self) -> usize {
439 self.number += 1;
440 return self.number;
441 }
442
443 pub fn as_symbol_name(&mut self) -> String {
444 return format!(" {}", self.next());
445 }
446}
447
448pub struct ParserGenerator {
449 rule_map: HashMap<String, BNFRule>,
450 single_pattern_rules: Vec<SinglePatternRule>,
451 symbol_id_map: HashMap<BNFSymbol, usize>,
452}
453
454impl ParserGenerator {
455 pub fn new(mut rule_map: HashMap<String, BNFRule>) -> Self {
456 let mut source_rule = BNFRule::new(" source".to_string());
457 source_rule
458 .or_patterns
459 .push(vec![BNFSymbol::NonTerminalSymbolName("source".to_string())]);
460
461 rule_map.insert(" source".to_string(), source_rule);
462
463 let mut single_pattern_rules = Vec::<SinglePatternRule>::new();
464 for rule in rule_map.values() {
465 for pattern in rule.or_patterns.iter() {
466 let mut new_pattern = Vec::<BNFSymbol>::new();
467 for symbol in pattern.iter() {
468 match symbol {
469 BNFSymbol::Null => {}
470 BNFSymbol::EOF => {}
471 _ => new_pattern.push(symbol.clone()),
472 }
473 }
474
475 single_pattern_rules.push(SinglePatternRule::new(
476 rule.non_terminal_symbol_name.clone(),
477 new_pattern,
478 ));
479 }
480 }
481
482 let mut symbol_id_map = HashMap::<BNFSymbol, usize>::new();
483 let mut last_id = 0;
484
485 symbol_id_map.insert(BNFSymbol::EOF, last_id);
486 last_id += 1;
487
488 for rule in rule_map.values() {
489 for pattern in rule.or_patterns.iter() {
490 for symbol in pattern.iter() {
491 match symbol {
492 BNFSymbol::Null => {}
493 BNFSymbol::EOF => {}
494 _ => {
495 if !symbol_id_map.contains_key(symbol) {
496 symbol_id_map.insert(symbol.clone(), last_id);
497 last_id += 1;
498 }
499 }
500 }
501 }
502 }
503 }
504
505 symbol_id_map.insert(
506 BNFSymbol::NonTerminalSymbolName(" source".to_string()),
507 last_id,
508 );
509
510 return Self {
511 rule_map,
512 single_pattern_rules,
513 symbol_id_map,
514 };
515 }
516
517 pub fn generate(&mut self, generate_code: bool) -> Result<String, String> {
518 self.search_nulls_and_first_set();
519 return Ok(self.generate_parser(generate_code)?);
520 }
521
522 fn search_nulls_and_first_set(&mut self) {
523 let mut rule_nullable_map = HashMap::<String, bool>::new();
524
525 loop {
526 let mut retry = false;
527
528 for rule in self.rule_map.values() {
529 match rule_nullable_map.get(&rule.non_terminal_symbol_name) {
530 Some(is_nullable) => {
531 if *is_nullable {
532 continue;
533 }
534 }
535 _ => {}
536 }
537
538 let mut is_nullable_rule = false;
539
540 if rule.or_patterns.is_empty() {
541 is_nullable_rule = true;
542 }
543
544 for pattern in rule.or_patterns.iter() {
545 let mut nullable_count = 0;
546
547 for symbol in pattern.iter() {
548 match symbol {
549 BNFSymbol::NonTerminalSymbolName(name) => {
550 match rule_nullable_map.get(name) {
551 Some(is_nullable) => {
552 if *is_nullable {
553 nullable_count += 1;
554 }
555 }
556 _ => {}
557 }
558 }
559 BNFSymbol::Null => {
560 nullable_count += 1;
561 }
562 _ => {}
563 }
564 }
565
566 is_nullable_rule |= nullable_count == pattern.len();
567 }
568
569 rule_nullable_map.insert(rule.non_terminal_symbol_name.clone(), is_nullable_rule);
570 if is_nullable_rule {
571 retry = true;
572 }
573 }
574
575 if !retry {
576 break;
577 }
578 }
579
580 for entry in rule_nullable_map.iter() {
581 self.rule_map.get_mut(entry.0).unwrap().is_nullable = *entry.1
582 }
583
584 let mut rule_first_set_map = HashMap::<String, HashSet<BNFSymbol>>::new();
585
586 loop {
587 let mut retry = false;
588
589 for rule in self.rule_map.values() {
590 match rule_first_set_map.get_mut(&rule.non_terminal_symbol_name) {
591 Some(_) => {}
592 _ => {
593 rule_first_set_map
594 .insert(rule.non_terminal_symbol_name.clone(), HashSet::new());
595 }
596 };
597
598 let first_set = rule_first_set_map
599 .get(&rule.non_terminal_symbol_name)
600 .unwrap();
601 let mut first_set_add = HashSet::<BNFSymbol>::new();
602
603 for pattern in rule.or_patterns.iter() {
604 for symbol in pattern.iter() {
605 match symbol {
606 BNFSymbol::NonTerminalSymbolName(name) => {
607 let is_nullable = match rule_nullable_map.get(name) {
608 Some(is_nullable) => *is_nullable,
609 _ => false,
610 };
611
612 match rule_first_set_map.get(name) {
613 Some(sub_set) => {
614 for symbol in sub_set.iter() {
615 if !first_set.contains(symbol) {
616 first_set_add.insert(symbol.clone());
617 retry = true;
618 }
619 }
620 }
621 _ => {}
622 }
623
624 if !is_nullable {
625 break;
626 }
627 }
628 BNFSymbol::TerminalSymbolString(_)
629 | BNFSymbol::TerminalSymbolFunction(_)
630 | BNFSymbol::TerminalSymbolRegex(_) => {
631 if !first_set.contains(symbol) {
632 first_set_add.insert(symbol.clone());
633 retry = true;
634 }
635 break;
636 }
637 BNFSymbol::EOF => {
638 if !first_set.contains(symbol) {
639 first_set_add.insert(symbol.clone());
640 retry = true;
641 }
642 break;
643 }
644 _ => {}
645 }
646 }
647 }
648
649 let first_set = rule_first_set_map
650 .get_mut(&rule.non_terminal_symbol_name)
651 .unwrap();
652 first_set.extend(first_set_add);
653 }
654
655 if !retry {
656 break;
657 }
658 }
659
660 for entry in rule_nullable_map.iter() {
661 let symbol_name = entry.0;
662 let is_nullable = entry.1;
663 let rule = self.rule_map.get_mut(symbol_name).unwrap();
664 rule.is_nullable = *is_nullable;
665 }
666
667 for entry in rule_first_set_map.iter() {
668 let symbol_name = entry.0;
669 let first_set = entry.1;
670 let rule = self.rule_map.get_mut(symbol_name).unwrap();
671 rule.first_set = first_set.clone();
672 }
673 }
674
675 fn generate_parser(&self, generate_code: bool) -> Result<String, String> {
676 let mut lr_group_map = HashMap::<usize, LRGroup>::new();
677 let mut not_scanned_group_list = Vec::<usize>::new();
678 let mut last_group_number = 0;
679
680 loop {
681 let lr_group_number = if last_group_number == 0 {
682 let mut first_group = LRGroup::new(0);
683
684 let source_first_rule = self.rule_map.get(" source").unwrap();
685
686 for pattern in source_first_rule.or_patterns.iter() {
687 let mut item = LRItem::new(" source".to_string());
688 item.pattern.extend(pattern.clone());
689 item.first_set.insert(BNFSymbol::EOF);
690 first_group.default_item_list.push(item.clone());
691 first_group.item_list.push(item);
692 }
693
694 self.add_items(&mut first_group);
695
696 lr_group_map.insert(0, first_group);
697 last_group_number += 1;
698 0
699 } else {
700 match not_scanned_group_list.pop() {
701 Some(number) => number,
702 _ => break,
703 }
704 };
705
706 let lr_group = lr_group_map.get(&lr_group_number).unwrap();
707
708 let mut next_group_map = HashMap::<BNFSymbol, Vec<&LRItem>>::new();
709 for item in lr_group.item_list.iter() {
710 match item.get_next_symbol() {
711 Some(symbol) => {
712 let list = next_group_map.entry(symbol).or_insert_with(|| Vec::new());
713 list.push(item);
714 }
715 _ => continue,
716 }
717 }
718
719 let mut next_group_number_map = HashMap::<BNFSymbol, usize>::new();
720 let mut add_group_map = HashMap::<usize, LRGroup>::new();
721
722 for entry in next_group_map.iter() {
723 let symbol = entry.0;
724 let items = entry.1;
725
726 let mut next_group_items = Vec::<LRItem>::new();
727 for item in items.iter() {
728 next_group_items.push(item.create_next().unwrap());
729 }
730
731 let mut group_number = Option::<usize>::None;
732 for entry in lr_group_map.iter() {
733 let number = entry.0;
734 let lr_group = entry.1;
735
736 let mut is_all_matched = true;
737
738 if next_group_items.len() == lr_group.default_item_list.len() {
739 for i in 0..next_group_items.len() {
740 let item = &next_group_items[i];
741 let mut found = false;
742
743 for j in 0..lr_group.default_item_list.len() {
744 let group_item = &lr_group.default_item_list[j];
745
746 if item.root_name == group_item.root_name
747 && item.current_position == group_item.current_position
748 && item.pattern == group_item.pattern
749 && item.first_set == group_item.first_set
750 {
751 found = true;
752 break;
753 }
754 }
755
756 if !found {
757 is_all_matched = false;
758 break;
759 }
760 }
761 } else {
762 is_all_matched = false;
763 }
764
765 if is_all_matched {
766 group_number = Some(*number);
767 }
768 }
769
770 match group_number {
771 Some(number) => {
772 next_group_number_map.insert(symbol.clone(), number);
773 }
774 _ => {
775 let mut new_group = LRGroup::new(last_group_number);
776 new_group.default_item_list = next_group_items.clone();
777 new_group.item_list = next_group_items;
778
779 self.add_items(&mut new_group);
780
781 add_group_map.insert(last_group_number, new_group);
782 not_scanned_group_list.push(last_group_number);
783
784 next_group_number_map.insert(symbol.clone(), last_group_number);
785
786 last_group_number += 1;
787 }
788 }
789 }
790
791 let lr_group = lr_group_map.get_mut(&lr_group_number).unwrap();
792 lr_group.next_group_number_map = next_group_number_map;
793
794 lr_group_map.extend(add_group_map);
795 }
796
797 let mut table = Vec::<Vec<Option<Operation>>>::new();
798 for group_number in 0..lr_group_map.len() {
799 let group = lr_group_map.get(&group_number).unwrap();
800 let mut operation_map = HashMap::<BNFSymbol, Operation>::new();
801
802 for entry in group.next_group_number_map.iter() {
803 let symbol = entry.0;
804 let next_group_number = *entry.1;
805
806 match symbol {
807 BNFSymbol::NonTerminalSymbolName(_) => {
808 self.insert_opreration(
809 &mut operation_map,
810 symbol,
811 Operation::GoTo(next_group_number),
812 )?;
813 }
814 BNFSymbol::TerminalSymbolString(_) => {
815 self.insert_opreration(
816 &mut operation_map,
817 symbol,
818 Operation::Shift(next_group_number),
819 )?;
820 }
821 BNFSymbol::TerminalSymbolRegex(_) => {
822 self.insert_opreration(
823 &mut operation_map,
824 symbol,
825 Operation::Shift(next_group_number),
826 )?;
827 }
828 BNFSymbol::TerminalSymbolFunction(_) => {
829 self.insert_opreration(
830 &mut operation_map,
831 symbol,
832 Operation::Shift(next_group_number),
833 )?;
834 }
835 _ => {
836 return Err(format!("Unexpected symbol. {:?}", symbol));
837 }
838 }
839 }
840
841 for item in group.item_list.iter() {
842 if item.is_last_position() {
843 if item.root_name == " source" {
844 self.insert_opreration(
845 &mut operation_map,
846 &BNFSymbol::EOF,
847 Operation::Accept,
848 )?;
849 } else {
850 let pattern_id = self.get_pattern_id(item)?;
851 for symbol in item.first_set.iter() {
852 self.insert_opreration(
853 &mut operation_map,
854 symbol,
855 Operation::Reduce(pattern_id),
856 )?;
857 }
858 }
859 }
860 }
861
862 let mut operations = Vec::<Option<Operation>>::new();
863 for _ in 0..self.symbol_id_map.len() {
864 operations.push(None);
865 }
866
867 for entry in operation_map.iter() {
868 let symbol_id = self.get_symbol_id(entry.0)?;
869 let operation = entry.1;
870 operations[symbol_id] = Some(operation.clone());
871 }
872
873 table.push(operations);
874 }
875
876 if !generate_code {
877 return Ok(String::new());
878 }
879
880 let mut right_side_counts = Vec::<usize>::new();
881 let mut left_side_symbol_ids = Vec::<usize>::new();
882
883 for rule in self.single_pattern_rules.iter() {
884 right_side_counts.push(rule.pattern.len());
885 let symbol_id = self.get_symbol_id(&BNFSymbol::NonTerminalSymbolName(
886 rule.root_symbol_name.clone(),
887 ))?;
888 left_side_symbol_ids.push(symbol_id);
889 }
890
891 let mut symbol_is_terminal = Vec::<bool>::new();
892 for _ in 0..self.symbol_id_map.len() {
893 symbol_is_terminal.push(false);
894 }
895 for symbol in self.symbol_id_map.keys() {
896 let symbol_id = self.symbol_id_map[symbol];
897 symbol_is_terminal[symbol_id] = symbol.is_terminal_symbol();
898 }
899
900 let mut code = "".to_string();
901 code += "
902 use bnf_rules::bnf_rules_parser::lexer::{*};
903 use bnf_rules::bnf_rules_parser::parser::{*};
904 use bnf_rules::bnf_rules_parser::parser::ASTNode::{NonTerminal, Terminal};
905 ";
906
907 code += "pub fn parse_source(source: &str) -> Result<ASTNode, ParseError> {";
908
909 let mut array_str = String::new();
910 for rule_root_name in self.single_pattern_rules.iter() {
911 array_str += format!("\"{}\", ", &rule_root_name.root_symbol_name).as_str();
912 }
913 code += format!("static RULE_PATTERN_NAME: &[&str] = &[{}];", array_str).as_str();
914
915 let mut group_array_str = String::new();
916 for group in table.iter() {
917 let mut array_str = String::new();
918
919 for operation in group.iter() {
920 match operation {
921 Some(operation) => {
922 let tuple = operation.to_tuple();
923 array_str += format!("({}, {}), ", tuple.0, tuple.1).as_str();
924 }
925 _ => array_str += format!("(0, 0), ").as_str(),
926 }
927 }
928
929 group_array_str += format!("&[{}], ", array_str).as_str();
930 }
931 code += format!(
932 "static LR_TABLE: &[&[(usize, usize)]] = &[{}];",
933 group_array_str
934 )
935 .as_str();
936
937 let mut rule_array_str = String::new();
938 for pattern in self.single_pattern_rules.iter() {
939 let root_symbol_id = self.symbol_id_map
940 [&BNFSymbol::NonTerminalSymbolName(pattern.root_symbol_name.clone())];
941
942 let mut array_str = String::new();
943 for symbol in pattern.pattern.iter() {
944 array_str += format!("{}, ", self.symbol_id_map[symbol]).as_str();
945 }
946
947 rule_array_str += format!("({}, &[{}]), ", root_symbol_id, array_str).as_str();
948 }
949
950 code += format!(
951 "static BNF_RULES: &[(u32, &[u32])] = &[{}];",
952 rule_array_str
953 )
954 .as_str();
955
956 code += "let terminal_symbols = vec![";
957 for entry in self.symbol_id_map.iter() {
958 let symbol = entry.0;
959 let symbol_id = entry.1;
960
961 match symbol {
962 BNFSymbol::TerminalSymbolString(string) => {
963 code += format!(
964 "TerminalSymbol::new_from_string(\"{}\", {}),",
965 string, symbol_id
966 )
967 .as_str();
968 }
969 BNFSymbol::TerminalSymbolRegex(regex) => {
970 code += format!(
971 "TerminalSymbol::new_from_regex(r\"{}\", {}),",
972 regex, symbol_id
973 )
974 .as_str();
975 }
976 BNFSymbol::TerminalSymbolFunction(fn_string) => {
977 code += format!(
978 "TerminalSymbol::new_from_tokenizer_fn({}, {}),",
979 fn_string, symbol_id
980 )
981 .as_str();
982 }
983 _ => {}
984 }
985 }
986 code += "];";
987 code += "let lexer = Lexer::new(terminal_symbols);";
988
989 code += "let tokens = lexer.scan(source);";
990 code += "return __parse(tokens, RULE_PATTERN_NAME, LR_TABLE, BNF_RULES);";
991 code += "}";
992
993 return Ok(code);
994 }
995
996 fn insert_opreration(
997 &self,
998 operation_map: &mut HashMap<BNFSymbol, Operation>,
999 symbol: &BNFSymbol,
1000 operation: Operation,
1001 ) -> Result<(), String> {
1002 if operation_map.contains_key(symbol) {
1003 let mut message = format!(
1004 "{:?} {:?} conflict!",
1005 operation_map.get(symbol).unwrap(),
1006 operation
1007 );
1008
1009 match symbol {
1010 BNFSymbol::NonTerminalSymbolName(symbol_name) => {
1011 if !symbol_name.is_empty() {
1012 message += format!(" | Symbol : {} ::=", symbol_name).as_str();
1013
1014 let rule = self.rule_map.get(symbol_name).unwrap();
1015 for pattern in rule.or_patterns.iter() {
1016 for symbol in pattern.iter() {
1017 message += format!(" {} ", symbol.get_symbol_name()).as_str();
1018 }
1019 message += "|";
1020 }
1021 }
1022 }
1023 _ => {
1024 message += format!(" | Symbol : '{}'", symbol.get_symbol_name()).as_str();
1025 }
1026 }
1027
1028 return Err(message);
1029 }
1030 operation_map.insert(symbol.clone(), operation);
1031 return Ok(());
1032 }
1033
1034 fn add_items(&self, lr_group: &mut LRGroup) {
1035 loop {
1036 let mut is_all_scanned = true;
1037 let mut add_item_list = Vec::<LRItem>::new();
1038
1039 for i in 0..lr_group.item_list.len() {
1040 let item = &mut lr_group.item_list[i];
1041
1042 if item.is_scanned || item.is_last_position() {
1043 continue;
1044 }
1045 is_all_scanned = false;
1046 item.is_scanned = true;
1047
1048 let next_symbol_name = match item.get_next_nonterminal_symbol_name() {
1049 Some(name) => name,
1050 _ => continue,
1051 };
1052
1053 let item = &lr_group.item_list[i];
1054
1055 let latter_pattern = item.get_latter_pattern();
1056
1057 let first_set = self.get_first_set(&latter_pattern, &item.first_set);
1058
1059 let rule = self.rule_map.get(&next_symbol_name).unwrap();
1060
1061 for pattern in rule.or_patterns.iter() {
1062 let mut found = false;
1063 for item in lr_group.item_list.iter_mut() {
1064 if item.root_name == next_symbol_name
1065 && &item.pattern == pattern
1066 && item.current_position == 0
1067 {
1068 found = true;
1069 item.first_set.extend(first_set.clone());
1070 break;
1071 }
1072 }
1073
1074 if !found {
1075 let mut item = LRItem::new(next_symbol_name.clone());
1076
1077 for symbol in pattern.iter() {
1078 match symbol {
1079 BNFSymbol::Null => {}
1080 BNFSymbol::EOF => {}
1081 _ => {
1082 item.pattern.push(symbol.clone());
1083 }
1084 }
1085 }
1086 item.first_set = first_set.clone();
1087 add_item_list.push(item);
1088 }
1089 }
1090 }
1091
1092 lr_group.item_list.extend(add_item_list);
1093
1094 if is_all_scanned {
1095 break;
1096 }
1097 }
1098 }
1099
1100 fn get_pattern_id(&self, item: &LRItem) -> Result<usize, String> {
1101 for i in 0..self.single_pattern_rules.len() {
1102 let pattern = &self.single_pattern_rules[i];
1103 if pattern.root_symbol_name == item.root_name && pattern.pattern == item.pattern {
1104 return Ok(i);
1105 }
1106 }
1107 return Err(format!("Internal error. Not found pattern. {:?}", item));
1108 }
1109
1110 fn get_symbol_id(&self, symbol: &BNFSymbol) -> Result<usize, String> {
1111 return match self.symbol_id_map.get(symbol) {
1112 Some(id) => Ok(*id),
1113 _ => Err(format!(
1114 "Internal error. Symbol's id is not found. {:?}",
1115 symbol
1116 )),
1117 };
1118 }
1119
1120 fn get_first_set(
1121 &self,
1122 symbol_list: &Vec<BNFSymbol>,
1123 item_first_set: &HashSet<BNFSymbol>,
1124 ) -> HashSet<BNFSymbol> {
1125 let mut first_set = HashSet::<BNFSymbol>::new();
1126
1127 let mut is_nullable = true;
1128 for symbol in symbol_list.iter() {
1129 match symbol {
1130 BNFSymbol::NonTerminalSymbolName(name) => {
1131 let rule = self.rule_map.get(name).unwrap();
1132 first_set.extend(rule.first_set.clone());
1133 if !rule.is_nullable {
1134 is_nullable = false;
1135 break;
1136 }
1137 }
1138 BNFSymbol::Null => {}
1139 _ => {
1140 first_set.insert(symbol.clone());
1141 is_nullable = false;
1142 break;
1143 }
1144 }
1145 }
1146
1147 if is_nullable {
1148 first_set.extend(item_first_set.clone());
1149 }
1150
1151 return first_set;
1152 }
1153}
1154
1155#[derive(Debug)]
1156pub struct LRGroup {
1157 pub group_number: usize,
1158 pub default_item_list: Vec<LRItem>,
1159 pub item_list: Vec<LRItem>,
1160 pub next_group_number_map: HashMap<BNFSymbol, usize>,
1161}
1162
1163impl LRGroup {
1164 pub fn new(group_number: usize) -> Self {
1165 return Self {
1166 group_number,
1167 default_item_list: Vec::new(),
1168 item_list: Vec::new(),
1169 next_group_number_map: HashMap::new(),
1170 };
1171 }
1172}
1173
1174#[derive(Debug, Clone, Eq, PartialEq)]
1175pub struct LRItem {
1176 pub root_name: String,
1177 pub pattern: Vec<BNFSymbol>,
1178 pub first_set: HashSet<BNFSymbol>,
1179 pub current_position: usize,
1180 pub is_scanned: bool,
1181}
1182
1183impl LRItem {
1184 pub fn new(root_name: String) -> Self {
1185 return Self {
1186 root_name,
1187 pattern: Vec::new(),
1188 first_set: HashSet::new(),
1189 current_position: 0,
1190 is_scanned: false,
1191 };
1192 }
1193
1194 pub fn is_last_position(&self) -> bool {
1195 return self.pattern.len() == self.current_position;
1196 }
1197
1198 pub fn get_next_nonterminal_symbol_name(&self) -> Option<String> {
1199 return match self.pattern.get(self.current_position) {
1200 Some(symbol) => match symbol {
1201 BNFSymbol::NonTerminalSymbolName(name) => Some(name.clone()),
1202 _ => None,
1203 },
1204 _ => None,
1205 };
1206 }
1207
1208 pub fn get_next_symbol(&self) -> Option<BNFSymbol> {
1209 return self.pattern.get(self.current_position).cloned();
1210 }
1211
1212 pub fn get_latter_pattern(&self) -> Vec<BNFSymbol> {
1213 let mut latter_pattern = Vec::<BNFSymbol>::new();
1214 if self.current_position + 1 >= self.pattern.len() {
1215 return latter_pattern;
1216 }
1217
1218 for i in (self.current_position + 1)..self.pattern.len() {
1219 let symbol = &self.pattern[i];
1220 latter_pattern.push(symbol.clone());
1221 }
1222
1223 return latter_pattern;
1224 }
1225
1226 pub fn create_next(&self) -> Option<Self> {
1227 if self.is_last_position() {
1228 return None;
1229 }
1230
1231 let mut cloned = self.clone();
1232 cloned.current_position += 1;
1233 cloned.is_scanned = false;
1234
1235 return Some(cloned);
1236 }
1237}
1238
1239#[derive(Debug, Clone, Eq, PartialEq)]
1240pub struct SinglePatternRule {
1241 pub root_symbol_name: String,
1242 pub pattern: Vec<BNFSymbol>,
1243}
1244
1245impl SinglePatternRule {
1246 pub fn new(root_symbol_name: String, pattern: Vec<BNFSymbol>) -> Self {
1247 let mut new_pattern = Vec::<BNFSymbol>::new();
1248 for symbol in pattern.iter() {
1249 match symbol {
1250 BNFSymbol::Null => {}
1251 _ => {
1252 new_pattern.push(symbol.clone());
1253 }
1254 }
1255 }
1256
1257 return Self {
1258 root_symbol_name,
1259 pattern: new_pattern,
1260 };
1261 }
1262}
1263
1264#[derive(Debug, Clone)]
1265pub enum Operation {
1266 Shift(usize),
1267 Reduce(usize),
1268 GoTo(usize),
1269 Accept,
1270}
1271
1272pub const OPERATION_NONE: usize = 0;
1273pub const OPERATION_SHIFT: usize = 1;
1274pub const OPERATION_REDUCE: usize = 2;
1275pub const OPERATION_GOTO: usize = 3;
1276pub const OPERATION_ACCEPT: usize = 4;
1277
1278impl Operation {
1279 pub fn to_tuple(&self) -> (usize, usize) {
1280 return match self {
1281 Operation::Shift(i) => (OPERATION_SHIFT, *i),
1282 Operation::Reduce(i) => (OPERATION_REDUCE, *i),
1283 Operation::GoTo(i) => (OPERATION_GOTO, *i),
1284 Operation::Accept => (OPERATION_ACCEPT, 0),
1285 };
1286 }
1287}