bnf_rules_parser/
lib.rs

1use litrs::StringLit;
2use proc_macro2::{Delimiter, TokenTree};
3use std::collections::{HashMap, HashSet};
4use std::mem;
5use syn::parse::{Parse, ParseStream};
6use syn::Error;
7
8pub mod lexer;
9pub mod parser;
10
11pub fn parse_rules(tokens: &Vec<TokenTree>) -> Result<(HashMap<String, BNFRule>, bool), Error> {
12    let mut non_terminal_symbol_name = String::new();
13    let mut buffered_tokens = Vec::<TokenTree>::new();
14    let mut rule_map = HashMap::<String, BNFRule>::new();
15
16    let mut i = 0;
17    let mut token = &tokens[0];
18
19    let mut non_duplicate_number = NonDuplicateNumber::new();
20    let mut unnamed_pattern_map = HashMap::new();
21
22    let mut generate_code = true;
23    if let TokenTree::Punct(punctuation) = tokens.first().unwrap() {
24        if punctuation.as_char() != '#' {
25            return Err(Error::new(token.span(), "Invalid syntax."));
26        }
27
28        if let TokenTree::Group(group) = next(tokens, &mut i)? {
29            if group.delimiter() != Delimiter::Bracket {
30                return Err(Error::new(token.span(), "Invalid syntax."));
31            }
32
33            let mut i = 0;
34            let tokens = group.stream().into_iter().collect::<Vec<_>>();
35
36            check_current_identifier(&tokens, &mut i, "generate_code")?;
37            check_next_punct(&tokens, &mut i, '=')?;
38
39            if let TokenTree::Ident(identifier) = next(&tokens, &mut i)? {
40                generate_code = match identifier.to_string().as_str() {
41                    "true" => true,
42                    "false" => false,
43                    _ => return Err(Error::new(token.span(), "Invalid syntax.")),
44                }
45            }
46        }
47
48        i += 1;
49        token = &tokens[i];
50    }
51
52    let mut is_first_rule_token = true;
53
54    loop {
55        buffered_tokens.push(token.clone());
56
57        if is_first_rule_token {
58            buffered_tokens.pop();
59
60            non_terminal_symbol_name = match token {
61                TokenTree::Ident(ident) => ident.to_string(),
62                _ => {
63                    return Err(Error::new(
64                        token.span(),
65                        "Invalid non terminal symbol name.",
66                    ))
67                }
68            };
69
70            check_next_punct(&tokens, &mut i, ':')?;
71            check_next_punct(&tokens, &mut i, ':')?;
72            check_next_punct(&tokens, &mut i, '=')?;
73
74            token = next(&tokens, &mut i)?;
75        } else {
76            match check_next_punct(&tokens, &mut i, ':') {
77                Ok(_) => {
78                    buffered_tokens.pop();
79
80                    parse_rule(
81                        &mut rule_map,
82                        &mut non_terminal_symbol_name,
83                        &buffered_tokens,
84                        &mut non_duplicate_number,
85                        &mut unnamed_pattern_map,
86                    )?;
87
88                    buffered_tokens.clear();
89
90                    non_terminal_symbol_name = match token {
91                        TokenTree::Ident(ident) => ident.to_string(),
92                        _ => {
93                            return Err(Error::new(
94                                token.span(),
95                                "Invalid non terminal symbol name.",
96                            ))
97                        }
98                    };
99
100                    check_next_punct(&tokens, &mut i, ':')?;
101                    check_next_punct(&tokens, &mut i, '=')?;
102
103                    token = next(&tokens, &mut i)?;
104                }
105                Err(_) => {
106                    token = &tokens[i];
107                }
108            }
109        }
110
111        is_first_rule_token = false;
112
113        if i + 1 == tokens.len() {
114            buffered_tokens.push(token.clone());
115            parse_rule(
116                &mut rule_map,
117                &mut non_terminal_symbol_name,
118                &buffered_tokens,
119                &mut non_duplicate_number,
120                &mut unnamed_pattern_map,
121            )?;
122            break;
123        }
124    }
125
126    return Ok((rule_map, generate_code));
127}
128
129fn check_next_punct(tokens: &Vec<TokenTree>, i: &mut usize, char: char) -> Result<(), Error> {
130    let token = next(tokens, i)?;
131
132    return match token {
133        TokenTree::Punct(punct) => {
134            if punct.as_char() == char {
135                Ok(())
136            } else {
137                Err(Error::new(punct.span(), "Invalid syntax."))
138            }
139        }
140        _ => Err(Error::new(token.span(), "Invalid syntax.")),
141    };
142}
143
144fn check_current_identifier(
145    tokens: &Vec<TokenTree>,
146    i: &mut usize,
147    identifier_str: &str,
148) -> Result<(), Error> {
149    let token = current(tokens, i)?;
150
151    return match token {
152        TokenTree::Ident(identifier) => {
153            if identifier.to_string().as_str() == identifier_str {
154                Ok(())
155            } else {
156                Err(Error::new(
157                    identifier.span(),
158                    format!("Invalid syntax.{}", identifier.to_string()),
159                ))
160            }
161        }
162        _ => Err(Error::new(token.span(), "Invalid syntax.1")),
163    };
164}
165
166fn next<'a>(tokens: &'a Vec<TokenTree>, i: &mut usize) -> Result<&'a TokenTree, Error> {
167    *i += 1;
168    if *i == tokens.len() {
169        return Err(Error::new(
170            tokens[tokens.len() - 1].span(),
171            "Unexpected EOF.",
172        ));
173    }
174    return Ok(&tokens[*i]);
175}
176
177fn current<'a>(tokens: &'a Vec<TokenTree>, i: &usize) -> Result<&'a TokenTree, Error> {
178    if *i == tokens.len() {
179        return Err(Error::new(
180            tokens[tokens.len() - 1].span(),
181            "Unexpected EOF.",
182        ));
183    }
184    return Ok(&tokens[*i]);
185}
186
187fn parse_rule(
188    rule_map: &mut HashMap<String, BNFRule>,
189    non_terminal_symbol_name: &mut String,
190    tokens: &Vec<TokenTree>,
191    non_duplicate_number: &mut NonDuplicateNumber,
192    unnamed_pattern_map: &mut HashMap<Vec<Vec<BNFSymbol>>, String>,
193) -> Result<(), Error> {
194    let mut rule = BNFRule::new(non_terminal_symbol_name.clone());
195
196    let mut pattern = Vec::<BNFSymbol>::new();
197    let or_patterns = &mut rule.or_patterns;
198
199    let mut index = 0;
200    loop {
201        if index >= tokens.len() {
202            break;
203        }
204        let token = &tokens[index];
205
206        match token {
207            TokenTree::Punct(punct) => {
208                if punct.as_char() != '|' {
209                    return Err(Error::new(punct.span(), "Invalid punctuation."));
210                }
211                if pattern.is_empty() {
212                    pattern.push(BNFSymbol::Null);
213                }
214
215                let mut pattern_temp = Vec::new();
216                mem::swap(&mut pattern_temp, &mut pattern);
217
218                or_patterns.push(pattern_temp);
219            }
220            TokenTree::Ident(ident) => {
221                if ident.to_string() == "fn" {
222                    let next_index = index + 1;
223                    if next_index >= tokens.len() {
224                        return Err(Error::new(ident.span(), "A function must be specified."));
225                    }
226                    let next_token = &tokens[next_index];
227                    if let TokenTree::Group(next_token) = next_token {
228                        if next_token.delimiter() != Delimiter::Parenthesis {
229                            return Err(Error::new(next_token.span(), "Invalid delimiter."));
230                        }
231                        let ident_chars = next_token.to_string().chars().collect::<Vec<char>>();
232                        if ident_chars.len() <= 2 {
233                            return Err(Error::new(
234                                next_token.span(),
235                                "A function must be specified.",
236                            ));
237                        }
238                        let func_string = ident_chars[1..(ident_chars.len() - 1)]
239                            .iter()
240                            .collect::<String>();
241                        pattern.push(BNFSymbol::TerminalSymbolFunction(func_string));
242
243                        index = next_index;
244                    } else {
245                        return Err(Error::new(ident.span(), "A function must be specified."));
246                    }
247                } else {
248                    pattern.push(BNFSymbol::NonTerminalSymbolName(ident.to_string()));
249                }
250            }
251            TokenTree::Group(group) => {
252                let delimiter = group.delimiter();
253                let symbols = group.stream().into_iter().collect::<Vec<TokenTree>>();
254                let mut new_symbol_name = non_duplicate_number.as_symbol_name();
255
256                match delimiter {
257                    Delimiter::Parenthesis => {
258                        parse_rule(
259                            rule_map,
260                            &mut new_symbol_name,
261                            &symbols,
262                            non_duplicate_number,
263                            unnamed_pattern_map,
264                        )?;
265                    }
266                    Delimiter::Brace => {
267                        // new_symbol ::= null | new_pattern new_symbol
268                        let mut new_pattern_name = non_duplicate_number.as_symbol_name();
269                        parse_rule(
270                            rule_map,
271                            &mut new_pattern_name,
272                            &symbols,
273                            non_duplicate_number,
274                            unnamed_pattern_map,
275                        )?;
276
277                        let mut rule = BNFRule::new(new_symbol_name.clone());
278                        let or_patterns = &mut rule.or_patterns;
279
280                        or_patterns.push(vec![BNFSymbol::Null]);
281                        or_patterns.push(vec![
282                            BNFSymbol::NonTerminalSymbolName(new_pattern_name),
283                            BNFSymbol::NonTerminalSymbolName(new_symbol_name.clone()),
284                        ]);
285
286                        rule_map.insert(rule.non_terminal_symbol_name.clone(), rule);
287                    }
288                    Delimiter::Bracket => {
289                        // new_symbol ::= new_pattern | null
290                        let mut new_pattern_name = non_duplicate_number.as_symbol_name();
291                        parse_rule(
292                            rule_map,
293                            &mut new_pattern_name,
294                            &symbols,
295                            non_duplicate_number,
296                            unnamed_pattern_map,
297                        )?;
298
299                        let mut rule = BNFRule::new(new_symbol_name.clone());
300                        let or_patterns = &mut rule.or_patterns;
301
302                        or_patterns.push(vec![BNFSymbol::NonTerminalSymbolName(new_pattern_name)]);
303                        or_patterns.push(vec![BNFSymbol::Null]);
304
305                        rule_map.insert(rule.non_terminal_symbol_name.clone(), rule);
306                    }
307                    _ => {
308                        return Err(Error::new(group.span(), "Invalid delimiter."));
309                    }
310                }
311
312                pattern.push(BNFSymbol::NonTerminalSymbolName(new_symbol_name.clone()));
313            }
314            TokenTree::Literal(literal) => {
315                let string = match StringLit::try_from(literal) {
316                    Ok(string) => string.value().to_string(),
317                    Err(err) => {
318                        return Err(Error::new(
319                            literal.span(),
320                            format!("Invalid terminal symbol. {}", err.to_string()),
321                        ))
322                    }
323                };
324
325                if literal.to_string().starts_with("r") {
326                    pattern.push(BNFSymbol::TerminalSymbolRegex(string));
327                } else {
328                    pattern.push(BNFSymbol::TerminalSymbolString(string));
329                }
330            }
331        }
332
333        index += 1;
334    }
335
336    if !pattern.is_empty() {
337        or_patterns.push(pattern);
338    }
339
340    // merge unnamed pattern
341    if non_terminal_symbol_name.starts_with(' ') {
342        match unnamed_pattern_map.get(or_patterns) {
343            Some(temp_name) => {
344                *non_terminal_symbol_name = temp_name.clone();
345            }
346            _ => {
347                unnamed_pattern_map.insert(or_patterns.clone(), non_terminal_symbol_name.clone());
348
349                rule.non_terminal_symbol_name = non_terminal_symbol_name.clone();
350                rule_map.insert(non_terminal_symbol_name.clone(), rule);
351            }
352        }
353    } else {
354        rule.non_terminal_symbol_name = non_terminal_symbol_name.clone();
355        rule_map.insert(non_terminal_symbol_name.clone(), rule);
356    }
357
358    return Ok(());
359}
360
361#[derive()]
362pub struct TokenParser {
363    pub symbols: Vec<TokenTree>,
364}
365
366impl Parse for TokenParser {
367    fn parse(input: ParseStream) -> syn::Result<Self> {
368        let mut symbols = Vec::<TokenTree>::new();
369
370        while !input.is_empty() {
371            let token_tree = TokenTree::parse(input).unwrap();
372
373            symbols.push(token_tree);
374        }
375
376        return Ok(TokenParser { symbols });
377    }
378}
379
380#[derive(Debug)]
381pub struct BNFRule {
382    pub non_terminal_symbol_name: String,
383    pub or_patterns: Vec<Vec<BNFSymbol>>,
384    pub first_set: HashSet<BNFSymbol>,
385    pub is_nullable: bool,
386}
387
388impl BNFRule {
389    pub fn new(non_terminal_symbol_name: String) -> Self {
390        return Self {
391            non_terminal_symbol_name,
392            or_patterns: Vec::new(),
393            first_set: HashSet::new(),
394            is_nullable: false,
395        };
396    }
397}
398
399#[derive(Debug, Eq, Clone, Hash, PartialEq)]
400pub enum BNFSymbol {
401    NonTerminalSymbolName(String),
402    TerminalSymbolString(String),
403    TerminalSymbolRegex(String),
404    TerminalSymbolFunction(String),
405    Null,
406    EOF,
407}
408
409impl BNFSymbol {
410    pub fn is_terminal_symbol(&self) -> bool {
411        return match self {
412            BNFSymbol::NonTerminalSymbolName(_) => false,
413            _ => true,
414        };
415    }
416
417    pub fn get_symbol_name(&self) -> &str {
418        return match self {
419            BNFSymbol::TerminalSymbolFunction(name) => name.as_str(),
420            BNFSymbol::NonTerminalSymbolName(name) => name.as_str(),
421            BNFSymbol::TerminalSymbolString(name) => name.as_str(),
422            BNFSymbol::TerminalSymbolRegex(name) => name.as_str(),
423            BNFSymbol::Null => "Null",
424            BNFSymbol::EOF => "EOF",
425        };
426    }
427}
428
429pub struct NonDuplicateNumber {
430    number: usize,
431}
432
433impl NonDuplicateNumber {
434    pub fn new() -> Self {
435        return Self { number: 0 };
436    }
437
438    pub fn next(&mut self) -> usize {
439        self.number += 1;
440        return self.number;
441    }
442
443    pub fn as_symbol_name(&mut self) -> String {
444        return format!(" {}", self.next());
445    }
446}
447
448pub struct ParserGenerator {
449    rule_map: HashMap<String, BNFRule>,
450    single_pattern_rules: Vec<SinglePatternRule>,
451    symbol_id_map: HashMap<BNFSymbol, usize>,
452}
453
454impl ParserGenerator {
455    pub fn new(mut rule_map: HashMap<String, BNFRule>) -> Self {
456        let mut source_rule = BNFRule::new(" source".to_string());
457        source_rule
458            .or_patterns
459            .push(vec![BNFSymbol::NonTerminalSymbolName("source".to_string())]);
460
461        rule_map.insert(" source".to_string(), source_rule);
462
463        let mut single_pattern_rules = Vec::<SinglePatternRule>::new();
464        for rule in rule_map.values() {
465            for pattern in rule.or_patterns.iter() {
466                let mut new_pattern = Vec::<BNFSymbol>::new();
467                for symbol in pattern.iter() {
468                    match symbol {
469                        BNFSymbol::Null => {}
470                        BNFSymbol::EOF => {}
471                        _ => new_pattern.push(symbol.clone()),
472                    }
473                }
474
475                single_pattern_rules.push(SinglePatternRule::new(
476                    rule.non_terminal_symbol_name.clone(),
477                    new_pattern,
478                ));
479            }
480        }
481
482        let mut symbol_id_map = HashMap::<BNFSymbol, usize>::new();
483        let mut last_id = 0;
484
485        symbol_id_map.insert(BNFSymbol::EOF, last_id);
486        last_id += 1;
487
488        for rule in rule_map.values() {
489            for pattern in rule.or_patterns.iter() {
490                for symbol in pattern.iter() {
491                    match symbol {
492                        BNFSymbol::Null => {}
493                        BNFSymbol::EOF => {}
494                        _ => {
495                            if !symbol_id_map.contains_key(symbol) {
496                                symbol_id_map.insert(symbol.clone(), last_id);
497                                last_id += 1;
498                            }
499                        }
500                    }
501                }
502            }
503        }
504
505        symbol_id_map.insert(
506            BNFSymbol::NonTerminalSymbolName(" source".to_string()),
507            last_id,
508        );
509
510        return Self {
511            rule_map,
512            single_pattern_rules,
513            symbol_id_map,
514        };
515    }
516
517    pub fn generate(&mut self, generate_code: bool) -> Result<String, String> {
518        self.search_nulls_and_first_set();
519        return Ok(self.generate_parser(generate_code)?);
520    }
521
522    fn search_nulls_and_first_set(&mut self) {
523        let mut rule_nullable_map = HashMap::<String, bool>::new();
524
525        loop {
526            let mut retry = false;
527
528            for rule in self.rule_map.values() {
529                match rule_nullable_map.get(&rule.non_terminal_symbol_name) {
530                    Some(is_nullable) => {
531                        if *is_nullable {
532                            continue;
533                        }
534                    }
535                    _ => {}
536                }
537
538                let mut is_nullable_rule = false;
539
540                if rule.or_patterns.is_empty() {
541                    is_nullable_rule = true;
542                }
543
544                for pattern in rule.or_patterns.iter() {
545                    let mut nullable_count = 0;
546
547                    for symbol in pattern.iter() {
548                        match symbol {
549                            BNFSymbol::NonTerminalSymbolName(name) => {
550                                match rule_nullable_map.get(name) {
551                                    Some(is_nullable) => {
552                                        if *is_nullable {
553                                            nullable_count += 1;
554                                        }
555                                    }
556                                    _ => {}
557                                }
558                            }
559                            BNFSymbol::Null => {
560                                nullable_count += 1;
561                            }
562                            _ => {}
563                        }
564                    }
565
566                    is_nullable_rule |= nullable_count == pattern.len();
567                }
568
569                rule_nullable_map.insert(rule.non_terminal_symbol_name.clone(), is_nullable_rule);
570                if is_nullable_rule {
571                    retry = true;
572                }
573            }
574
575            if !retry {
576                break;
577            }
578        }
579
580        for entry in rule_nullable_map.iter() {
581            self.rule_map.get_mut(entry.0).unwrap().is_nullable = *entry.1
582        }
583
584        let mut rule_first_set_map = HashMap::<String, HashSet<BNFSymbol>>::new();
585
586        loop {
587            let mut retry = false;
588
589            for rule in self.rule_map.values() {
590                match rule_first_set_map.get_mut(&rule.non_terminal_symbol_name) {
591                    Some(_) => {}
592                    _ => {
593                        rule_first_set_map
594                            .insert(rule.non_terminal_symbol_name.clone(), HashSet::new());
595                    }
596                };
597
598                let first_set = rule_first_set_map
599                    .get(&rule.non_terminal_symbol_name)
600                    .unwrap();
601                let mut first_set_add = HashSet::<BNFSymbol>::new();
602
603                for pattern in rule.or_patterns.iter() {
604                    for symbol in pattern.iter() {
605                        match symbol {
606                            BNFSymbol::NonTerminalSymbolName(name) => {
607                                let is_nullable = match rule_nullable_map.get(name) {
608                                    Some(is_nullable) => *is_nullable,
609                                    _ => false,
610                                };
611
612                                match rule_first_set_map.get(name) {
613                                    Some(sub_set) => {
614                                        for symbol in sub_set.iter() {
615                                            if !first_set.contains(symbol) {
616                                                first_set_add.insert(symbol.clone());
617                                                retry = true;
618                                            }
619                                        }
620                                    }
621                                    _ => {}
622                                }
623
624                                if !is_nullable {
625                                    break;
626                                }
627                            }
628                            BNFSymbol::TerminalSymbolString(_)
629                            | BNFSymbol::TerminalSymbolFunction(_)
630                            | BNFSymbol::TerminalSymbolRegex(_) => {
631                                if !first_set.contains(symbol) {
632                                    first_set_add.insert(symbol.clone());
633                                    retry = true;
634                                }
635                                break;
636                            }
637                            BNFSymbol::EOF => {
638                                if !first_set.contains(symbol) {
639                                    first_set_add.insert(symbol.clone());
640                                    retry = true;
641                                }
642                                break;
643                            }
644                            _ => {}
645                        }
646                    }
647                }
648
649                let first_set = rule_first_set_map
650                    .get_mut(&rule.non_terminal_symbol_name)
651                    .unwrap();
652                first_set.extend(first_set_add);
653            }
654
655            if !retry {
656                break;
657            }
658        }
659
660        for entry in rule_nullable_map.iter() {
661            let symbol_name = entry.0;
662            let is_nullable = entry.1;
663            let rule = self.rule_map.get_mut(symbol_name).unwrap();
664            rule.is_nullable = *is_nullable;
665        }
666
667        for entry in rule_first_set_map.iter() {
668            let symbol_name = entry.0;
669            let first_set = entry.1;
670            let rule = self.rule_map.get_mut(symbol_name).unwrap();
671            rule.first_set = first_set.clone();
672        }
673    }
674
675    fn generate_parser(&self, generate_code: bool) -> Result<String, String> {
676        let mut lr_group_map = HashMap::<usize, LRGroup>::new();
677        let mut not_scanned_group_list = Vec::<usize>::new();
678        let mut last_group_number = 0;
679
680        loop {
681            let lr_group_number = if last_group_number == 0 {
682                let mut first_group = LRGroup::new(0);
683
684                let source_first_rule = self.rule_map.get(" source").unwrap();
685
686                for pattern in source_first_rule.or_patterns.iter() {
687                    let mut item = LRItem::new(" source".to_string());
688                    item.pattern.extend(pattern.clone());
689                    item.first_set.insert(BNFSymbol::EOF);
690                    first_group.default_item_list.push(item.clone());
691                    first_group.item_list.push(item);
692                }
693
694                self.add_items(&mut first_group);
695
696                lr_group_map.insert(0, first_group);
697                last_group_number += 1;
698                0
699            } else {
700                match not_scanned_group_list.pop() {
701                    Some(number) => number,
702                    _ => break,
703                }
704            };
705
706            let lr_group = lr_group_map.get(&lr_group_number).unwrap();
707
708            let mut next_group_map = HashMap::<BNFSymbol, Vec<&LRItem>>::new();
709            for item in lr_group.item_list.iter() {
710                match item.get_next_symbol() {
711                    Some(symbol) => {
712                        let list = next_group_map.entry(symbol).or_insert_with(|| Vec::new());
713                        list.push(item);
714                    }
715                    _ => continue,
716                }
717            }
718
719            let mut next_group_number_map = HashMap::<BNFSymbol, usize>::new();
720            let mut add_group_map = HashMap::<usize, LRGroup>::new();
721
722            for entry in next_group_map.iter() {
723                let symbol = entry.0;
724                let items = entry.1;
725
726                let mut next_group_items = Vec::<LRItem>::new();
727                for item in items.iter() {
728                    next_group_items.push(item.create_next().unwrap());
729                }
730
731                let mut group_number = Option::<usize>::None;
732                for entry in lr_group_map.iter() {
733                    let number = entry.0;
734                    let lr_group = entry.1;
735
736                    let mut is_all_matched = true;
737
738                    if next_group_items.len() == lr_group.default_item_list.len() {
739                        for i in 0..next_group_items.len() {
740                            let item = &next_group_items[i];
741                            let mut found = false;
742
743                            for j in 0..lr_group.default_item_list.len() {
744                                let group_item = &lr_group.default_item_list[j];
745
746                                if item.root_name == group_item.root_name
747                                    && item.current_position == group_item.current_position
748                                    && item.pattern == group_item.pattern
749                                    && item.first_set == group_item.first_set
750                                {
751                                    found = true;
752                                    break;
753                                }
754                            }
755
756                            if !found {
757                                is_all_matched = false;
758                                break;
759                            }
760                        }
761                    } else {
762                        is_all_matched = false;
763                    }
764
765                    if is_all_matched {
766                        group_number = Some(*number);
767                    }
768                }
769
770                match group_number {
771                    Some(number) => {
772                        next_group_number_map.insert(symbol.clone(), number);
773                    }
774                    _ => {
775                        let mut new_group = LRGroup::new(last_group_number);
776                        new_group.default_item_list = next_group_items.clone();
777                        new_group.item_list = next_group_items;
778
779                        self.add_items(&mut new_group);
780
781                        add_group_map.insert(last_group_number, new_group);
782                        not_scanned_group_list.push(last_group_number);
783
784                        next_group_number_map.insert(symbol.clone(), last_group_number);
785
786                        last_group_number += 1;
787                    }
788                }
789            }
790
791            let lr_group = lr_group_map.get_mut(&lr_group_number).unwrap();
792            lr_group.next_group_number_map = next_group_number_map;
793
794            lr_group_map.extend(add_group_map);
795        }
796
797        let mut table = Vec::<Vec<Option<Operation>>>::new();
798        for group_number in 0..lr_group_map.len() {
799            let group = lr_group_map.get(&group_number).unwrap();
800            let mut operation_map = HashMap::<BNFSymbol, Operation>::new();
801
802            for entry in group.next_group_number_map.iter() {
803                let symbol = entry.0;
804                let next_group_number = *entry.1;
805
806                match symbol {
807                    BNFSymbol::NonTerminalSymbolName(_) => {
808                        self.insert_opreration(
809                            &mut operation_map,
810                            symbol,
811                            Operation::GoTo(next_group_number),
812                        )?;
813                    }
814                    BNFSymbol::TerminalSymbolString(_) => {
815                        self.insert_opreration(
816                            &mut operation_map,
817                            symbol,
818                            Operation::Shift(next_group_number),
819                        )?;
820                    }
821                    BNFSymbol::TerminalSymbolRegex(_) => {
822                        self.insert_opreration(
823                            &mut operation_map,
824                            symbol,
825                            Operation::Shift(next_group_number),
826                        )?;
827                    }
828                    BNFSymbol::TerminalSymbolFunction(_) => {
829                        self.insert_opreration(
830                            &mut operation_map,
831                            symbol,
832                            Operation::Shift(next_group_number),
833                        )?;
834                    }
835                    _ => {
836                        return Err(format!("Unexpected symbol. {:?}", symbol));
837                    }
838                }
839            }
840
841            for item in group.item_list.iter() {
842                if item.is_last_position() {
843                    if item.root_name == " source" {
844                        self.insert_opreration(
845                            &mut operation_map,
846                            &BNFSymbol::EOF,
847                            Operation::Accept,
848                        )?;
849                    } else {
850                        let pattern_id = self.get_pattern_id(item)?;
851                        for symbol in item.first_set.iter() {
852                            self.insert_opreration(
853                                &mut operation_map,
854                                symbol,
855                                Operation::Reduce(pattern_id),
856                            )?;
857                        }
858                    }
859                }
860            }
861
862            let mut operations = Vec::<Option<Operation>>::new();
863            for _ in 0..self.symbol_id_map.len() {
864                operations.push(None);
865            }
866
867            for entry in operation_map.iter() {
868                let symbol_id = self.get_symbol_id(entry.0)?;
869                let operation = entry.1;
870                operations[symbol_id] = Some(operation.clone());
871            }
872
873            table.push(operations);
874        }
875
876        if !generate_code {
877            return Ok(String::new());
878        }
879
880        let mut right_side_counts = Vec::<usize>::new();
881        let mut left_side_symbol_ids = Vec::<usize>::new();
882
883        for rule in self.single_pattern_rules.iter() {
884            right_side_counts.push(rule.pattern.len());
885            let symbol_id = self.get_symbol_id(&BNFSymbol::NonTerminalSymbolName(
886                rule.root_symbol_name.clone(),
887            ))?;
888            left_side_symbol_ids.push(symbol_id);
889        }
890
891        let mut symbol_is_terminal = Vec::<bool>::new();
892        for _ in 0..self.symbol_id_map.len() {
893            symbol_is_terminal.push(false);
894        }
895        for symbol in self.symbol_id_map.keys() {
896            let symbol_id = self.symbol_id_map[symbol];
897            symbol_is_terminal[symbol_id] = symbol.is_terminal_symbol();
898        }
899
900        let mut code = "".to_string();
901        code += "
902        use bnf_rules::bnf_rules_parser::lexer::{*};
903        use bnf_rules::bnf_rules_parser::parser::{*};
904        use bnf_rules::bnf_rules_parser::parser::ASTNode::{NonTerminal, Terminal};
905        ";
906
907        code += "pub fn parse_source(source: &str) -> Result<ASTNode, ParseError> {";
908
909        let mut array_str = String::new();
910        for rule_root_name in self.single_pattern_rules.iter() {
911            array_str += format!("\"{}\", ", &rule_root_name.root_symbol_name).as_str();
912        }
913        code += format!("static RULE_PATTERN_NAME: &[&str] = &[{}];", array_str).as_str();
914
915        let mut group_array_str = String::new();
916        for group in table.iter() {
917            let mut array_str = String::new();
918
919            for operation in group.iter() {
920                match operation {
921                    Some(operation) => {
922                        let tuple = operation.to_tuple();
923                        array_str += format!("({}, {}), ", tuple.0, tuple.1).as_str();
924                    }
925                    _ => array_str += format!("(0, 0), ").as_str(),
926                }
927            }
928
929            group_array_str += format!("&[{}], ", array_str).as_str();
930        }
931        code += format!(
932            "static LR_TABLE: &[&[(usize, usize)]] = &[{}];",
933            group_array_str
934        )
935        .as_str();
936
937        let mut rule_array_str = String::new();
938        for pattern in self.single_pattern_rules.iter() {
939            let root_symbol_id = self.symbol_id_map
940                [&BNFSymbol::NonTerminalSymbolName(pattern.root_symbol_name.clone())];
941
942            let mut array_str = String::new();
943            for symbol in pattern.pattern.iter() {
944                array_str += format!("{}, ", self.symbol_id_map[symbol]).as_str();
945            }
946
947            rule_array_str += format!("({}, &[{}]), ", root_symbol_id, array_str).as_str();
948        }
949
950        code += format!(
951            "static BNF_RULES: &[(u32, &[u32])] = &[{}];",
952            rule_array_str
953        )
954        .as_str();
955
956        code += "let terminal_symbols = vec![";
957        for entry in self.symbol_id_map.iter() {
958            let symbol = entry.0;
959            let symbol_id = entry.1;
960
961            match symbol {
962                BNFSymbol::TerminalSymbolString(string) => {
963                    code += format!(
964                        "TerminalSymbol::new_from_string(\"{}\", {}),",
965                        string, symbol_id
966                    )
967                    .as_str();
968                }
969                BNFSymbol::TerminalSymbolRegex(regex) => {
970                    code += format!(
971                        "TerminalSymbol::new_from_regex(r\"{}\", {}),",
972                        regex, symbol_id
973                    )
974                    .as_str();
975                }
976                BNFSymbol::TerminalSymbolFunction(fn_string) => {
977                    code += format!(
978                        "TerminalSymbol::new_from_tokenizer_fn({}, {}),",
979                        fn_string, symbol_id
980                    )
981                    .as_str();
982                }
983                _ => {}
984            }
985        }
986        code += "];";
987        code += "let lexer = Lexer::new(terminal_symbols);";
988
989        code += "let tokens = lexer.scan(source);";
990        code += "return __parse(tokens, RULE_PATTERN_NAME, LR_TABLE, BNF_RULES);";
991        code += "}";
992
993        return Ok(code);
994    }
995
996    fn insert_opreration(
997        &self,
998        operation_map: &mut HashMap<BNFSymbol, Operation>,
999        symbol: &BNFSymbol,
1000        operation: Operation,
1001    ) -> Result<(), String> {
1002        if operation_map.contains_key(symbol) {
1003            let mut message = format!(
1004                "{:?} {:?} conflict!",
1005                operation_map.get(symbol).unwrap(),
1006                operation
1007            );
1008
1009            match symbol {
1010                BNFSymbol::NonTerminalSymbolName(symbol_name) => {
1011                    if !symbol_name.is_empty() {
1012                        message += format!(" | Symbol : {} ::=", symbol_name).as_str();
1013
1014                        let rule = self.rule_map.get(symbol_name).unwrap();
1015                        for pattern in rule.or_patterns.iter() {
1016                            for symbol in pattern.iter() {
1017                                message += format!(" {} ", symbol.get_symbol_name()).as_str();
1018                            }
1019                            message += "|";
1020                        }
1021                    }
1022                }
1023                _ => {
1024                    message += format!(" | Symbol : '{}'", symbol.get_symbol_name()).as_str();
1025                }
1026            }
1027
1028            return Err(message);
1029        }
1030        operation_map.insert(symbol.clone(), operation);
1031        return Ok(());
1032    }
1033
1034    fn add_items(&self, lr_group: &mut LRGroup) {
1035        loop {
1036            let mut is_all_scanned = true;
1037            let mut add_item_list = Vec::<LRItem>::new();
1038
1039            for i in 0..lr_group.item_list.len() {
1040                let item = &mut lr_group.item_list[i];
1041
1042                if item.is_scanned || item.is_last_position() {
1043                    continue;
1044                }
1045                is_all_scanned = false;
1046                item.is_scanned = true;
1047
1048                let next_symbol_name = match item.get_next_nonterminal_symbol_name() {
1049                    Some(name) => name,
1050                    _ => continue,
1051                };
1052
1053                let item = &lr_group.item_list[i];
1054
1055                let latter_pattern = item.get_latter_pattern();
1056
1057                let first_set = self.get_first_set(&latter_pattern, &item.first_set);
1058
1059                let rule = self.rule_map.get(&next_symbol_name).unwrap();
1060
1061                for pattern in rule.or_patterns.iter() {
1062                    let mut found = false;
1063                    for item in lr_group.item_list.iter_mut() {
1064                        if item.root_name == next_symbol_name
1065                            && &item.pattern == pattern
1066                            && item.current_position == 0
1067                        {
1068                            found = true;
1069                            item.first_set.extend(first_set.clone());
1070                            break;
1071                        }
1072                    }
1073
1074                    if !found {
1075                        let mut item = LRItem::new(next_symbol_name.clone());
1076
1077                        for symbol in pattern.iter() {
1078                            match symbol {
1079                                BNFSymbol::Null => {}
1080                                BNFSymbol::EOF => {}
1081                                _ => {
1082                                    item.pattern.push(symbol.clone());
1083                                }
1084                            }
1085                        }
1086                        item.first_set = first_set.clone();
1087                        add_item_list.push(item);
1088                    }
1089                }
1090            }
1091
1092            lr_group.item_list.extend(add_item_list);
1093
1094            if is_all_scanned {
1095                break;
1096            }
1097        }
1098    }
1099
1100    fn get_pattern_id(&self, item: &LRItem) -> Result<usize, String> {
1101        for i in 0..self.single_pattern_rules.len() {
1102            let pattern = &self.single_pattern_rules[i];
1103            if pattern.root_symbol_name == item.root_name && pattern.pattern == item.pattern {
1104                return Ok(i);
1105            }
1106        }
1107        return Err(format!("Internal error. Not found pattern. {:?}", item));
1108    }
1109
1110    fn get_symbol_id(&self, symbol: &BNFSymbol) -> Result<usize, String> {
1111        return match self.symbol_id_map.get(symbol) {
1112            Some(id) => Ok(*id),
1113            _ => Err(format!(
1114                "Internal error. Symbol's id is not found. {:?}",
1115                symbol
1116            )),
1117        };
1118    }
1119
1120    fn get_first_set(
1121        &self,
1122        symbol_list: &Vec<BNFSymbol>,
1123        item_first_set: &HashSet<BNFSymbol>,
1124    ) -> HashSet<BNFSymbol> {
1125        let mut first_set = HashSet::<BNFSymbol>::new();
1126
1127        let mut is_nullable = true;
1128        for symbol in symbol_list.iter() {
1129            match symbol {
1130                BNFSymbol::NonTerminalSymbolName(name) => {
1131                    let rule = self.rule_map.get(name).unwrap();
1132                    first_set.extend(rule.first_set.clone());
1133                    if !rule.is_nullable {
1134                        is_nullable = false;
1135                        break;
1136                    }
1137                }
1138                BNFSymbol::Null => {}
1139                _ => {
1140                    first_set.insert(symbol.clone());
1141                    is_nullable = false;
1142                    break;
1143                }
1144            }
1145        }
1146
1147        if is_nullable {
1148            first_set.extend(item_first_set.clone());
1149        }
1150
1151        return first_set;
1152    }
1153}
1154
1155#[derive(Debug)]
1156pub struct LRGroup {
1157    pub group_number: usize,
1158    pub default_item_list: Vec<LRItem>,
1159    pub item_list: Vec<LRItem>,
1160    pub next_group_number_map: HashMap<BNFSymbol, usize>,
1161}
1162
1163impl LRGroup {
1164    pub fn new(group_number: usize) -> Self {
1165        return Self {
1166            group_number,
1167            default_item_list: Vec::new(),
1168            item_list: Vec::new(),
1169            next_group_number_map: HashMap::new(),
1170        };
1171    }
1172}
1173
1174#[derive(Debug, Clone, Eq, PartialEq)]
1175pub struct LRItem {
1176    pub root_name: String,
1177    pub pattern: Vec<BNFSymbol>,
1178    pub first_set: HashSet<BNFSymbol>,
1179    pub current_position: usize,
1180    pub is_scanned: bool,
1181}
1182
1183impl LRItem {
1184    pub fn new(root_name: String) -> Self {
1185        return Self {
1186            root_name,
1187            pattern: Vec::new(),
1188            first_set: HashSet::new(),
1189            current_position: 0,
1190            is_scanned: false,
1191        };
1192    }
1193
1194    pub fn is_last_position(&self) -> bool {
1195        return self.pattern.len() == self.current_position;
1196    }
1197
1198    pub fn get_next_nonterminal_symbol_name(&self) -> Option<String> {
1199        return match self.pattern.get(self.current_position) {
1200            Some(symbol) => match symbol {
1201                BNFSymbol::NonTerminalSymbolName(name) => Some(name.clone()),
1202                _ => None,
1203            },
1204            _ => None,
1205        };
1206    }
1207
1208    pub fn get_next_symbol(&self) -> Option<BNFSymbol> {
1209        return self.pattern.get(self.current_position).cloned();
1210    }
1211
1212    pub fn get_latter_pattern(&self) -> Vec<BNFSymbol> {
1213        let mut latter_pattern = Vec::<BNFSymbol>::new();
1214        if self.current_position + 1 >= self.pattern.len() {
1215            return latter_pattern;
1216        }
1217
1218        for i in (self.current_position + 1)..self.pattern.len() {
1219            let symbol = &self.pattern[i];
1220            latter_pattern.push(symbol.clone());
1221        }
1222
1223        return latter_pattern;
1224    }
1225
1226    pub fn create_next(&self) -> Option<Self> {
1227        if self.is_last_position() {
1228            return None;
1229        }
1230
1231        let mut cloned = self.clone();
1232        cloned.current_position += 1;
1233        cloned.is_scanned = false;
1234
1235        return Some(cloned);
1236    }
1237}
1238
1239#[derive(Debug, Clone, Eq, PartialEq)]
1240pub struct SinglePatternRule {
1241    pub root_symbol_name: String,
1242    pub pattern: Vec<BNFSymbol>,
1243}
1244
1245impl SinglePatternRule {
1246    pub fn new(root_symbol_name: String, pattern: Vec<BNFSymbol>) -> Self {
1247        let mut new_pattern = Vec::<BNFSymbol>::new();
1248        for symbol in pattern.iter() {
1249            match symbol {
1250                BNFSymbol::Null => {}
1251                _ => {
1252                    new_pattern.push(symbol.clone());
1253                }
1254            }
1255        }
1256
1257        return Self {
1258            root_symbol_name,
1259            pattern: new_pattern,
1260        };
1261    }
1262}
1263
1264#[derive(Debug, Clone)]
1265pub enum Operation {
1266    Shift(usize),
1267    Reduce(usize),
1268    GoTo(usize),
1269    Accept,
1270}
1271
1272pub const OPERATION_NONE: usize = 0;
1273pub const OPERATION_SHIFT: usize = 1;
1274pub const OPERATION_REDUCE: usize = 2;
1275pub const OPERATION_GOTO: usize = 3;
1276pub const OPERATION_ACCEPT: usize = 4;
1277
1278impl Operation {
1279    pub fn to_tuple(&self) -> (usize, usize) {
1280        return match self {
1281            Operation::Shift(i) => (OPERATION_SHIFT, *i),
1282            Operation::Reduce(i) => (OPERATION_REDUCE, *i),
1283            Operation::GoTo(i) => (OPERATION_GOTO, *i),
1284            Operation::Accept => (OPERATION_ACCEPT, 0),
1285        };
1286    }
1287}