parol/generators/
parser_generator.rs

1use crate::analysis::LookaheadDFA;
2use crate::analysis::compiled_la_dfa::CompiledDFA;
3use crate::analysis::lalr1_parse_table::LR1State;
4use crate::analysis::lookahead_dfa::CompiledProductionIndex;
5use crate::config::{CommonGeneratorConfig, ParserGeneratorConfig};
6use crate::conversions::dot::render_dfa_dot_string;
7use crate::generators::{GrammarConfig, NamingHelper};
8use crate::{LRAction, LRParseTable, Pr, Symbol, Terminal};
9use anyhow::{Result, anyhow};
10use parol_runtime::lexer::{
11    BLOCK_COMMENT, EOI, FIRST_USER_TOKEN, LINE_COMMENT, NEW_LINE, WHITESPACE,
12};
13use parol_runtime::log::trace;
14use parol_runtime::{NonTerminalIndex, TerminalIndex};
15use std::collections::{BTreeMap, BTreeSet};
16
17use crate::StrVec;
18use std::fmt::Debug;
19
20#[derive(Debug, Default)]
21pub(crate) struct Dfa {
22    prod0: CompiledProductionIndex,
23    transitions: StrVec,
24    k: usize,
25    nt_index: usize,
26    nt_name: String,
27}
28
29impl Dfa {
30    fn from_la_dfa(la_dfa: &LookaheadDFA, nt_index: usize, nt_name: String) -> Self {
31        let compiled_dfa = CompiledDFA::from_lookahead_dfa(la_dfa);
32        Dfa::from_compiled_dfa(compiled_dfa, nt_index, nt_name)
33    }
34
35    pub(crate) fn from_compiled_dfa(
36        compiled_dfa: CompiledDFA,
37        nt_index: usize,
38        nt_name: String,
39    ) -> Dfa {
40        let prod0 = compiled_dfa.prod0;
41        let transitions = compiled_dfa.transitions.iter().fold(
42            StrVec::new(4).first_line_no_indent(),
43            |mut acc, t| {
44                acc.push(format!(
45                    "Trans({}, {}, {}, {}),",
46                    t.from_state, t.term, t.to_state, t.prod_num
47                ));
48                acc
49            },
50        );
51        let k = compiled_dfa.k;
52
53        Self {
54            prod0,
55            transitions,
56            k,
57            nt_index,
58            nt_name,
59        }
60    }
61}
62
63impl std::fmt::Display for Dfa {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        let Dfa {
66            prod0,
67            transitions,
68            k,
69            nt_index,
70            nt_name,
71        } = self;
72        writeln!(f, r#"/* {nt_index} - "{nt_name}" */"#)?;
73        f.write_fmt(ume::ume! {
74            LookaheadDFA {
75                prod0: #prod0,
76                transitions: &[#transitions],
77                k: #k,
78            },
79        })
80    }
81}
82
83#[derive(Debug, Default)]
84struct Dfas {
85    dfa_count: usize,
86    lookahead_dfa_s: String,
87}
88
89impl std::fmt::Display for Dfas {
90    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
91        let Dfas {
92            dfa_count,
93            lookahead_dfa_s,
94        } = self;
95        f.write_fmt(ume::ume! {
96            pub const LOOKAHEAD_AUTOMATA: &[LookaheadDFA; #dfa_count] = &[
97            #lookahead_dfa_s];
98        })
99    }
100}
101
102#[derive(Debug, Default)]
103struct Production {
104    lhs: usize,
105    production: StrVec,
106    prod_num: usize,
107    prod_string: String,
108}
109
110impl Production {
111    fn from_cfg_production(
112        pr: &Pr,
113        prod_num: usize,
114        non_terminals: &[&str],
115        terminals: &[&str],
116    ) -> Self {
117        let get_non_terminal_index =
118            |nt: &str| non_terminals.iter().position(|n| *n == nt).unwrap();
119        let get_terminal_index = |tr: &str| {
120            terminals.iter().position(|t| *t == tr).unwrap() as TerminalIndex + FIRST_USER_TOKEN
121        };
122        let lhs = get_non_terminal_index(pr.get_n_str());
123        let production =
124            pr.get_r()
125                .iter()
126                .rev()
127                .fold(StrVec::new(4).first_line_no_indent(), |mut acc, s| {
128                    match s {
129                        Symbol::N(n, ..) => {
130                            acc.push(format!("ParseType::N({}),", get_non_terminal_index(n)))
131                        }
132                        Symbol::T(Terminal::Trm(t, ..)) => {
133                            acc.push(format!("ParseType::T({}),", get_terminal_index(t)))
134                        }
135                        Symbol::S(s) => acc.push(format!("ParseType::S({s}),")),
136                        Symbol::Push(s) => acc.push(format!("ParseType::Push({s}),")),
137                        Symbol::Pop => acc.push("ParseType::Pop,".to_string()),
138                        _ => panic!("Unexpected symbol type in production!"),
139                    }
140                    acc
141                });
142        let prod_string = format!("{pr}");
143        Self {
144            lhs,
145            production,
146            prod_num,
147            prod_string,
148        }
149    }
150}
151
152impl std::fmt::Display for Production {
153    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154        let Production {
155            lhs,
156            production,
157            prod_num,
158            prod_string,
159        } = self;
160        writeln!(f, "// {prod_num} - {prod_string}")?;
161        f.write_fmt(ume::ume! {
162            Production {
163                lhs: #lhs,
164                production: &[#production],
165            },
166        })?;
167        writeln!(f)
168    }
169}
170
171#[derive(Debug, Default)]
172struct LRProduction {
173    lhs: usize,
174    len: usize,
175    prod_num: usize,
176    prod_string: String,
177}
178
179impl LRProduction {
180    fn from_cfg_production(pr: &Pr, prod_num: usize, non_terminals: &[&str]) -> Self {
181        let get_non_terminal_index =
182            |nt: &str| non_terminals.iter().position(|n| *n == nt).unwrap();
183        let lhs = get_non_terminal_index(pr.get_n_str());
184        let len = pr.get_r().iter().count();
185        let prod_string = format!("{pr}");
186        Self {
187            lhs,
188            len,
189            prod_num,
190            prod_string,
191        }
192    }
193}
194
195impl std::fmt::Display for LRProduction {
196    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
197        let LRProduction {
198            lhs,
199            len,
200            prod_num,
201            prod_string,
202        } = self;
203        writeln!(f, "// {prod_num} - {prod_string}")?;
204        f.write_fmt(ume::ume! {
205            LRProduction {
206                lhs: #lhs,
207                len: #len,
208            },
209        })?;
210        writeln!(f)
211    }
212}
213
214#[derive(Debug, Default)]
215struct Productions {
216    production_count: usize,
217    productions: String,
218}
219
220impl std::fmt::Display for Productions {
221    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222        let Productions {
223            production_count,
224            productions,
225        } = self;
226        f.write_fmt(ume::ume! {
227            pub const PRODUCTIONS: &[Production; #production_count] = &[
228            #productions];
229        })
230    }
231}
232
233#[derive(Debug, Default)]
234struct LRProductions {
235    production_count: usize,
236    productions: String,
237}
238
239impl std::fmt::Display for LRProductions {
240    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241        let LRProductions {
242            production_count,
243            productions,
244        } = self;
245        f.write_fmt(ume::ume! {
246            pub const PRODUCTIONS: &[LRProduction; #production_count] = &[
247            #productions];
248        })
249    }
250}
251
252#[derive(Debug, Default)]
253struct ParserData<'a> {
254    start_symbol_index: usize,
255    lexer_source: &'a str,
256    non_terminals: StrVec,
257    non_terminal_count: usize,
258    dfa_source: String,
259    productions: String,
260    max_k: usize,
261    user_type_name: &'a str,
262    user_type_life_time: &'static str,
263    scanner_type_name: String,
264    scanner_module_name: String,
265    module_name: &'a str,
266    trim_parse_tree: bool,
267    disable_recovery: bool,
268}
269
270impl std::fmt::Display for ParserData<'_> {
271    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
272        let ParserData {
273            start_symbol_index,
274            lexer_source,
275            non_terminals,
276            non_terminal_count,
277            dfa_source,
278            productions,
279            max_k,
280            user_type_name,
281            scanner_type_name,
282            scanner_module_name,
283            user_type_life_time,
284            module_name,
285            trim_parse_tree,
286            disable_recovery,
287        } = self;
288
289        writeln!(
290            f,
291            "
292            // ---------------------------------------------------------
293            // This file was generated by parol.
294            // It is not intended for manual editing and changes will be
295            // lost after next build.
296            // ---------------------------------------------------------
297            "
298        )?;
299
300        f.write_fmt(ume::ume! {
301            use parol_runtime::{
302                parser::{
303                    parse_tree_type::TreeConstruct, LLKParser, LookaheadDFA, ParseType, Production, Trans,
304                },
305                ParolError, ParseTree, TokenStream,
306            };
307            use scnr2::scanner;
308            use std::path::Path;
309        })?;
310
311        writeln!(f, "\n")?;
312        let auto_name = format!("{user_type_name}Auto");
313        let trait_module_name = format!("{module_name}_trait");
314        f.write_fmt(ume::ume! {
315            use crate::#module_name::#user_type_name;
316            use crate::#trait_module_name::#auto_name;
317        })?;
318        writeln!(f, "\n")?;
319
320        writeln!(f, "{lexer_source}\n")?;
321
322        f.write_fmt(ume::ume! {
323            const MAX_K: usize = #max_k;
324        })?;
325        writeln!(f, "\n\n")?;
326        f.write_fmt(ume::ume! {
327            pub const NON_TERMINALS: &[&str; #non_terminal_count] = &[#non_terminals];
328        })?;
329
330        writeln!(f, "\n\n{dfa_source}")?;
331        writeln!(f, "\n{productions}\n")?;
332
333        writeln!(f, "\n")?;
334
335        let user_actions = ume::ume!(&mut #user_type_name #user_type_life_time).to_string();
336        let lifetime_on_parse = if *user_type_life_time == "<'t>" {
337            "'t,"
338        } else {
339            ""
340        };
341        let lifetime_on_input = if *user_type_life_time == "<'t>" {
342            "'t"
343        } else {
344            ""
345        };
346        let use_scanner_type = ume::ume! {
347            use #scanner_module_name::#scanner_type_name;
348        }
349        .to_string();
350        let scanner_instance = ume::ume! {
351            let scanner = #scanner_type_name::new();
352        }
353        .to_string();
354        let auto_wrapper = format!(
355            "\n// Initialize wrapper\n{}",
356            ume::ume! {
357                let mut user_actions = #auto_name::new(user_actions);
358            }
359        );
360        let mut_ref_user_actions = ume::ume!(&mut user_actions);
361        let enable_trimming = if *trim_parse_tree {
362            "llk_parser.trim_parse_tree();\n"
363        } else {
364            ""
365        };
366        let recovery = if *disable_recovery {
367            "llk_parser.disable_recovery();\n"
368        } else {
369            ""
370        };
371        f.write_fmt(ume::ume! {
372            pub fn parse<#lifetime_on_parse T>(
373                input: &#lifetime_on_input str,
374                file_name: T,
375                user_actions: #user_actions,
376            ) -> Result<ParseTree, ParolError> where T: AsRef<Path> {
377                use parol_runtime::{
378                    parser::{parse_tree_type::SynTree, parser_types::SynTreeFlavor},
379                    syntree::Builder,
380                };
381                let mut builder = Builder::<SynTree, SynTreeFlavor>::new_with();
382                parse_into(input, &mut builder, file_name, user_actions)?;
383                Ok(builder.build()?)
384            }
385        })?;
386        f.write_fmt(ume::ume! {
387            #[allow(dead_code)]
388            pub fn parse_into<'t, T: TreeConstruct<'t>>(
389                input: &'t str,
390                tree_builder: &mut T,
391                file_name: impl AsRef<Path>,
392                user_actions: #user_actions,
393            ) -> Result<(), ParolError> where ParolError: From<T::Error> {
394                #use_scanner_type
395                let mut llk_parser = LLKParser::new(
396                    #start_symbol_index,
397                    LOOKAHEAD_AUTOMATA,
398                    PRODUCTIONS,
399                    TERMINAL_NAMES,
400                    NON_TERMINALS,
401                );
402                #enable_trimming
403                #recovery
404                #scanner_instance
405                #auto_wrapper
406
407                llk_parser.parse_into(
408                    tree_builder,
409                    TokenStream::new(
410                        input,
411                        file_name,
412                        scanner.scanner_impl.clone(),
413                        &#scanner_type_name::match_function,
414                        MAX_K,
415                    )
416                    .unwrap(),
417                    #mut_ref_user_actions
418                )
419            }
420        })
421    }
422}
423
424#[derive(Debug, Default)]
425struct LRParserData<'a> {
426    start_symbol_index: usize,
427    lexer_source: &'a str,
428    non_terminals: StrVec,
429    non_terminal_count: usize,
430    productions: String,
431    user_type_name: &'a str,
432    user_type_life_time: &'static str,
433    scanner_type_name: String,
434    scanner_module_name: String,
435    module_name: &'a str,
436    trim_parse_tree: bool,
437    parse_table_source: String,
438}
439
440impl std::fmt::Display for LRParserData<'_> {
441    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
442        let LRParserData {
443            start_symbol_index,
444            lexer_source,
445            non_terminals,
446            non_terminal_count,
447            productions,
448            user_type_name,
449            user_type_life_time,
450            scanner_type_name,
451            scanner_module_name,
452            module_name,
453            trim_parse_tree,
454            parse_table_source,
455        } = self;
456
457        writeln!(
458            f,
459            "
460            // ---------------------------------------------------------
461            // This file was generated by parol.
462            // It is not intended for manual editing and changes will be
463            // lost after next build.
464            // ---------------------------------------------------------
465            "
466        )?;
467
468        f.write_fmt(ume::ume! {
469            use parol_runtime::{
470                ParolError, ParseTree, TokenStream,
471                lr_parser::{LR1State, LRAction, LRParseTable, LRParser, LRProduction},
472                parser::parse_tree_type::TreeConstruct,
473            };
474            use scnr2::scanner;
475            use std::path::Path;
476        })?;
477
478        writeln!(f, "\n")?;
479        let auto_name = format!("{user_type_name}Auto");
480        let trait_module_name = format!("{module_name}_trait");
481        f.write_fmt(ume::ume! {
482            use crate::#module_name::#user_type_name;
483            use crate::#trait_module_name::#auto_name;
484        })?;
485        writeln!(f, "\n")?;
486
487        writeln!(f, "{lexer_source}\n")?;
488
489        writeln!(f, "\n\n")?;
490        f.write_fmt(ume::ume! {
491            pub const NON_TERMINALS: &[&str; #non_terminal_count] = &[#non_terminals];
492        })?;
493
494        writeln!(
495            f,
496            "\n\nstatic PARSE_TABLE: LRParseTable  = {parse_table_source};\n"
497        )?;
498        writeln!(f, "\n{productions}\n")?;
499
500        writeln!(f, "\n")?;
501
502        let user_actions = ume::ume!(&mut #user_type_name #user_type_life_time).to_string();
503        let lifetime_on_parse = if *user_type_life_time == "<'t>" {
504            "'t,"
505        } else {
506            ""
507        };
508        let lifetime_on_input = if *user_type_life_time == "<'t>" {
509            "'t"
510        } else {
511            ""
512        };
513        let auto_wrapper = format!(
514            "\n// Initialize wrapper\n{}",
515            ume::ume! {
516                let mut user_actions = #auto_name::new(user_actions);
517            }
518        );
519        let mut_ref_user_actions = ume::ume!(&mut user_actions);
520        let enable_trimming = if *trim_parse_tree {
521            "lr_parser.trim_parse_tree();\n"
522        } else {
523            ""
524        };
525        let use_scanner_type = ume::ume! {
526            use #scanner_module_name::#scanner_type_name;
527        }
528        .to_string();
529        let scanner_instance = ume::ume! {
530            let scanner = #scanner_type_name::new();
531        }
532        .to_string();
533
534        f.write_fmt(ume::ume! {
535            pub fn parse<#lifetime_on_parse T>(
536                input: &#lifetime_on_input str,
537                file_name: T,
538                user_actions: #user_actions,
539            ) -> Result<ParseTree, ParolError> where T: AsRef<Path> {
540                use parol_runtime::{
541                    parser::{parse_tree_type::SynTree, parser_types::SynTreeFlavor},
542                    syntree::Builder,
543                };
544                let mut builder = Builder::<SynTree, SynTreeFlavor>::new_with();
545                parse_into(input, &mut builder, file_name, user_actions)?;
546                Ok(builder.build()?)
547            }
548        })?;
549        f.write_fmt(ume::ume! {
550            #[allow(dead_code)]
551            pub fn parse_into<'t, T: TreeConstruct<'t>>(
552                input: &'t str,
553                tree_builder: &mut T,
554                file_name: impl AsRef<Path>,
555                user_actions: #user_actions,
556            ) -> Result<(), ParolError> where ParolError: From<T::Error> {
557                #use_scanner_type
558                let mut lr_parser = LRParser::new(
559                    #start_symbol_index,
560                    &PARSE_TABLE,
561                    PRODUCTIONS,
562                    TERMINAL_NAMES,
563                    NON_TERMINALS,
564                );
565                #enable_trimming
566                #auto_wrapper
567                #scanner_instance
568                lr_parser.parse_into(
569                    tree_builder,
570                    TokenStream::new(
571                        input,
572                        file_name,
573                        scanner.scanner_impl.clone(),
574                        &#scanner_type_name::match_function,
575                        1,
576                    )
577                    .unwrap(),
578                    #mut_ref_user_actions
579                )
580            }
581        })
582    }
583}
584
585// ---------------------------------------------------
586// Part of the Public API
587// *Changes will affect crate's version according to semver*
588// ---------------------------------------------------
589///
590/// Generates the parser part of the parser output file.
591///
592pub fn generate_parser_source<C: CommonGeneratorConfig + ParserGeneratorConfig>(
593    grammar_config: &GrammarConfig,
594    lexer_source: &str,
595    config: &C,
596    la_dfa: &BTreeMap<String, LookaheadDFA>,
597    ast_type_has_lifetime: bool,
598) -> Result<String> {
599    let terminals = get_terminals(grammar_config);
600    let original_non_terminals = grammar_config.cfg.get_non_terminal_set();
601    let non_terminal_count = original_non_terminals.len();
602    let width = (non_terminal_count as f32).log10() as usize + 1;
603
604    let non_terminals = original_non_terminals.iter().collect::<Vec<_>>();
605    let start_symbol_index: usize =
606        find_start_symbol_index(non_terminals.as_slice(), grammar_config)?;
607
608    let non_terminals = non_terminals
609        .iter()
610        .enumerate()
611        .fold(StrVec::new(4), |mut acc, (i, n)| {
612            acc.push(format!(r#"/* {i:width$} */ "{n}","#));
613            acc
614        });
615
616    let dfa_source = generate_dfa_source(la_dfa);
617
618    let productions = generate_productions(grammar_config, &original_non_terminals, &terminals);
619
620    let max_k = grammar_config.lookahead_size;
621
622    let user_type_life_time = if ast_type_has_lifetime { "<'t>" } else { "" };
623
624    let parser_data = ParserData {
625        start_symbol_index,
626        lexer_source,
627        non_terminals,
628        non_terminal_count,
629        dfa_source,
630        productions,
631        max_k,
632        user_type_name: config.user_type_name(),
633        user_type_life_time,
634        scanner_type_name: get_scanner_type_name(config),
635        scanner_module_name: get_scanner_module_name(config),
636        module_name: config.module_name(),
637        trim_parse_tree: config.trim_parse_tree(),
638        disable_recovery: config.recovery_disabled(),
639    };
640
641    Ok(format!("{parser_data}"))
642}
643
644fn get_terminals(grammar_config: &GrammarConfig) -> Vec<&str> {
645    grammar_config
646        .cfg
647        .get_ordered_terminals()
648        .iter()
649        .map(|(t, _, _, _)| *t)
650        .collect::<Vec<&str>>()
651}
652
653fn find_start_symbol_index(
654    non_terminals: &[&String],
655    grammar_config: &GrammarConfig,
656) -> Result<usize, anyhow::Error> {
657    non_terminals
658        .iter()
659        .position(|n| *n == grammar_config.cfg.get_start_symbol())
660        .ok_or_else(|| {
661            anyhow!(
662                "Start symbol '{}' is not part of the given grammar!",
663                grammar_config.cfg.get_start_symbol()
664            )
665        })
666}
667
668fn get_scanner_module_name<C: CommonGeneratorConfig>(config: &C) -> String {
669    let scanner_module_name = NamingHelper::to_lower_snake_case(config.user_type_name());
670    scanner_module_name + "_scanner"
671}
672
673fn get_scanner_type_name<C: CommonGeneratorConfig>(config: &C) -> String {
674    let scanner_type_name = NamingHelper::to_upper_camel_case(config.user_type_name());
675    scanner_type_name + "Scanner"
676}
677
678// ---------------------------------------------------
679// Part of the Public API
680// *Changes will affect crate's version according to semver*
681// ---------------------------------------------------
682///
683/// Generates the parser part of the parser output file in case of LALR(1) parser.
684///
685pub fn generate_lalr1_parser_source<C: CommonGeneratorConfig + ParserGeneratorConfig>(
686    grammar_config: &GrammarConfig,
687    lexer_source: &str,
688    config: &C,
689    parse_table: &LRParseTable,
690    ast_type_has_lifetime: bool,
691) -> Result<String> {
692    let terminals = get_terminals(grammar_config);
693    let original_non_terminals = grammar_config.cfg.get_non_terminal_set();
694    let non_terminal_count = original_non_terminals.len();
695    let width = (non_terminal_count as f32).log10() as usize + 1;
696
697    let non_terminals = original_non_terminals.iter().collect::<Vec<_>>();
698    let start_symbol_index: usize = find_start_symbol_index(&non_terminals, grammar_config)?;
699
700    let non_terminals_with_index_comment =
701        non_terminals
702            .iter()
703            .enumerate()
704            .fold(StrVec::new(4), |mut acc, (i, n)| {
705                acc.push(format!(r#"/* {i:width$} */ "{n}","#));
706                acc
707            });
708    let productions = generate_lr_productions(grammar_config, &original_non_terminals);
709
710    let user_type_life_time = if ast_type_has_lifetime { "<'t>" } else { "" };
711
712    let parse_table_source = generate_parse_table_source(parse_table, &terminals, &non_terminals);
713
714    let parser_data = LRParserData {
715        start_symbol_index,
716        lexer_source,
717        non_terminals: non_terminals_with_index_comment,
718        non_terminal_count,
719        productions,
720        user_type_name: config.user_type_name(),
721        user_type_life_time,
722        scanner_type_name: get_scanner_type_name(config),
723        scanner_module_name: get_scanner_module_name(config),
724        module_name: config.module_name(),
725        trim_parse_tree: config.trim_parse_tree(),
726        parse_table_source,
727    };
728
729    Ok(format!("{parser_data}"))
730}
731
732fn generate_parse_table_source(
733    parse_table: &LRParseTable,
734    terminals: &[&str],
735    non_terminals: &[&String],
736) -> String {
737    // Create a terminal resolver function
738    let tr = |ti: TerminalIndex| {
739        if ti >= FIRST_USER_TOKEN {
740            terminals[(ti - FIRST_USER_TOKEN) as usize]
741        } else {
742            match ti {
743                EOI => "<$>",
744                NEW_LINE => "<NL>",
745                WHITESPACE => "<WS>",
746                LINE_COMMENT => "<LC>",
747                BLOCK_COMMENT => "<BC>",
748                _ => unreachable!(),
749            }
750        }
751    };
752
753    // Create a non-terminal resolver function
754    let nr = |ni: usize| non_terminals[ni].as_str();
755
756    let actions = parse_table
757        .states
758        .iter()
759        .fold(BTreeSet::<LRAction>::new(), |mut acc, s| {
760            s.actions.iter().for_each(|(_, a)| {
761                acc.insert(a.clone());
762            });
763            acc
764        });
765
766    // Sorted array of actions
767    let actions_array = actions.iter().cloned().collect::<Vec<_>>();
768
769    let actions = actions_array
770        .iter()
771        .enumerate()
772        .fold(String::new(), |mut acc, (i, a)| {
773            acc.push_str(format!("/* {} */ {}, ", i, generate_source_for_action(a, nr)).as_str());
774            acc
775        });
776
777    let states = parse_table
778        .states
779        .iter()
780        .enumerate()
781        .fold(String::new(), |mut acc, (i, s)| {
782            acc.push_str(&generate_source_for_lrstate(s, i, &actions_array, &tr, &nr));
783            acc.push(',');
784            acc
785        });
786
787    format!("LRParseTable {{ actions: &[{actions}], states: &[{states}] }}",)
788}
789
790fn generate_source_for_lrstate<'a>(
791    state: &'a LR1State,
792    state_num: usize,
793    actions_array: &[LRAction],
794    tr: &impl Fn(TerminalIndex) -> &'a str,
795    nr: &impl Fn(NonTerminalIndex) -> &'a str,
796) -> String {
797    format!(
798        r#"
799        // State {}
800        LR1State {{
801            actions: {},
802            gotos: {} }}"#,
803        state_num,
804        generate_source_for_actions(state, actions_array, tr, nr),
805        generate_source_for_gotos(state, nr)
806    )
807}
808
809fn generate_source_for_actions<'a>(
810    state: &LR1State,
811    actions_array: &[LRAction],
812    tr: &impl Fn(TerminalIndex) -> &'a str,
813    nr: &impl Fn(NonTerminalIndex) -> &'a str,
814) -> String {
815    format!(
816        r#"&[{}]"#,
817        state
818            .actions
819            .iter()
820            .map(|(t, a)| {
821                format!(
822                    r#"
823        ({}, {}) /* '{}' => {} */"#,
824                    t,
825                    generate_source_for_action_ref(a, actions_array),
826                    tr(*t),
827                    generate_action_comment(a, nr)
828                )
829            })
830            .collect::<Vec<String>>()
831            .join(", ")
832    )
833}
834
835fn generate_source_for_gotos<'a>(
836    state: &LR1State,
837    nr: &impl Fn(NonTerminalIndex) -> &'a str,
838) -> String {
839    if state.gotos.is_empty() {
840        return "&[]".to_string();
841    }
842    format!(
843        r#"&[{}]"#,
844        state
845            .gotos
846            .iter()
847            .map(|(n, s)| {
848                format!(
849                    r#"
850                ({}, {}) /* {} => {} */"#,
851                    n,
852                    s,
853                    nr(*n),
854                    s,
855                )
856            })
857            .collect::<Vec<String>>()
858            .join(", ")
859    )
860}
861
862fn generate_source_for_action<'a>(
863    action: &LRAction,
864    nr: impl Fn(NonTerminalIndex) -> &'a str,
865) -> String {
866    match action {
867        LRAction::Shift(s) => format!("LRAction::Shift({s})"),
868        LRAction::Reduce(n, p) => format!("LRAction::Reduce({} /* {} */, {})", n, nr(*n), p),
869        LRAction::Accept => "LRAction::Accept".to_string(),
870    }
871}
872
873fn generate_source_for_action_ref(action: &LRAction, actions_array: &[LRAction]) -> String {
874    let index = actions_array.iter().position(|a| a == action).unwrap();
875    format!("{index}")
876}
877
878fn generate_action_comment<'a>(
879    action: &LRAction,
880    nr: impl Fn(NonTerminalIndex) -> &'a str,
881) -> String {
882    match action {
883        LRAction::Shift(s) => format!("LRAction::Shift({s})"),
884        LRAction::Reduce(n, p) => format!("LRAction::Reduce({}, {})", nr(*n), p),
885        LRAction::Accept => "LRAction::Accept".to_string(),
886    }
887}
888
889fn generate_dfa_source(la_dfa: &BTreeMap<String, LookaheadDFA>) -> String {
890    let lookahead_dfa_s = la_dfa
891        .iter()
892        .enumerate()
893        .fold(StrVec::new(0), |mut acc, (i, (n, d))| {
894            trace!("{d}");
895            trace!("{}", render_dfa_dot_string(d, n));
896            let dfa = Dfa::from_la_dfa(d, i, n.clone());
897            acc.push(format!("{dfa}"));
898            acc
899        });
900    let dfa_count = la_dfa.len();
901
902    let dfas = Dfas {
903        dfa_count,
904        lookahead_dfa_s: format!("{lookahead_dfa_s}"),
905    };
906
907    format!("{dfas}")
908}
909
910fn generate_productions(
911    grammar_config: &GrammarConfig,
912    non_terminals: &BTreeSet<String>,
913    terminals: &[&str],
914) -> String {
915    let non_terminals = non_terminals
916        .iter()
917        .map(|n| n.as_str())
918        .collect::<Vec<&str>>();
919    let production_count = grammar_config.cfg.pr.len();
920    let productions =
921        grammar_config
922            .cfg
923            .pr
924            .iter()
925            .enumerate()
926            .fold(String::new(), |mut acc, (i, p)| {
927                let production = Production::from_cfg_production(p, i, &non_terminals, terminals);
928                acc.push_str(format!("{production}").as_str());
929                acc
930            });
931
932    let productions = Productions {
933        production_count,
934        productions,
935    };
936
937    format!("{productions}")
938}
939
940fn generate_lr_productions(
941    grammar_config: &GrammarConfig,
942    non_terminals: &BTreeSet<String>,
943) -> String {
944    let non_terminals = non_terminals
945        .iter()
946        .map(|n| n.as_str())
947        .collect::<Vec<&str>>();
948    let production_count = grammar_config.cfg.pr.len();
949    let productions =
950        grammar_config
951            .cfg
952            .pr
953            .iter()
954            .enumerate()
955            .fold(String::new(), |mut acc, (i, p)| {
956                let production = LRProduction::from_cfg_production(p, i, &non_terminals);
957                acc.push_str(format!("{production}").as_str());
958                acc
959            });
960
961    let productions = LRProductions {
962        production_count,
963        productions,
964    };
965
966    format!("{productions}")
967}