compiler_course_helper/grammar/
pretty_print.rs

1use std::collections::HashSet;
2
3use super::{
4    lr_fsm::{DotProduction, LRItem, LRParsingTable, LRParsingTableAction, LRFSM},
5    Grammar, EPSILON,
6};
7use crowbook_text_processing::escape;
8use serde::Serialize;
9
10fn production_right_to_latex<'a>(
11    production: impl Iterator<Item = &'a str>,
12    terminal_set: &HashSet<&str>,
13) -> String {
14    production
15        .map(|s| {
16            if terminal_set.contains(s) {
17                format!("\\text{{{}}}", escape::tex(s))
18            } else {
19                escape::tex(s).to_string()
20            }
21        })
22        .collect::<Vec<_>>()
23        .join(" \\  ")
24        .replace(super::EPSILON, "\\epsilon")
25}
26
27#[derive(Debug, Clone, Serialize)]
28pub struct ProductionOutput<'a> {
29    pub left: &'a str,
30    pub rights: Vec<Vec<&'a str>>,
31}
32
33impl ProductionOutput<'_> {
34    pub fn to_plaintext(&self, left_width: usize, multiline: bool) -> String {
35        self.rights
36            .iter()
37            .map(|right| right.join(" "))
38            .enumerate()
39            .map(|(i, right)| {
40                if i == 0 {
41                    format!("{:>width$} -> {}", self.left, right, width = left_width)
42                } else {
43                    if multiline {
44                        format!("{:>width$}  | {}", "", right, width = left_width)
45                    } else {
46                        format!(" | {}", right)
47                    }
48                }
49            })
50            .collect::<Vec<_>>()
51            .join(if multiline { "\n" } else { "" })
52    }
53    pub fn to_latex(&self, and_sign: bool, terminal_set: &HashSet<&str>) -> String {
54        if self.rights.len() == 0 {
55            return String::new();
56        }
57
58        let right = self
59            .rights
60            .iter()
61            .map(|right| production_right_to_latex(right.iter().cloned(), terminal_set))
62            .collect::<Vec<_>>()
63            .join(" \\mid ");
64
65        if and_sign {
66            format!("{} & \\rightarrow & {}", escape::tex(self.left), right)
67        } else {
68            format!("{} \\rightarrow {}", escape::tex(self.left), right)
69        }
70    }
71}
72
73#[derive(Serialize)]
74pub struct ProductionOutputVec<'a> {
75    productions: Vec<ProductionOutput<'a>>,
76    terminal_set: HashSet<&'a str>,
77}
78
79impl ProductionOutputVec<'_> {
80    pub fn to_plaintext(&self) -> String {
81        let left_max_len = self.productions.iter().map(|p| p.left.len()).max().unwrap();
82        self.productions
83            .iter()
84            .map(|s| s.to_plaintext(left_max_len, true))
85            .collect::<Vec<String>>()
86            .join("\n")
87    }
88
89    pub fn to_latex(&self) -> String {
90        std::iter::once("\\[\\begin{array}{cll}".to_string())
91            .chain(
92                self.productions
93                    .iter()
94                    .map(|s| s.to_latex(true, &self.terminal_set)),
95            )
96            .chain(std::iter::once("\\end{array}\\]".to_string()))
97            .collect::<Vec<String>>()
98            .join("\\\\\n")
99    }
100}
101
102impl Grammar {
103    pub fn to_production_output_vec(&self) -> ProductionOutputVec {
104        let mut productions = Vec::new();
105        for symbol in self.symbols.iter().skip(1) {
106            // skip(1): skip epsilon
107            if let Some(non_terminal) = symbol.non_terminal() {
108                let mut rights = Vec::new();
109                for production in &non_terminal.productions {
110                    rights.push(self.production_to_vec_str(&production));
111                }
112                productions.push(ProductionOutput {
113                    left: non_terminal.name.as_str(),
114                    rights,
115                });
116            }
117        }
118        ProductionOutputVec {
119            productions,
120            terminal_set: self.terminal_iter().map(|s| s.as_str()).collect(),
121        }
122    }
123}
124
125#[derive(Serialize)]
126struct NonTerminalOutput<'a> {
127    name: &'a str,
128    nullable: bool,
129    first: Vec<&'a str>,
130    follow: Vec<&'a str>,
131}
132
133impl NonTerminalOutput<'_> {
134    fn to_plaintext(&self) -> String {
135        format!(
136            "{} | {} | {} | {}",
137            self.name,
138            self.nullable,
139            self.first.join(", "),
140            self.follow.join(", ")
141        )
142    }
143    fn to_latex(&self) -> String {
144        fn f(a: &Vec<&str>) -> String {
145            a.iter()
146                .map(|s| escape::tex(*s))
147                .collect::<Vec<_>>()
148                .join(r"\ ")
149                .replace(EPSILON, r"$\epsilon$")
150        }
151
152        format!(
153            "{} & {} & {} & {}",
154            escape::tex(self.name),
155            self.nullable,
156            f(&self.first),
157            f(&self.follow)
158        )
159    }
160}
161
162#[derive(Serialize)]
163pub struct NonTerminalOutputVec<'a> {
164    non_terminals: Vec<NonTerminalOutput<'a>>,
165    terminal_set: HashSet<&'a str>,
166}
167
168impl NonTerminalOutputVec<'_> {
169    pub fn to_plaintext(&self) -> String {
170        self.non_terminals
171            .iter()
172            .map(|s| s.to_plaintext())
173            .collect::<Vec<String>>()
174            .join("\n")
175    }
176    pub fn to_latex(&self) -> String {
177        let content = self
178            .non_terminals
179            .iter()
180            .map(|e| e.to_latex())
181            .collect::<Vec<_>>()
182            .join("\\\\\n ");
183
184        "\\begin{tabular}{c|c|c|c}\n".to_string()
185            + "Symbol & Nullable & First & Follow\\\\\\hline\n"
186            + &content
187            + "\\\\\n\\end{tabular}"
188    }
189}
190
191impl Grammar {
192    pub fn to_non_terminal_output_vec(&mut self) -> NonTerminalOutputVec {
193        if !self.is_nullable_first_follow_valid() {
194            self.calculate_nullable_first_follow();
195        }
196
197        let mut data = Vec::new();
198        for symbol in self.symbols.iter().skip(1) {
199            // skip(1): skip epsilon
200            if let Some(non_terminal) = symbol.non_terminal() {
201                let mut t = NonTerminalOutput {
202                    name: non_terminal.name.as_str(),
203                    nullable: non_terminal.nullable,
204                    first: non_terminal
205                        .first
206                        .iter()
207                        .map(|idx| self.get_symbol_name(*idx))
208                        .collect(),
209                    follow: non_terminal
210                        .follow
211                        .iter()
212                        .map(|idx| self.get_symbol_name(*idx))
213                        .collect(),
214                };
215                t.first.sort();
216                t.follow.sort();
217
218                if non_terminal.nullable {
219                    t.first.push(EPSILON);
220                }
221                data.push(t);
222            }
223        }
224        NonTerminalOutputVec {
225            non_terminals: data,
226            terminal_set: self.terminal_iter().map(|s| s.as_str()).collect(),
227        }
228    }
229}
230
231impl DotProduction {
232    pub fn to_plaintext(&self) -> String {
233        let mut output = String::new();
234        output.push_str(&self.left);
235        output.push_str(" -> ");
236        for (i, s) in self.production.iter().enumerate() {
237            if i != 0 {
238                output.push_str(" ");
239            }
240
241            if i == self.position {
242                output.push_str(".");
243            }
244            output.push_str(s);
245        }
246        if self.position == self.production.len() {
247            output.push_str(".");
248        }
249        if let Some(lookahead) = &self.lookahead {
250            output.push_str(", ");
251            output.push_str(&lookahead.join("/"));
252        }
253
254        output
255    }
256    pub fn to_latex(&self, terminal_set: &HashSet<&str>) -> String {
257        let right = self
258            .production
259            .iter()
260            .map(|s| s.as_str())
261            .take(self.position)
262            .chain(std::iter::once("."))
263            .chain(
264                self.production
265                    .iter()
266                    .map(|s| s.as_str())
267                    .skip(self.position),
268            );
269        let right = production_right_to_latex(right, terminal_set);
270
271        if let Some(lookahead) = &self.lookahead {
272            let lookahead = lookahead
273                .iter()
274                .map(|s| escape::tex(s))
275                .collect::<Vec<_>>()
276                .join(" ");
277            format!("${} \\rightarrow {}$, {}", self.left, right, lookahead)
278        } else {
279            format!("${} \\rightarrow {}$", self.left, right)
280        }
281    }
282}
283
284impl LRItem {
285    pub fn to_plaintext(&self, is_end: bool) -> String {
286        let kernel = self
287            .kernel
288            .iter()
289            .map(|c| c.to_plaintext())
290            .collect::<Vec<_>>()
291            .join("\n");
292
293        let extend = if self.extend.len() > 0 {
294            format!(
295                "\n---\n{}",
296                self.extend
297                    .iter()
298                    .map(|c| c.to_plaintext())
299                    .collect::<Vec<_>>()
300                    .join("\n")
301            )
302        } else {
303            String::new()
304        };
305
306        let edges = if self.edges.len() > 0 || is_end {
307            format!(
308                "\n===\n{}",
309                self.edges
310                    .iter()
311                    .map(|(k, v)| format!("- {} -> {}", k, v))
312                    .chain(std::iter::once("- $ -> accept".to_string()))
313                    .collect::<Vec<_>>()
314                    .join("\n")
315            )
316        } else {
317            String::new()
318        };
319
320        format!("{}{}{}", kernel, extend, edges)
321    }
322
323    pub fn node_to_latex(&self, id: usize, terminal_set: &HashSet<&str>) -> String {
324        let content = self
325            .kernel
326            .iter()
327            .chain(self.extend.iter())
328            .map(|e| e.to_latex(terminal_set))
329            .collect::<Vec<_>>()
330            .join(" \\\\ \n");
331        format!(
332            "\\node [block] (I_{}){}\n{{\n$I_{}$\\\\\n{}\n}};",
333            id,
334            if id > 0 {
335                if id % 2 == 0 {
336                    format!(" [below of = I_{}] ", id - 2)
337                } else {
338                    format!(" [right of = I_{}] ", id - 1)
339                }
340            } else {
341                String::new()
342            },
343            id,
344            content
345        )
346    }
347
348    pub fn edge_to_latex(&self, id: usize) -> String {
349        self.edges
350            .iter()
351            .map(|(e, v)| {
352                format!(
353                    "\\path [->] (I_{}) edge {} node [above]{{{}}} (I_{});",
354                    id,
355                    if id == *v { "[loop left]" } else { "[right]" },
356                    e,
357                    v
358                )
359            })
360            .collect::<Vec<_>>()
361            .join("\n")
362    }
363}
364
365impl LRFSM {
366    pub fn to_plaintext(&self) -> String {
367        let states = self
368            .states
369            .iter()
370            .enumerate()
371            .map(|(i, s)| format!("I{}\n{}", i, s.to_plaintext(i == self.end)))
372            .collect::<Vec<_>>()
373            .join("\n\n");
374
375        states
376    }
377
378    pub fn to_latex(&self) -> String {
379        let terminal_set: HashSet<&str> = self.terminals.iter().map(|s| s.as_str()).collect();
380        format!(
381            "\\begin{{tikzpicture}}[node distance=5cm,block/.style={{state, rectangle, text width=6em}}]\n{}\n\\node (accept) [right of = I_1] {{accept}};\n\\path [->] (I_{}) edge [right] node [above right]{{\\$}} (accept);\n\\end{{tikzpicture}}",
382            self.states
383                .iter()
384                .enumerate()
385                .map(|(i, s)| s.node_to_latex(i, &terminal_set))
386                .chain(self.states.iter().enumerate().map(|(i,s)| s.edge_to_latex(i)))
387                .collect::<Vec<_>>()
388                .join("\n"),
389                self.end
390        )
391    }
392}
393
394impl LRParsingTableAction {
395    pub fn to_plaintext(&self) -> String {
396        match self {
397            LRParsingTableAction::Reduce(r) => {
398                format!("r({} -> {})", r.0, r.1.join(" "))
399            }
400            LRParsingTableAction::Shift(s) => {
401                format!("s{}", s)
402            }
403            LRParsingTableAction::Accept => "acc".to_string(),
404        }
405    }
406
407    pub fn to_latex(&self, terminal_set: &HashSet<&str>) -> String {
408        match self {
409            LRParsingTableAction::Reduce(r) => {
410                format!(
411                    "reduce ${} \\rightarrow {}$",
412                    escape::tex(&r.0),
413                    production_right_to_latex(r.1.iter().map(|s| s.as_str()), terminal_set)
414                )
415            }
416            LRParsingTableAction::Shift(s) => {
417                format!("shift {}", s)
418            }
419            LRParsingTableAction::Accept => "accept".to_string(),
420        }
421    }
422}
423
424impl LRParsingTable {
425    pub fn to_plaintext(&self) -> String {
426        let mut output: Vec<Vec<String>> = Vec::new();
427
428        output.push(vec![String::new()]);
429        for s in self.terminals.iter().chain(self.non_terminals.iter()) {
430            output[0].push(s.clone());
431        }
432
433        for (r1, r2) in self.action.iter().zip(self.goto.iter()) {
434            let i = output.len() - 1;
435            let row: Vec<String> = std::iter::once(i.to_string())
436                .chain(r1.iter().map(|actions| {
437                    actions
438                        .iter()
439                        .map(|action| action.to_plaintext())
440                        .collect::<Vec<_>>()
441                        .join("; ")
442                }))
443                .chain(r2.iter().map(|goto| {
444                    if let Some(goto) = goto {
445                        goto.to_string()
446                    } else {
447                        String::new()
448                    }
449                }))
450                .collect::<Vec<_>>();
451            output.push(row);
452        }
453
454        let width: Vec<usize> = (0..output[0].len())
455            .map(|j| output.iter().map(|row| row[j].len()).max().unwrap())
456            .collect();
457
458        output
459            .iter()
460            .map(|line| {
461                line.iter()
462                    .enumerate()
463                    .map(|(i, s)| format!("{:>width$}", s, width = width[i]))
464                    .collect::<Vec<_>>()
465                    .join(" | ")
466            })
467            .collect::<Vec<_>>()
468            .join("\n")
469    }
470
471    pub fn to_latex(&self) -> String {
472        let header: String = format!(
473            "\\begin{{tabular}}{{c{}}}\n & \\multicolumn{{{}}}{{c}}{{action}} & \\multicolumn{{{}}}{{|c}}{{goto}}\\\\",
474            "|l".repeat(self.terminals.len() + self.non_terminals.len()),
475            self.terminals.len(),
476            self.non_terminals.len(),
477        );
478
479        let mut content: Vec<Vec<String>> = Vec::new();
480
481        let mut first_row: Vec<String> = vec![String::new()];
482        for s in self.terminals.iter().chain(self.non_terminals.iter()) {
483            first_row.push(escape::tex(s).to_string());
484        }
485        let first_row = first_row.join(" & ");
486
487        let terminal_set: HashSet<&str> = self.terminals.iter().map(|s| s.as_str()).collect();
488
489        for (r1, r2) in self.action.iter().zip(self.goto.iter()) {
490            let i = content.len();
491            let row: Vec<String> = std::iter::once(i.to_string())
492                .chain(r1.iter().map(|actions| {
493                    let r = actions
494                        .iter()
495                        .map(|action| action.to_latex(&terminal_set))
496                        .collect::<Vec<_>>()
497                        .join("; ");
498                    if actions.len() > 1 {
499                        format!("{{\\color{{red}}{}}}", r)
500                    } else {
501                        r
502                    }
503                }))
504                .chain(r2.iter().map(|goto| {
505                    if let Some(goto) = goto {
506                        goto.to_string()
507                    } else {
508                        String::new()
509                    }
510                }))
511                .collect::<Vec<_>>();
512            content.push(row);
513        }
514
515        let content = content
516            .iter()
517            .map(|row| row.join(" & "))
518            .collect::<Vec<_>>();
519        let content = content.join(" \\\\\n");
520
521        format!(
522            "{}\n{} \\\\\\hline\n{}\n\\end{{tabular}}",
523            header, first_row, content
524        )
525    }
526}