lwb_parser/parser/peg/
parser_sugar_ast.rs

1use crate::sources::character_class::CharacterClass;
2use derive_more::Display;
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5use thiserror::Error;
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct Constructor {
9    pub documentation: Option<String>,
10    pub name: String,
11    pub expression: Expression,
12    pub annotations: Vec<Annotation>,
13    /// This is set when rules are merged with part-of. In that case,
14    /// constructors that used to link the two rules should not be put in the ast.
15    pub dont_put_in_ast: bool,
16}
17
18#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
19pub enum Expression {
20    Sort(String),
21    Literal(String),
22    Sequence(Vec<Expression>),
23    Repeat {
24        e: Box<Expression>,
25        min: u64,
26        max: Option<u64>,
27    },
28    CharacterClass(CharacterClass),
29    Choice(Vec<Expression>),
30    Delimited {
31        e: Box<Expression>,
32        delim: Box<Expression>,
33        min: u64,
34        max: Option<u64>,
35        trailing: bool,
36    },
37
38    Negative(Box<Expression>),
39    Positive(Box<Expression>),
40}
41
42#[derive(Debug, Clone, Display, Serialize, Deserialize, PartialEq, Eq)]
43pub enum Annotation {
44    #[display(fmt = "no-pretty-print")]
45    NoPrettyPrint,
46
47    /// Don't accept layout in this rule and any child rule
48    #[display(fmt = "no-layout")]
49    NoLayout,
50
51    #[display(fmt = "injection")]
52    Injection,
53
54    /// represent this constructor as a single string in the final ast
55    #[display(fmt = "single-string")]
56    SingleString,
57
58    /// this rule does not appear in the ast anywhere its used.
59    #[display(fmt = "hidden")]
60    Hidden,
61
62    /// This rule gives an error when parsed
63    #[display(fmt = "error")]
64    Error(String),
65
66    /// Says that one rule must generate its constructors as part of another rule
67    #[display(fmt = "part-of: {}", _0)]
68    PartOf(String),
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct Sort {
73    pub documentation: Option<String>,
74    pub name: String,
75    pub constructors: Vec<Constructor>,
76    pub annotations: Vec<Annotation>,
77}
78
79#[derive(Debug, Serialize, Deserialize, Clone)]
80pub struct SyntaxFileAst {
81    pub sorts: HashMap<String, Sort>,
82    pub starting_sort: String,
83    pub merges: HashMap<String, String>,
84    pub old_sort_names: Vec<String>,
85}
86
87#[derive(Error, Debug, Clone)]
88pub enum SimplifyError {
89    #[error("your `part-of` annotations form a cycle")]
90    Cycle,
91
92    #[error("You marked {0} as part of {1} but {1} contains no rule to get to {0} (a line in the form of `{0};`")]
93    NoConnection(String, String),
94
95    #[error(
96        "You marked the starting sort ({0}) as part of another sort ({1}). This is not allowed."
97    )]
98    StartingSort(String, String),
99
100    #[error("The AST was simplified twice. This is a bug.")]
101    AlreadySimplified,
102}
103
104impl SyntaxFileAst {
105    pub fn names(&self, name: &str) -> Vec<&str> {
106        let mut result = Vec::new();
107
108        for i in &self.old_sort_names {
109            let new_name = Self::get_new_name(i, &self.merges);
110            if new_name == name {
111                result.push(i.as_str());
112            }
113        }
114
115        result
116    }
117
118    /// Simplification of the ast means that all rules that are marked as `part-of` are
119    /// actually integrated with their associated sort. This is useful before code generation
120    /// since in codegen they actually are one sort.
121    pub fn simplify(mut self) -> Result<Self, SimplifyError> {
122        if !self.merges.is_empty() {
123            return Err(SimplifyError::AlreadySimplified);
124        }
125
126        let mut order = Vec::new();
127        let mut had = HashSet::new();
128        let mut todo = Vec::new();
129
130        let old_sort_names = self.sorts.keys().cloned().collect::<Vec<_>>();
131
132        // determine all relations between sorts.
133        // handle the ones that aren't part-of another sort last
134        // by pretty much topologically sorting them.
135        for (name, sort) in self.sorts {
136            if let Some(other) = sort.annotations.iter().find_map(|i| {
137                if let Annotation::PartOf(a) = i {
138                    Some(a)
139                } else {
140                    None
141                }
142            }) {
143                let other = other.clone();
144                todo.push((name, sort, other));
145            } else {
146                had.insert(name.clone());
147                order.push((name, sort, None));
148            }
149        }
150
151        // for the ones that had a part-of relation, do topological sort
152        // and add then to the ordering
153        while !todo.is_empty() {
154            let start_length = todo.len();
155            let mut new_todo = Vec::new();
156
157            for (name, sort, part_of) in todo {
158                if had.contains(&part_of) {
159                    had.insert(name.clone());
160                    order.push((name, sort, Some(part_of)));
161                } else {
162                    new_todo.push((name, sort, part_of));
163                }
164            }
165
166            if start_length == new_todo.len() {
167                return Err(SimplifyError::Cycle);
168            }
169
170            todo = new_todo;
171        }
172
173        // keep some refs to the ordered sorts
174        let sort_refs: HashMap<_, _> = order.iter().map(|(name, sort, _)| (name, sort)).collect();
175
176        // now determine if this set of relations is allowed. Are there proper paths between
177        // related sorts?
178        for (name, sort, part_of) in &order {
179            if let Some(other) = part_of {
180                if sort.name == self.starting_sort {
181                    return Err(SimplifyError::StartingSort(name.clone(), other.to_string()));
182                }
183            }
184
185            if let Some(other) = part_of.as_ref().and_then(|i| sort_refs.get(i)) {
186                let mut has_connection = false;
187                for i in &other.constructors {
188                    if i.expression == Expression::Sort(sort.name.clone()) {
189                        has_connection = true;
190                        break;
191                    }
192                }
193
194                if !has_connection {
195                    return Err(SimplifyError::NoConnection(
196                        sort.name.clone(),
197                        other.name.clone(),
198                    ));
199                }
200            }
201        }
202
203        // now generate some new sorts
204        let mut new_sorts = HashMap::new();
205        let mut merges = HashMap::<_, Vec<Sort>>::new();
206        let mut occurred_merges = HashMap::new();
207
208        // evaluate the sorts in the predetermined order (but in reverse)
209        for (name, mut sort, part_of) in order.into_iter().rev() {
210            // if another sort needs to merge into this one? Do so.
211            if let Some(others) = merges.remove(&name) {
212                for other in others {
213                    // extend the constructors
214                    sort.constructors = sort
215                        .constructors
216                        .into_iter()
217                        .map(|mut i| {
218                            i.dont_put_in_ast =
219                                i.expression == Expression::Sort(other.name.clone());
220                            i
221                        })
222                        .chain(other.constructors)
223                        .collect();
224
225                    // merge documentation nicely
226                    if let Some(ref documentation) = other.documentation {
227                        if let Some(ref mut i) = sort.documentation {
228                            *i = format!("{i}\n\n# {name}\n since {name} is a part of {}, its documentation is shown below:\n{documentation}", sort.name);
229                        } else {
230                            sort.documentation = Some(format!("(from {name}:)\n{documentation}"));
231                        }
232                    }
233
234                    occurred_merges.insert(other.name, name.clone());
235                }
236            }
237
238            // some sorts should be merged with others. Mark these in `merges`
239            if let Some(part_of) = part_of {
240                merges.entry(part_of).or_insert(vec![]).push(sort);
241            } else {
242                // when we found a sort that needs no more merging, add them to the new list
243                // of sorts
244                new_sorts.insert(name, sort);
245            }
246        }
247
248        // now some sorts got removed. Rename their references to the appropriate parent.
249        // then undo partial move (and restore self)
250        self.sorts = new_sorts
251            .into_iter()
252            .map(|(name, mut sort)| {
253                sort.constructors = sort
254                    .constructors
255                    .into_iter()
256                    .map(|mut c| {
257                        c.expression = Self::rewrite_expression(c.expression, &occurred_merges);
258                        c
259                    })
260                    .collect();
261
262                (name, sort)
263            })
264            .collect();
265
266        self.merges = occurred_merges;
267        self.old_sort_names = old_sort_names;
268
269        Ok(self)
270    }
271
272    fn get_new_name(name: &String, merges: &HashMap<String, String>) -> String {
273        if let Some(new_name) = merges.get(name) {
274            Self::get_new_name(new_name, merges)
275        } else {
276            name.clone()
277        }
278    }
279
280    fn rewrite_expression(e: Expression, merges: &HashMap<String, String>) -> Expression {
281        match e {
282            Expression::Sort(name) => Expression::Sort(Self::get_new_name(&name, merges)),
283            a @ Expression::Literal(_) => a,
284            Expression::Sequence(s) => Expression::Sequence(
285                s.into_iter()
286                    .map(|e| Self::rewrite_expression(e, merges))
287                    .collect(),
288            ),
289            Expression::Repeat { e, min, max } => Expression::Repeat {
290                e: Box::new(Self::rewrite_expression(*e, merges)),
291                min,
292                max,
293            },
294            a @ Expression::CharacterClass(_) => a,
295            Expression::Choice(s) => Expression::Choice(
296                s.into_iter()
297                    .map(|e| Self::rewrite_expression(e, merges))
298                    .collect(),
299            ),
300            Expression::Delimited {
301                e,
302                delim,
303                min,
304                max,
305                trailing,
306            } => Expression::Delimited {
307                e: Box::new(Self::rewrite_expression(*e, merges)),
308                delim,
309                min,
310                max,
311                trailing,
312            },
313            Expression::Negative(e) => {
314                Expression::Negative(Box::new(Self::rewrite_expression(*e, merges)))
315            }
316            Expression::Positive(e) => {
317                Expression::Positive(Box::new(Self::rewrite_expression(*e, merges)))
318            }
319        }
320    }
321}