Skip to main content

oxilean_std/parsec/
types.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use super::functions::*;
6use std::collections::{HashMap, HashSet};
7
8/// Computes FIRST and FOLLOW sets for a context-free grammar.
9#[allow(dead_code)]
10pub struct FirstFollowSets<'a> {
11    /// The grammar.
12    pub grammar: &'a ContextFreeGrammar,
13    /// FIRST sets: nonterminal index → set of (Option<terminal_index>)
14    /// None represents ε.
15    pub first: Vec<HashSet<Option<usize>>>,
16    /// FOLLOW sets: nonterminal index → set of (Option<terminal_index>)
17    /// None represents end-of-input ($).
18    pub follow: Vec<HashSet<Option<usize>>>,
19}
20#[allow(dead_code)]
21impl<'a> FirstFollowSets<'a> {
22    /// Compute FIRST and FOLLOW sets for the grammar.
23    pub fn compute(grammar: &'a ContextFreeGrammar) -> Self {
24        let mut first: Vec<HashSet<Option<usize>>> = vec![HashSet::new(); grammar.n_nonterms];
25        let mut follow: Vec<HashSet<Option<usize>>> = vec![HashSet::new(); grammar.n_nonterms];
26        let mut changed = true;
27        while changed {
28            changed = false;
29            for (lhs, rhs) in &grammar.productions {
30                if rhs.is_empty() {
31                    if first[*lhs].insert(None) {
32                        changed = true;
33                    }
34                } else {
35                    let (is_term, idx) = rhs[0];
36                    if is_term {
37                        if first[*lhs].insert(Some(idx)) {
38                            changed = true;
39                        }
40                    } else if idx < grammar.n_nonterms {
41                        let to_add: Vec<_> = first[idx].iter().cloned().collect();
42                        for item in to_add {
43                            if first[*lhs].insert(item) {
44                                changed = true;
45                            }
46                        }
47                    }
48                }
49            }
50        }
51        follow[grammar.start].insert(None);
52        let mut changed = true;
53        while changed {
54            changed = false;
55            for (lhs, rhs) in &grammar.productions {
56                for (i, &(is_term, idx)) in rhs.iter().enumerate() {
57                    if !is_term && idx < grammar.n_nonterms {
58                        if i + 1 < rhs.len() {
59                            let next = rhs[i + 1];
60                            if next.0 {
61                                if follow[idx].insert(Some(next.1)) {
62                                    changed = true;
63                                }
64                            } else if next.1 < grammar.n_nonterms {
65                                let to_add: Vec<_> = first[next.1].iter().cloned().collect();
66                                for item in to_add {
67                                    if follow[idx].insert(item) {
68                                        changed = true;
69                                    }
70                                }
71                            }
72                        } else {
73                            let to_add: Vec<_> = follow[*lhs].iter().cloned().collect();
74                            for item in to_add {
75                                if follow[idx].insert(item) {
76                                    changed = true;
77                                }
78                            }
79                        }
80                    }
81                }
82            }
83        }
84        FirstFollowSets {
85            grammar,
86            first,
87            follow,
88        }
89    }
90    /// Get FIRST set for a nonterminal.
91    pub fn get_first(&self, nt: usize) -> &HashSet<Option<usize>> {
92        &self.first[nt]
93    }
94    /// Get FOLLOW set for a nonterminal.
95    pub fn get_follow(&self, nt: usize) -> &HashSet<Option<usize>> {
96        &self.follow[nt]
97    }
98    /// Check if a nonterminal is nullable (ε ∈ FIRST(nt)).
99    pub fn is_nullable(&self, nt: usize) -> bool {
100        self.first[nt].contains(&None)
101    }
102}
103/// An LL(1) parsing table for a given grammar.
104#[allow(dead_code)]
105pub struct Ll1Table {
106    /// For each (nonterminal, terminal) pair, the production index to use.
107    /// The terminal None represents end-of-input.
108    pub table: std::collections::HashMap<(usize, Option<usize>), usize>,
109    /// Number of nonterminals.
110    pub n_nonterms: usize,
111    /// Number of terminals.
112    pub n_terms: usize,
113}
114#[allow(dead_code)]
115impl Ll1Table {
116    /// Create an empty LL(1) table.
117    pub fn new(n_nonterms: usize, n_terms: usize) -> Self {
118        Ll1Table {
119            table: std::collections::HashMap::new(),
120            n_nonterms,
121            n_terms,
122        }
123    }
124    /// Set the production for (nonterminal, terminal).
125    pub fn set(&mut self, nt: usize, term: Option<usize>, prod: usize) {
126        self.table.insert((nt, term), prod);
127    }
128    /// Look up the production for (nonterminal, terminal).
129    pub fn get(&self, nt: usize, term: Option<usize>) -> Option<usize> {
130        self.table.get(&(nt, term)).cloned()
131    }
132    /// Check if the table is conflict-free (no cell has more than one production).
133    /// Since we use a HashMap, each cell has at most one entry by construction.
134    pub fn is_conflict_free(&self) -> bool {
135        true
136    }
137    /// Count the number of filled table entries.
138    pub fn entry_count(&self) -> usize {
139        self.table.len()
140    }
141    /// Build an LL(1) table from a grammar and its FIRST/FOLLOW sets.
142    pub fn build(grammar: &ContextFreeGrammar, sets: &FirstFollowSets<'_>) -> Self {
143        let mut table = Ll1Table::new(grammar.n_nonterms, grammar.n_terms);
144        for (prod_idx, (lhs, rhs)) in grammar.productions.iter().enumerate() {
145            if rhs.is_empty() {
146                for &follow_item in sets.get_follow(*lhs) {
147                    table.set(*lhs, follow_item, prod_idx);
148                }
149            } else {
150                let (is_term, idx) = rhs[0];
151                if is_term {
152                    table.set(*lhs, Some(idx), prod_idx);
153                } else if idx < grammar.n_nonterms {
154                    for &first_item in sets.get_first(idx) {
155                        if let Some(t) = first_item {
156                            table.set(*lhs, Some(t), prod_idx);
157                        } else {
158                            for &follow_item in sets.get_follow(*lhs) {
159                                table.set(*lhs, follow_item, prod_idx);
160                            }
161                        }
162                    }
163                }
164            }
165        }
166        table
167    }
168}
169/// A context-free grammar with nonterminals numbered 0..n_nonterms
170/// and terminals numbered 0..n_terms.
171#[allow(dead_code)]
172pub struct ContextFreeGrammar {
173    /// Number of nonterminal symbols.
174    pub n_nonterms: usize,
175    /// Number of terminal symbols.
176    pub n_terms: usize,
177    /// Start symbol (index of nonterminal).
178    pub start: usize,
179    /// Productions: list of (lhs_nonterm, rhs_symbols) where rhs_symbols is
180    /// a list of (is_terminal: bool, index).
181    pub productions: Vec<(usize, Vec<(bool, usize)>)>,
182}
183#[allow(dead_code)]
184impl ContextFreeGrammar {
185    /// Create a new CFG.
186    pub fn new(n_nonterms: usize, n_terms: usize, start: usize) -> Self {
187        ContextFreeGrammar {
188            n_nonterms,
189            n_terms,
190            start,
191            productions: Vec::new(),
192        }
193    }
194    /// Add a production: A → rhs.
195    pub fn add_production(&mut self, lhs: usize, rhs: Vec<(bool, usize)>) {
196        self.productions.push((lhs, rhs));
197    }
198    /// Get all productions for a given nonterminal.
199    pub fn productions_for(&self, nt: usize) -> Vec<&Vec<(bool, usize)>> {
200        self.productions
201            .iter()
202            .filter_map(|(lhs, rhs)| if *lhs == nt { Some(rhs) } else { None })
203            .collect()
204    }
205    /// Check if any production is nullable (A → ε).
206    pub fn is_nullable(&self, nt: usize) -> bool {
207        self.productions
208            .iter()
209            .any(|(lhs, rhs)| *lhs == nt && rhs.is_empty())
210    }
211    /// Count total number of productions.
212    pub fn production_count(&self) -> usize {
213        self.productions.len()
214    }
215    /// Check if the grammar is in Chomsky Normal Form:
216    /// every production is A → BC or A → a.
217    pub fn is_cnf(&self) -> bool {
218        for (_, rhs) in &self.productions {
219            match rhs.len() {
220                0 => return false,
221                1 => {
222                    if rhs[0].0 {
223                    } else {
224                        return false;
225                    }
226                }
227                2 => {
228                    if rhs[0].0 || rhs[1].0 {
229                        return false;
230                    }
231                }
232                _ => return false,
233            }
234        }
235        true
236    }
237}
238/// A CYK (Cocke–Younger–Kasami) parser for CFGs in CNF.
239#[allow(dead_code)]
240pub struct CykParser<'a> {
241    /// The grammar (must be in CNF).
242    pub grammar: &'a ContextFreeGrammar,
243}
244#[allow(dead_code)]
245impl<'a> CykParser<'a> {
246    /// Create a new CYK parser.
247    pub fn new(grammar: &'a ContextFreeGrammar) -> Self {
248        CykParser { grammar }
249    }
250    /// Run CYK on a sequence of terminal indices.
251    /// Returns true iff the input is in L(grammar).
252    pub fn parse(&self, input: &[usize]) -> bool {
253        let n = input.len();
254        if n == 0 {
255            return self.grammar.is_nullable(self.grammar.start);
256        }
257        let mut table: Vec<Vec<HashSet<usize>>> = vec![vec![HashSet::new(); n]; n];
258        for (i, &term) in input.iter().enumerate() {
259            for (lhs, rhs) in &self.grammar.productions {
260                if rhs.len() == 1 && rhs[0].0 && rhs[0].1 == term {
261                    table[i][0].insert(*lhs);
262                }
263            }
264        }
265        for len in 2..=n {
266            for i in 0..=(n - len) {
267                for k in 0..(len - 1) {
268                    for (lhs, rhs) in &self.grammar.productions {
269                        if rhs.len() == 2 && !rhs[0].0 && !rhs[1].0 {
270                            let b = rhs[0].1;
271                            let c = rhs[1].1;
272                            if table[i][k].contains(&b)
273                                && table[i + k + 1][len - k - 2].contains(&c)
274                            {
275                                table[i][len - 1].insert(*lhs);
276                            }
277                        }
278                    }
279                }
280            }
281        }
282        table[0][n - 1].contains(&self.grammar.start)
283    }
284    /// Count the number of parse trees (ambiguity measure).
285    pub fn count_parses(&self, input: &[usize]) -> usize {
286        let n = input.len();
287        if n == 0 {
288            return if self.grammar.is_nullable(self.grammar.start) {
289                1
290            } else {
291                0
292            };
293        }
294        if self.parse(input) {
295            1
296        } else {
297            0
298        }
299    }
300}
301/// A runtime PEG expression.
302#[allow(dead_code)]
303#[derive(Debug, Clone)]
304pub enum PegExpr {
305    /// Match a literal character.
306    Char(char),
307    /// Match any character.
308    Any,
309    /// Sequence: e1 followed by e2.
310    Seq(Box<PegExpr>, Box<PegExpr>),
311    /// Ordered choice: try e1, if it fails (without consuming input) try e2.
312    Choice(Box<PegExpr>, Box<PegExpr>),
313    /// Zero or more: e*
314    Star(Box<PegExpr>),
315    /// One or more: e+
316    Plus(Box<PegExpr>),
317    /// Optional: e?
318    Optional(Box<PegExpr>),
319    /// Negative lookahead: !e
320    Not(Box<PegExpr>),
321    /// Positive lookahead: &e
322    And(Box<PegExpr>),
323}
324#[allow(dead_code)]
325impl PegExpr {
326    /// Try to match the expression at position `pos` in `input`.
327    /// Returns the new position if successful, or None on failure.
328    pub fn matches(&self, input: &str, pos: usize) -> Option<usize> {
329        let chars: Vec<char> = input.chars().collect();
330        self.match_at(&chars, pos)
331    }
332    fn match_at(&self, input: &[char], pos: usize) -> Option<usize> {
333        match self {
334            PegExpr::Char(c) => {
335                if pos < input.len() && input[pos] == *c {
336                    Some(pos + 1)
337                } else {
338                    None
339                }
340            }
341            PegExpr::Any => {
342                if pos < input.len() {
343                    Some(pos + 1)
344                } else {
345                    None
346                }
347            }
348            PegExpr::Seq(e1, e2) => {
349                let p1 = e1.match_at(input, pos)?;
350                e2.match_at(input, p1)
351            }
352            PegExpr::Choice(e1, e2) => e1.match_at(input, pos).or_else(|| e2.match_at(input, pos)),
353            PegExpr::Star(e) => {
354                let mut cur = pos;
355                loop {
356                    match e.match_at(input, cur) {
357                        Some(next) if next > cur => cur = next,
358                        _ => break,
359                    }
360                }
361                Some(cur)
362            }
363            PegExpr::Plus(e) => {
364                let first = e.match_at(input, pos)?;
365                let rest = PegExpr::Star(e.clone()).match_at(input, first)?;
366                Some(rest)
367            }
368            PegExpr::Optional(e) => Some(e.match_at(input, pos).unwrap_or(pos)),
369            PegExpr::Not(e) => {
370                if e.match_at(input, pos).is_some() {
371                    None
372                } else {
373                    Some(pos)
374                }
375            }
376            PegExpr::And(e) => {
377                e.match_at(input, pos)?;
378                Some(pos)
379            }
380        }
381    }
382    /// Check if the expression matches the entire string.
383    pub fn match_full(&self, input: &str) -> bool {
384        let chars: Vec<char> = input.chars().collect();
385        self.match_at(&chars, 0) == Some(chars.len())
386    }
387}