gregex_logic/translation/
node.rs

1//! Contains the implementation of the `Node` enum and the functions to calculate the nullability, prefix, suffix and factors sets of a regular expression tree.
2
3use crate::translation::operator::Operator;
4use crate::translation::setterminal::SetTerminal;
5use std::collections::HashSet;
6
7/// The `Node` enum represents the different types of nodes that can be used in a regular expression tree.
8#[derive(Debug, PartialEq, Eq)]
9pub enum Node {
10    /// Represents an operation on one or two nodes.
11    Operation(Operator, Box<Node>, Option<Box<Node>>),
12    /// `char` represents the character, `u32` represent the unique identifier of the node.
13    Terminal(char, u32),
14}
15
16/// The `nullability_set` function returns the set of [SetTerminal] that are nullable in a regular expression tree.
17pub fn nullability_set(regex_tree: &Node) -> HashSet<SetTerminal> {
18    let mut set = HashSet::new();
19    match regex_tree {
20        Node::Terminal(_, _) => {
21            set.insert(SetTerminal::Empty);
22        }
23        Node::Operation(op, left, right) => match op {
24            Operator::Or => {
25                set.extend(nullability_set(left));
26                set.extend(nullability_set(right.as_ref().unwrap()));
27            }
28            Operator::Concat => {
29                set.extend(nullability_set(left));
30                let right_set = nullability_set(right.as_ref().unwrap());
31                set.extend(right_set);
32            }
33            Operator::Production => {
34                set.insert(SetTerminal::Epsilon);
35            }
36            _ => todo!(),
37        },
38    }
39    set
40}
41
42/// The `prefix_set` function returns the set of [SetTerminal] that are prefixes of a regular expression tree.
43pub fn prefix_set(regex_tree: &Node) -> HashSet<SetTerminal> {
44    let mut set = HashSet::new();
45    match regex_tree {
46        Node::Terminal(symbol, code) => {
47            set.insert(SetTerminal::SingleElement(*symbol, *code));
48        }
49        Node::Operation(op, left, right) => match op {
50            Operator::Or => {
51                let left_set = prefix_set(left);
52                let right_set = prefix_set(right.as_ref().unwrap());
53                set.extend(left_set);
54                set.extend(right_set);
55            }
56            Operator::Concat => {
57                let left_set = prefix_set(left);
58                set.extend(left_set);
59                let right_set = prefix_set(right.as_ref().unwrap());
60                let nullable_set = nullability_set(left);
61
62                // If the left expression is nullable, include the first set of the right expression
63                if nullable_set.contains(&SetTerminal::Epsilon) {
64                    set.extend(right_set);
65                }
66            }
67            Operator::Production => {
68                let left_set = prefix_set(left);
69                set = left_set;
70            }
71            _ => todo!(),
72        },
73    }
74    set
75}
76
77/// The `suffix_set` function returns the set of [SetTerminal] that are suffixes of a regular expression tree.
78pub fn suffix_set(regex_tree: &Node) -> HashSet<SetTerminal> {
79    let mut set = HashSet::new();
80    match regex_tree {
81        Node::Terminal(symbol, code) => {
82            set.insert(SetTerminal::SingleElement(*symbol, *code));
83        }
84        Node::Operation(op, left, right) => match op {
85            Operator::Or => {
86                let left_set = suffix_set(left);
87                let right_set = suffix_set(right.as_ref().unwrap());
88                set.extend(left_set);
89                set.extend(right_set);
90            }
91            Operator::Concat => {
92                let left_set = suffix_set(right.as_ref().unwrap());
93                set.extend(left_set);
94                let right_set = suffix_set(left);
95                let nullable_set = nullability_set(right.as_ref().unwrap());
96
97                // If the left expression is nullable, include the first set of the right expression
98                if nullable_set.contains(&SetTerminal::Epsilon) {
99                    set.extend(right_set);
100                }
101            }
102            Operator::Production => {
103                let left_set = suffix_set(left);
104                set = left_set;
105            }
106            _ => todo!(),
107        },
108    }
109    set
110}
111
112/// The `factors_set` function returns the set of [SetTerminal] that are factors of a regular expression tree.
113/// 
114/// Factors in this scenario mean the set of terminals that can be produced by the regular expression.
115pub fn factors_set(regex_tree: &Node) -> HashSet<SetTerminal> {
116    let mut set = HashSet::new();
117    match regex_tree {
118        Node::Terminal(_, _) => {
119            set.insert(SetTerminal::Empty);
120        }
121        Node::Operation(op, left, right) => match op {
122            Operator::Or => {
123                let left_set = factors_set(left);
124                let right_set = factors_set(right.as_ref().unwrap());
125                set.extend(left_set);
126                set.extend(right_set);
127            }
128            Operator::Concat => {
129                let left_set = factors_set(left);
130                let right_set = factors_set(right.as_ref().unwrap());
131                let suffix_set = suffix_set(left);
132                let prefix_set = prefix_set(right.as_ref().unwrap());
133                set.extend(left_set);
134                set.extend(right_set);
135                for i in suffix_set {
136                    for j in &prefix_set {
137                        set.insert(i.product(j));
138                    }
139                }
140            }
141            Operator::Production => {
142                let left_set = factors_set(left);
143                let suffix_set = suffix_set(left);
144                let prefix_set = prefix_set(left);
145                set.extend(left_set);
146
147                for i in suffix_set {
148                    for j in &prefix_set {
149                        set.insert(i.product(j));
150                    }
151                }
152            }
153            _ => todo!(),
154        },
155    }
156
157    if set.contains(&SetTerminal::Empty) && set.len() > 1 {
158        set.remove(&SetTerminal::Empty);
159    }
160    set
161}
162
163#[cfg(test)]
164mod tests {
165    use super::*;
166
167    #[test]
168    fn nullability_set_test_or() {
169        let tree = Node::Operation(
170            Operator::Or,
171            Box::new(Node::Terminal('a', 1)),
172            Option::Some(Box::new(Node::Terminal('b', 2))),
173        );
174
175        let set = nullability_set(&tree);
176        let mut test_set = HashSet::new();
177        test_set.insert(SetTerminal::Empty);
178        assert_eq!(set, test_set);
179    }
180
181    #[test]
182    fn nullability_set_test_concat() {
183        let tree = Node::Operation(
184            Operator::Concat,
185            Box::new(Node::Terminal('a', 1)),
186            Option::Some(Box::new(Node::Terminal('b', 2))),
187        );
188
189        let set = nullability_set(&tree);
190        let mut test_set = HashSet::new();
191        test_set.insert(SetTerminal::Empty);
192        assert_eq!(set, test_set);
193    }
194
195    #[test]
196    fn nullability_set_test_production() {
197        let tree = Node::Operation(Operator::Production, Box::new(Node::Terminal('a', 1)), None);
198
199        let set = nullability_set(&tree);
200        let mut test_set = HashSet::new();
201        test_set.insert(SetTerminal::Epsilon);
202        assert_eq!(set, test_set);
203    }
204
205    #[test]
206    fn nullability_set_test_terminal() {
207        let tree = Node::Terminal('a', 1);
208
209        let set = nullability_set(&tree);
210        let mut test_set = HashSet::new();
211        test_set.insert(SetTerminal::Empty);
212        assert_eq!(set, test_set);
213    }
214
215    #[test]
216    fn prefix_set_test_or() {
217        let tree = Node::Operation(
218            Operator::Or,
219            Box::new(Node::Terminal('a', 1)),
220            Option::Some(Box::new(Node::Terminal('b', 2))),
221        );
222
223        let set = prefix_set(&tree);
224        let mut test_set = HashSet::new();
225        test_set.insert(SetTerminal::SingleElement('a', 1));
226        test_set.insert(SetTerminal::SingleElement('b', 2));
227        assert_eq!(set, test_set);
228    }
229
230    #[test]
231    fn prefix_set_test_production() {
232        let tree = Node::Operation(Operator::Production, Box::new(Node::Terminal('a', 1)), None);
233
234        let set = prefix_set(&tree);
235        let mut test_set = HashSet::new();
236        test_set.insert(SetTerminal::SingleElement('a', 1));
237        assert_eq!(set, test_set);
238    }
239
240    #[test]
241    fn prefix_set_test_concat() {
242        let tree = Node::Operation(
243            Operator::Concat,
244            Box::new(Node::Terminal('a', 1)),
245            Option::Some(Box::new(Node::Terminal('b', 2))),
246        );
247
248        let set = prefix_set(&tree);
249        let mut test_set = HashSet::new();
250        test_set.insert(SetTerminal::SingleElement('a', 1));
251        assert_eq!(set, test_set);
252    }
253
254    #[test]
255    fn prefix_set_test_terminal() {
256        let tree = Node::Terminal('a', 1);
257
258        let set = prefix_set(&tree);
259        let mut test_set = HashSet::new();
260        test_set.insert(SetTerminal::SingleElement('a', 1));
261        assert_eq!(set, test_set);
262    }
263
264    #[test]
265    fn prefix_set_test_complete() {
266        // Linearized regex: (a(ab)*)* + (ba)*
267        let tree = Node::Operation(
268            Operator::Or,
269            Box::new(Node::Operation(
270                Operator::Production,
271                Box::new(Node::Operation(
272                    Operator::Concat,
273                    Box::new(Node::Terminal('a', 1)),
274                    Some(Box::new(Node::Operation(
275                        Operator::Production,
276                        Box::new(Node::Operation(
277                            Operator::Concat,
278                            Box::new(Node::Terminal('a', 2)),
279                            Option::Some(Box::new(Node::Terminal('b', 3))),
280                        )),
281                        None,
282                    ))),
283                )),
284                None,
285            )),
286            Option::Some(Box::new(Node::Operation(
287                Operator::Production,
288                Box::new(Node::Operation(
289                    Operator::Concat,
290                    Box::new(Node::Terminal('b', 4)),
291                    Option::Some(Box::new(Node::Terminal('a', 5))),
292                )),
293                None,
294            ))),
295        );
296
297        let set = prefix_set(&tree);
298        let mut test_set = HashSet::new();
299        test_set.insert(SetTerminal::SingleElement('a', 1));
300        test_set.insert(SetTerminal::SingleElement('b', 4));
301        assert_eq!(set, test_set);
302    }
303
304    #[test]
305    fn suffix_set_test_or() {
306        let tree = Node::Operation(
307            Operator::Or,
308            Box::new(Node::Terminal('a', 1)),
309            Option::Some(Box::new(Node::Terminal('b', 2))),
310        );
311
312        let set = suffix_set(&tree);
313        let mut test_set = HashSet::new();
314        test_set.insert(SetTerminal::SingleElement('a', 1));
315        test_set.insert(SetTerminal::SingleElement('b', 2));
316        assert_eq!(set, test_set);
317    }
318
319    #[test]
320    fn suffix_set_test_production() {
321        let tree = Node::Operation(Operator::Production, Box::new(Node::Terminal('a', 1)), None);
322
323        let set = suffix_set(&tree);
324        let mut test_set = HashSet::new();
325        test_set.insert(SetTerminal::SingleElement('a', 1));
326        assert_eq!(set, test_set);
327    }
328
329    #[test]
330    fn suffix_set_test_concat() {
331        let tree = Node::Operation(
332            Operator::Concat,
333            Box::new(Node::Terminal('a', 1)),
334            Option::Some(Box::new(Node::Terminal('b', 2))),
335        );
336
337        let set = suffix_set(&tree);
338        let mut test_set = HashSet::new();
339        test_set.insert(SetTerminal::SingleElement('b', 2));
340        assert_eq!(set, test_set);
341    }
342
343    #[test]
344    fn suffix_set_test_terminal() {
345        let tree = Node::Terminal('a', 1);
346
347        let set = suffix_set(&tree);
348        let mut test_set = HashSet::new();
349        test_set.insert(SetTerminal::SingleElement('a', 1));
350        assert_eq!(set, test_set);
351    }
352
353    #[test]
354    fn suffix_set_test_complete() {
355        // Linearized regex: (a(ab)*)* + (ba)*
356        let tree = Node::Operation(
357            Operator::Or,
358            Box::new(Node::Operation(
359                Operator::Production,
360                Box::new(Node::Operation(
361                    Operator::Concat,
362                    Box::new(Node::Terminal('a', 1)),
363                    Some(Box::new(Node::Operation(
364                        Operator::Production,
365                        Box::new(Node::Operation(
366                            Operator::Concat,
367                            Box::new(Node::Terminal('a', 2)),
368                            Option::Some(Box::new(Node::Terminal('b', 3))),
369                        )),
370                        None,
371                    ))),
372                )),
373                None,
374            )),
375            Option::Some(Box::new(Node::Operation(
376                Operator::Production,
377                Box::new(Node::Operation(
378                    Operator::Concat,
379                    Box::new(Node::Terminal('b', 4)),
380                    Option::Some(Box::new(Node::Terminal('a', 5))),
381                )),
382                None,
383            ))),
384        );
385
386        let set = suffix_set(&tree);
387        let mut test_set = HashSet::new();
388        test_set.insert(SetTerminal::SingleElement('a', 1));
389        test_set.insert(SetTerminal::SingleElement('b', 3));
390        test_set.insert(SetTerminal::SingleElement('a', 5));
391        assert_eq!(set, test_set);
392    }
393
394    #[test]
395    fn factors_set_test_or() {
396        let tree = Node::Operation(
397            Operator::Or,
398            Box::new(Node::Terminal('a', 1)),
399            Option::Some(Box::new(Node::Terminal('b', 2))),
400        );
401
402        let set = factors_set(&tree);
403        let mut test_set = HashSet::new();
404        test_set.insert(SetTerminal::Empty);
405        assert_eq!(set, test_set);
406    }
407
408    #[test]
409    fn factors_set_test_production() {
410        let tree = Node::Operation(Operator::Production, Box::new(Node::Terminal('a', 1)), None);
411
412        let set = factors_set(&tree);
413        let mut test_set = HashSet::new();
414        test_set.insert(SetTerminal::DoubleElement('a', 1, 'a', 1));
415        assert_eq!(set, test_set);
416    }
417
418    #[test]
419    fn factors_set_test_concat() {
420        let tree = Node::Operation(
421            Operator::Concat,
422            Box::new(Node::Terminal('a', 1)),
423            Option::Some(Box::new(Node::Terminal('b', 2))),
424        );
425
426        let set = factors_set(&tree);
427        let mut test_set = HashSet::new();
428        test_set.insert(SetTerminal::DoubleElement('a', 1, 'b', 2));
429        assert_eq!(set, test_set);
430    }
431
432    #[test]
433    fn factors_set_test_complete() {
434        // Linearized regex: (a(ab)*)* + (ba)*
435        let tree = Node::Operation(
436            Operator::Or,
437            Box::new(Node::Operation(
438                Operator::Production,
439                Box::new(Node::Operation(
440                    Operator::Concat,
441                    Box::new(Node::Terminal('a', 1)),
442                    Some(Box::new(Node::Operation(
443                        Operator::Production,
444                        Box::new(Node::Operation(
445                            Operator::Concat,
446                            Box::new(Node::Terminal('a', 2)),
447                            Option::Some(Box::new(Node::Terminal('b', 3))),
448                        )),
449                        None,
450                    ))),
451                )),
452                None,
453            )),
454            Option::Some(Box::new(Node::Operation(
455                Operator::Production,
456                Box::new(Node::Operation(
457                    Operator::Concat,
458                    Box::new(Node::Terminal('b', 4)),
459                    Option::Some(Box::new(Node::Terminal('a', 5))),
460                )),
461                None,
462            ))),
463        );
464
465        let set = factors_set(&tree);
466        let mut test_set = HashSet::new();
467        test_set.insert(SetTerminal::DoubleElement('a', 1, 'a', 2));
468        test_set.insert(SetTerminal::DoubleElement('a', 1, 'a', 1));
469        test_set.insert(SetTerminal::DoubleElement('a', 2, 'b', 3));
470        test_set.insert(SetTerminal::DoubleElement('b', 3, 'a', 1));
471        test_set.insert(SetTerminal::DoubleElement('b', 3, 'a', 2));
472        test_set.insert(SetTerminal::DoubleElement('b', 4, 'a', 5));
473        test_set.insert(SetTerminal::DoubleElement('a', 5, 'b', 4));
474        assert_eq!(set, test_set);
475    }
476}