lib/
frontend.rs

1use std::{cell::RefCell, collections::HashMap, rc::Rc};
2
3use debug_print::debug_println;
4use ebnf::{Expression, Grammar, Node, RegexExtKind, SymbolKind};
5use regex::Regex;
6
7use crate::FSMNode;
8
9pub fn print_parsed_ebnf(syntax: &str) {
10    let grammar = ebnf::get_grammar(&syntax).unwrap();
11    for node in grammar.expressions {
12        println!("{:?}", node);
13    }
14}
15
16enum TerminalState {
17    Stub,
18    Created,
19}
20
21fn handle_node(
22    grammar: &Grammar,
23    cur_node: &Node,
24    cur_root: &Rc<RefCell<FSMNode>>,
25    terminals: &mut HashMap<String, (Rc<RefCell<FSMNode>>, TerminalState)>,
26) -> Rc<RefCell<FSMNode>> {
27    debug_println!("handle_node got {:?}", cur_node);
28    let ret = match &cur_node {
29        Node::String(str) => FSMNode::new_keyword_with_parent(str.to_string(), Rc::clone(cur_root)),
30        Node::RegexString(r) => FSMNode::new(
31            crate::NodeType::UserDefinedRegex(Regex::new(r).unwrap()),
32            &cur_root,
33        ),
34        Node::Terminal(name) => {
35            if terminals.contains_key(name) {
36                debug_println!("Found {name} in cache!");
37                let term = terminals.get(name).unwrap();
38                let term_clone = match term.1 {
39                    TerminalState::Stub => term.0.clone(),
40                    TerminalState::Created => term.0.borrow().deep_clone(),
41                };
42                println!("linking back to {}", term_clone.borrow().short_id());
43                FSMNode::add_child_cycle_safe(cur_root, &term_clone);
44                println!("after add:");
45                cur_root.borrow().dbg();
46                term_clone
47            } else {
48                debug_println!("Creating terminal {name}...");
49                let terminal = find_terminal(&grammar, &name);
50                if terminal.is_none() {
51                    panic!("Terminal reference '{name}' not found!");
52                }
53                let terminal = terminal.unwrap();
54                let term_root = FSMNode::new_null(None);
55                debug_println!("term_root: {}", term_root.borrow().short_id());
56                terminals.insert(
57                    name.to_string(),
58                    (Rc::clone(&term_root), TerminalState::Stub),
59                );
60                handle_node(grammar, &terminal.rhs, &term_root, terminals);
61                terminals.insert(
62                    name.to_string(),
63                    (Rc::clone(&term_root), TerminalState::Created),
64                );
65                debug_println!("Finish terminal");
66                debug_println!("young {}:", name);
67                term_root.borrow().dbg();
68                let ret = term_root.borrow().deep_clone();
69                FSMNode::add_child_cycle_safe(cur_root, &ret);
70                ret
71            }
72        }
73        Node::Multiple(nodes) => {
74            let mut cur_treenode = cur_root.clone();
75            nodes.iter().for_each(|node| {
76                debug_println!("Multiple at {node:?}");
77                let tree_bit = handle_node(grammar, &node, &cur_treenode, terminals);
78                debug_println!("Multiple got back:");
79                tree_bit.borrow().dbg();
80                // NOTE: this will only work as long as the other node handlers nicely merge their
81                // diverging branches back into one single Null leaf
82                cur_treenode = tree_bit.borrow().race_to_leaf().unwrap_or(tree_bit.clone());
83                debug_println!("cur_treenode now at: {}", cur_treenode.borrow().short_id());
84            });
85            cur_treenode
86        }
87        Node::RegexExt(node, RegexExtKind::Optional) | Node::Optional(node) => {
88            let tree_bit = handle_node(grammar, &node, cur_root, terminals);
89            let dummy = FSMNode::new_null(None);
90            FSMNode::add_child_to_all_leaves(&tree_bit, &dummy);
91            FSMNode::add_child_cycle_safe(&cur_root, &dummy);
92            tree_bit
93        }
94        Node::Symbol(n1, SymbolKind::Concatenation, n2) => {
95            let t1 = handle_node(grammar, &n1.to_owned(), &cur_root, terminals);
96            let t2 = handle_node(grammar, &n2.to_owned(), &t1, terminals);
97            t1
98        }
99        Node::Symbol(n1, SymbolKind::Alternation, n2) => {
100            let root = FSMNode::new_null(Some(cur_root));
101            let t1 = handle_node(grammar, &n1.to_owned(), &root, terminals);
102            let t2 = handle_node(grammar, &n2.to_owned(), &root, terminals);
103            let child = FSMNode::new_null(None);
104            debug_println!("Alternation dummy child: {}", child.borrow().short_id());
105            FSMNode::add_child_to_all_leaves(&root, &child);
106            debug_println!("Finished alternation:");
107            root.borrow().dbg();
108            root
109        }
110        Node::Group(node) => handle_node(grammar, node, cur_root, terminals),
111        Node::Repeat(node) => {
112            // need to guarantee this is a null so search_rec won't prematurely stop, e.g. when
113            // cur_root is a Keyword
114            let dummy_parent = FSMNode::new_null(Some(&cur_root));
115            let subroot = handle_node(grammar, &node, &dummy_parent, terminals);
116
117            let dummy = FSMNode::new_null(None);
118            debug_println!("Repeat dummy child: {}", dummy.borrow().short_id());
119            FSMNode::add_child_to_all_leaves(&subroot, &dummy);
120            // must have the option to skip it entirely
121            FSMNode::add_child_cycle_safe(&cur_root, &dummy);
122
123            FSMNode::add_child_to_all_leaves(&subroot, &dummy_parent);
124            dummy_parent
125        }
126        _ => {
127            eprintln!("Unimplemented: {cur_node:?}");
128            todo!()
129        }
130    };
131    ret
132}
133
134fn find_terminal<'a>(grammer: &'a Grammar, name: &'a str) -> Option<&'a Expression> {
135    grammer.expressions.iter().find(|expr| expr.lhs == name)
136}
137
138pub fn create_graph_from_ebnf(ebnf: &str) -> Result<Rc<RefCell<FSMNode>>, String> {
139    match ebnf::get_grammar(ebnf) {
140        Ok(grammar) => {
141            let root = FSMNode::new_null(None);
142            let root_node = grammar.expressions.get(0).expect("Empty BNF!");
143            let mut terminals = HashMap::new();
144            handle_node(
145                &grammar,
146                &Node::Terminal(root_node.lhs.to_owned()),
147                &root,
148                &mut terminals,
149            );
150            // sanity op, is_done() won't cancel preemptively
151            FSMNode::add_child_to_all_leaves(&root, &FSMNode::new_null(None));
152            FSMNode::minify(&root);
153            debug_println!("Total node cnt: {}", FSMNode::node_cnt(&root));
154            // for (name, term) in terminals.iter() {
155            //     println!("Term {}", name);
156            //     term.0.borrow().dbg();
157            // }
158            Ok(root)
159        }
160        Err(err) => Err(err.to_string()),
161    }
162}