Skip to main content

lextrail_test/
guide.rs

1use std::collections::{HashMap, HashSet, VecDeque};
2
3use crate::build::{
4    Symbol, SymbolGraph, SymbolTag, SymbolVar, UUId, build_symbol_graph,
5    split_definition_into_lexemes,
6};
7use crate::helpers::{TrailError, bfs, contains_special_characters, format_error, is_escaped};
8
9pub fn split_cfg_into_rows(grammar: &str) -> Vec<String> {
10    let mut rows: Vec<String> = Vec::new();
11    let mut in_quote = false;
12    let mut in_slash = false;
13    let mut row: Vec<char> = Vec::new();
14    let mut i = 0;
15
16    let chars: Vec<char> = grammar.chars().chain(['\n']).collect();
17
18    while i < chars.len() {
19        let curr = chars[i];
20
21        if curr == '"' {
22            if !in_quote && !in_slash {
23                in_quote = true;
24            } else if in_quote && !is_escaped(&chars, i) {
25                in_quote = false;
26            }
27            row.push(curr);
28        } else if curr == '/' {
29            if !in_quote && !in_slash {
30                in_slash = true;
31            } else if in_slash && !is_escaped(&chars, i) {
32                in_slash = false;
33            }
34            row.push(curr);
35        } else if curr == '\n' && !in_quote && !in_slash {
36            if !row.is_empty() {
37                rows.push(row.iter().collect());
38                row.clear();
39            }
40        } else {
41            row.push(curr);
42        }
43
44        i += 1;
45    }
46
47    return rows;
48}
49
50pub fn split_production(production: &str) -> Result<(&str, &str), TrailError> {
51    let mut indices: Vec<usize> = Vec::new();
52    let mut in_quote = false;
53    let mut in_slash = false;
54    let mut i = 0;
55
56    let chars: Vec<char> = production.chars().collect();
57
58    while i < chars.len() {
59        let curr = chars[i];
60
61        if curr == '"' {
62            if !in_quote && !in_slash {
63                in_quote = true;
64            } else if in_quote && !is_escaped(&chars, i) {
65                in_quote = false;
66            }
67        } else if curr == '/' {
68            if !in_quote && !in_slash {
69                in_slash = true;
70            } else if in_slash && !is_escaped(&chars, i) {
71                in_slash = false;
72            }
73        } else if curr == ':' {
74            if !in_quote && !in_slash {
75                indices.push(i);
76            }
77        }
78
79        i += 1;
80    }
81
82    return match indices.len() {
83        0 => Ok(("", "")),
84        1 => Ok((
85            &production[..indices[0]].trim(),
86            &production[indices[0] + 1..].trim(),
87        )),
88        _ => Err(TrailError(format_error(
89            "Duplicate separator `:`.",
90            &production[..indices[0]].trim(),
91            &production[indices[0]..indices[1] + 1].trim(),
92        ))),
93    };
94}
95
96pub fn divide_cfg_into_productions(grammar: &str) -> Result<HashMap<String, String>, TrailError> {
97    let mut cfg: HashMap<String, String> = HashMap::new();
98    let mut current = String::new();
99    let rows = split_cfg_into_rows(grammar);
100
101    for row in rows {
102        let production = row.trim();
103
104        if production.is_empty() {
105            continue;
106        }
107
108        let (head, body) = split_production(production)?;
109
110        if head.is_empty() && body.is_empty() {
111            if current.is_empty() {
112                return Err(TrailError(format_error(
113                    "Invalid production.",
114                    "",
115                    &production,
116                )));
117            }
118
119            *(cfg
120                .get_mut(&current)
121                .expect("Key is not empty, thus it has already been added to the CFG.")) +=
122                &production;
123        } else {
124            if contains_special_characters(head) {
125                let header = format!("Name `{}` contains special characters.", head);
126
127                return Err(TrailError(format_error(&header, "", &production)));
128            }
129
130            if cfg.contains_key(head) {
131                return Err(TrailError(format_error(
132                    "Duplicate production.",
133                    "",
134                    &production,
135                )));
136            }
137
138            cfg.insert(head.to_string(), body.to_string());
139            current = head.to_string();
140        }
141    }
142
143    if !cfg.contains_key("start") {
144        return Err(TrailError(format_error(
145            "`start` production rule has not been defined.",
146            "",
147            "",
148        )));
149    }
150
151    return Ok(cfg);
152}
153
154fn is_escapable_from_infinite_loop(graph: &SymbolGraph, prospect: &String) -> bool {
155    let mut visited: Vec<UUId> = Vec::new();
156    let mut queue: VecDeque<UUId> = graph.heads.iter().cloned().collect();
157
158    let (nodes, edges) = (&graph.nodes, &graph.edges);
159
160    while !queue.is_empty() {
161        let vertex = queue.pop_front().unwrap();
162
163        if let Symbol::VARIABLE { value, .. } = &nodes[&vertex]
164            && value.0 == *prospect
165        {
166            continue;
167        }
168
169        let visited_from_vertex = bfs(graph, vec![vertex.clone()]);
170
171        if visited_from_vertex.into_iter().all(|x| {
172            if let Symbol::VARIABLE { value, .. } = &nodes[&x] {
173                value.0 != *prospect
174            } else {
175                true
176            }
177        }) {
178            return true;
179        }
180
181        if !visited.contains(&vertex) {
182            visited.push(vertex);
183            queue.extend(edges.get(&vertex).cloned().unwrap_or_default());
184        }
185    }
186
187    return false;
188}
189
190pub fn contains_infinite_loop(graph: &SymbolGraph, head: &String, body: &String) -> bool {
191    let lexemes =
192        split_definition_into_lexemes(&body).expect("Expected Vec<String>, but got a TrailError.");
193
194    return lexemes.contains(&head) && !is_escapable_from_infinite_loop(&graph, &head);
195}
196
197pub fn fetch_excluded_vars(graph: &SymbolGraph, heads: &HashSet<&String>) -> Option<String> {
198    let excluded = graph.nodes.iter().find_map(|(_, v)| {
199        if let Symbol::VARIABLE { value, .. } = v {
200            (!heads.contains(&value.0)).then_some(&value.0)
201        } else {
202            None
203        }
204    });
205
206    match excluded {
207        Some(content) => {
208            return Some(content.clone());
209        }
210        None => return None,
211    };
212}
213
214#[derive(Debug, Clone, Copy, PartialEq, Eq)]
215pub enum IdState {
216    Set(UUId),
217    Unset,
218}
219
220#[derive(Debug, Clone)]
221pub struct TrailLayer<'a> {
222    pub graph: &'a SymbolGraph,
223    pub id: IdState,
224}
225
226#[derive(Debug, Clone)]
227pub struct TrailProposal<'a> {
228    frame: Vec<TrailLayer<'a>>,
229    value: String,
230}
231
232type TrailRefs = HashMap<SymbolTag, String>;
233
234#[derive(Debug)]
235pub struct State<T> {
236    pub proposals: Vec<T>,
237    pub backrefs: TrailRefs,
238}
239
240pub type TrailState<'a> = State<TrailProposal<'a>>;
241
242impl<T> Default for State<T> {
243    fn default() -> Self {
244        Self {
245            proposals: Vec::new(),
246            backrefs: HashMap::new(),
247        }
248    }
249}
250
251pub type CFGGraph = HashMap<SymbolVar, SymbolGraph>;
252
253pub type Trail<'a> = (CFGGraph, TrailState<'a>);
254
255pub fn build_cfg_graph(grammar: &str) -> Result<CFGGraph, TrailError> {
256    let productions = divide_cfg_into_productions(grammar)?;
257    let (mut graphs, heads): (HashMap<SymbolVar, SymbolGraph>, HashSet<&String>) =
258        (HashMap::new(), productions.keys().collect());
259
260    productions.iter().try_for_each(|(head, body)| {
261        let graph = build_symbol_graph(body)?;
262
263        if contains_infinite_loop(&graph, head, body) {
264            let context = format!("{}: ", head);
265
266            return Err(TrailError(format_error(
267                "Infinite loop found.",
268                &context,
269                body,
270            )));
271        }
272
273        let excluded = fetch_excluded_vars(&graph, &heads);
274
275        match excluded {
276            Some(value) => {
277                let header = format!("Production has an undefined variable `{}`.", value);
278
279                return Err(TrailError(format_error(
280                    &header,
281                    &format!("{}: ", head),
282                    body,
283                )));
284            }
285            None => (),
286        }
287
288        graphs.insert(SymbolVar(head.clone()), graph);
289        Ok(())
290    })?;
291
292    return Ok(graphs);
293}
294
295pub fn trail_cfg<'a>(cfg: &str) -> Result<Trail<'a>, TrailError> {
296    return Ok((build_cfg_graph(cfg)?, TrailState::default()));
297}
298
299pub fn trail_rex<'a>(rex: &str) -> Result<Trail<'a>, TrailError> {
300    return Ok((
301        build_cfg_graph(&format!("start: /{}/", rex))?,
302        TrailState::default(),
303    ));
304}
305
306pub fn trail_exp<'a>(exp: &str) -> Result<Trail<'a>, TrailError> {
307    return Ok((
308        build_cfg_graph(&format!("start: {}", exp))?,
309        TrailState::default(),
310    ));
311}
312
313fn trail_run<'a>(schema: &'a CFGGraph, state: &mut TrailState<'a>) {
314    let (proposals, backrefs) = (&mut state.proposals, &mut state.backrefs);
315
316    for proposal in &*proposals {
317        let checkpoint = proposal.frame.last().unwrap();
318
319        let id = if let IdState::Set(uuid) = checkpoint.id {
320            uuid
321        } else {
322            unreachable!()
323        };
324
325        if let Symbol::TERMINAL { value, tags, .. } = &checkpoint.graph.nodes[&id] {
326            for tag in tags {
327                backrefs
328                    .entry(tag.clone())
329                    .or_insert_with(String::new)
330                    .push_str(value);
331            }
332        }
333    }
334
335    let mut frames = if proposals.is_empty() {
336        let start = vec![TrailLayer {
337            graph: &schema[&SymbolVar::new("start")],
338            id: IdState::Unset,
339        }];
340
341        vec![start]
342    } else {
343        proposals.drain(..).map(|proposal| proposal.frame).collect()
344    };
345
346    while !frames.is_empty() {
347        let mut frame = frames.pop().unwrap();
348
349        let checkpoint = frame
350            .last()
351            .expect("Empty proposal states are neither processed, nor pushed to the state.");
352
353        let (id, graph) = (checkpoint.id, checkpoint.graph);
354
355        let nodes = &graph.nodes;
356
357        let successors = match id {
358            IdState::Set(value) => graph.edges.get(&value).cloned().unwrap_or_default(),
359            IdState::Unset => graph.heads.clone(),
360        };
361
362        if successors.is_empty() {
363            frame.pop();
364
365            if !frame.is_empty() {
366                frames.push(frame);
367            }
368
369            continue;
370        }
371
372        for successor in &successors {
373            match &nodes[successor] {
374                Symbol::TERMINAL { value, .. } => {
375                    let mut next_frame = frame.clone();
376                    next_frame.last_mut().unwrap().id = IdState::Set(*successor);
377
378                    let next_value = value.clone();
379
380                    proposals.push(TrailProposal {
381                        frame: next_frame,
382                        value: next_value,
383                    });
384                }
385                Symbol::VARIABLE { value, .. } => {
386                    let mut next_frame = frame.clone();
387                    next_frame.last_mut().unwrap().id = IdState::Set(*successor);
388
389                    let next_layer = TrailLayer {
390                        graph: &schema[&value],
391                        id: IdState::Unset,
392                    };
393                    next_frame.push(next_layer);
394
395                    frames.push(next_frame);
396                }
397                Symbol::REFERENCE { value, .. } => {
398                    let mut next_frame = frame.clone();
399                    next_frame.last_mut().unwrap().id = IdState::Set(*successor);
400
401                    let reference = &backrefs[value];
402                    let next_value = reference.clone();
403
404                    proposals.push(TrailProposal {
405                        frame: next_frame,
406                        value: next_value,
407                    });
408                }
409                Symbol::END { .. } => {
410                    let mut next_frame = frame.clone();
411                    next_frame.last_mut().unwrap().id = IdState::Set(*successor);
412
413                    proposals.push(TrailProposal {
414                        frame: next_frame,
415                        value: String::new(),
416                    });
417                }
418            }
419        }
420    }
421}
422
423pub fn get_next_proposals<'a, 'b>(
424    schema: &'a CFGGraph,
425    state: &mut TrailState<'a>,
426    value: &String,
427) -> Result<Vec<String>, TrailError> {
428    let is_initial = state.proposals.is_empty();
429
430    state.proposals.retain(|p| p.value == *value);
431
432    if state.proposals.is_empty() && !is_initial {
433        return Err(TrailError(format!(
434            "Symbol `{}` has no previous state.",
435            value
436        )));
437    }
438
439    trail_run(schema, state);
440
441    Ok(state
442        .proposals
443        .iter()
444        .map(|p| String::from(&p.value))
445        .collect())
446}