1use std::collections::{HashMap, HashSet, VecDeque};
2
3use crate::build::{
4 Symbol, SymbolGraph, SymbolTag, SymbolVar, UUId, build_symbol_graph,
5 split_definition_into_lexemes,
6};
7use crate::helpers::{TrailError, bfs, contains_special_characters, format_error, is_escaped};
8
9pub fn split_cfg_into_rows(grammar: &str) -> Vec<String> {
10 let mut rows: Vec<String> = Vec::new();
11 let mut in_quote = false;
12 let mut in_slash = false;
13 let mut row: Vec<char> = Vec::new();
14 let mut i = 0;
15
16 let chars: Vec<char> = grammar.chars().chain(['\n']).collect();
17
18 while i < chars.len() {
19 let curr = chars[i];
20
21 if curr == '"' {
22 if !in_quote && !in_slash {
23 in_quote = true;
24 } else if in_quote && !is_escaped(&chars, i) {
25 in_quote = false;
26 }
27 row.push(curr);
28 } else if curr == '/' {
29 if !in_quote && !in_slash {
30 in_slash = true;
31 } else if in_slash && !is_escaped(&chars, i) {
32 in_slash = false;
33 }
34 row.push(curr);
35 } else if curr == '\n' && !in_quote && !in_slash {
36 if !row.is_empty() {
37 rows.push(row.iter().collect());
38 row.clear();
39 }
40 } else {
41 row.push(curr);
42 }
43
44 i += 1;
45 }
46
47 return rows;
48}
49
50pub fn split_production(production: &str) -> Result<(&str, &str), TrailError> {
51 let mut indices: Vec<usize> = Vec::new();
52 let mut in_quote = false;
53 let mut in_slash = false;
54 let mut i = 0;
55
56 let chars: Vec<char> = production.chars().collect();
57
58 while i < chars.len() {
59 let curr = chars[i];
60
61 if curr == '"' {
62 if !in_quote && !in_slash {
63 in_quote = true;
64 } else if in_quote && !is_escaped(&chars, i) {
65 in_quote = false;
66 }
67 } else if curr == '/' {
68 if !in_quote && !in_slash {
69 in_slash = true;
70 } else if in_slash && !is_escaped(&chars, i) {
71 in_slash = false;
72 }
73 } else if curr == ':' {
74 if !in_quote && !in_slash {
75 indices.push(i);
76 }
77 }
78
79 i += 1;
80 }
81
82 return match indices.len() {
83 0 => Ok(("", "")),
84 1 => Ok((
85 &production[..indices[0]].trim(),
86 &production[indices[0] + 1..].trim(),
87 )),
88 _ => Err(TrailError(format_error(
89 "Duplicate separator `:`.",
90 &production[..indices[0]].trim(),
91 &production[indices[0]..indices[1] + 1].trim(),
92 ))),
93 };
94}
95
96pub fn divide_cfg_into_productions(grammar: &str) -> Result<HashMap<String, String>, TrailError> {
97 let mut cfg: HashMap<String, String> = HashMap::new();
98 let mut current = String::new();
99 let rows = split_cfg_into_rows(grammar);
100
101 for row in rows {
102 let production = row.trim();
103
104 if production.is_empty() {
105 continue;
106 }
107
108 let (head, body) = split_production(production)?;
109
110 if head.is_empty() && body.is_empty() {
111 if current.is_empty() {
112 return Err(TrailError(format_error(
113 "Invalid production.",
114 "",
115 &production,
116 )));
117 }
118
119 *(cfg
120 .get_mut(¤t)
121 .expect("Key is not empty, thus it has already been added to the CFG.")) +=
122 &production;
123 } else {
124 if contains_special_characters(head) {
125 let header = format!("Name `{}` contains special characters.", head);
126
127 return Err(TrailError(format_error(&header, "", &production)));
128 }
129
130 if cfg.contains_key(head) {
131 return Err(TrailError(format_error(
132 "Duplicate production.",
133 "",
134 &production,
135 )));
136 }
137
138 cfg.insert(head.to_string(), body.to_string());
139 current = head.to_string();
140 }
141 }
142
143 if !cfg.contains_key("start") {
144 return Err(TrailError(format_error(
145 "`start` production rule has not been defined.",
146 "",
147 "",
148 )));
149 }
150
151 return Ok(cfg);
152}
153
154fn is_escapable_from_infinite_loop(graph: &SymbolGraph, prospect: &String) -> bool {
155 let mut visited: Vec<UUId> = Vec::new();
156 let mut queue: VecDeque<UUId> = graph.heads.iter().cloned().collect();
157
158 let (nodes, edges) = (&graph.nodes, &graph.edges);
159
160 while !queue.is_empty() {
161 let vertex = queue.pop_front().unwrap();
162
163 if let Symbol::VARIABLE { value, .. } = &nodes[&vertex]
164 && value.0 == *prospect
165 {
166 continue;
167 }
168
169 let visited_from_vertex = bfs(graph, vec![vertex.clone()]);
170
171 if visited_from_vertex.into_iter().all(|x| {
172 if let Symbol::VARIABLE { value, .. } = &nodes[&x] {
173 value.0 != *prospect
174 } else {
175 true
176 }
177 }) {
178 return true;
179 }
180
181 if !visited.contains(&vertex) {
182 visited.push(vertex);
183 queue.extend(edges.get(&vertex).cloned().unwrap_or_default());
184 }
185 }
186
187 return false;
188}
189
190pub fn contains_infinite_loop(graph: &SymbolGraph, head: &String, body: &String) -> bool {
191 let lexemes =
192 split_definition_into_lexemes(&body).expect("Expected Vec<String>, but got a TrailError.");
193
194 return lexemes.contains(&head) && !is_escapable_from_infinite_loop(&graph, &head);
195}
196
197pub fn fetch_excluded_vars(graph: &SymbolGraph, heads: &HashSet<&String>) -> Option<String> {
198 let excluded = graph.nodes.iter().find_map(|(_, v)| {
199 if let Symbol::VARIABLE { value, .. } = v {
200 (!heads.contains(&value.0)).then_some(&value.0)
201 } else {
202 None
203 }
204 });
205
206 match excluded {
207 Some(content) => {
208 return Some(content.clone());
209 }
210 None => return None,
211 };
212}
213
214#[derive(Debug, Clone, Copy, PartialEq, Eq)]
215pub enum IdState {
216 Set(UUId),
217 Unset,
218}
219
220#[derive(Debug, Clone)]
221pub struct TrailLayer<'a> {
222 pub graph: &'a SymbolGraph,
223 pub id: IdState,
224}
225
226#[derive(Debug, Clone)]
227pub struct TrailProposal<'a> {
228 frame: Vec<TrailLayer<'a>>,
229 value: String,
230}
231
232type TrailRefs = HashMap<SymbolTag, String>;
233
234#[derive(Debug)]
235pub struct State<T> {
236 pub proposals: Vec<T>,
237 pub backrefs: TrailRefs,
238}
239
240pub type TrailState<'a> = State<TrailProposal<'a>>;
241
242impl<T> Default for State<T> {
243 fn default() -> Self {
244 Self {
245 proposals: Vec::new(),
246 backrefs: HashMap::new(),
247 }
248 }
249}
250
251pub type CFGGraph = HashMap<SymbolVar, SymbolGraph>;
252
253pub type Trail<'a> = (CFGGraph, TrailState<'a>);
254
255pub fn build_cfg_graph(grammar: &str) -> Result<CFGGraph, TrailError> {
256 let productions = divide_cfg_into_productions(grammar)?;
257 let (mut graphs, heads): (HashMap<SymbolVar, SymbolGraph>, HashSet<&String>) =
258 (HashMap::new(), productions.keys().collect());
259
260 productions.iter().try_for_each(|(head, body)| {
261 let graph = build_symbol_graph(body)?;
262
263 if contains_infinite_loop(&graph, head, body) {
264 let context = format!("{}: ", head);
265
266 return Err(TrailError(format_error(
267 "Infinite loop found.",
268 &context,
269 body,
270 )));
271 }
272
273 let excluded = fetch_excluded_vars(&graph, &heads);
274
275 match excluded {
276 Some(value) => {
277 let header = format!("Production has an undefined variable `{}`.", value);
278
279 return Err(TrailError(format_error(
280 &header,
281 &format!("{}: ", head),
282 body,
283 )));
284 }
285 None => (),
286 }
287
288 graphs.insert(SymbolVar(head.clone()), graph);
289 Ok(())
290 })?;
291
292 return Ok(graphs);
293}
294
295pub fn trail_cfg<'a>(cfg: &str) -> Result<Trail<'a>, TrailError> {
296 return Ok((build_cfg_graph(cfg)?, TrailState::default()));
297}
298
299pub fn trail_rex<'a>(rex: &str) -> Result<Trail<'a>, TrailError> {
300 return Ok((
301 build_cfg_graph(&format!("start: /{}/", rex))?,
302 TrailState::default(),
303 ));
304}
305
306pub fn trail_exp<'a>(exp: &str) -> Result<Trail<'a>, TrailError> {
307 return Ok((
308 build_cfg_graph(&format!("start: {}", exp))?,
309 TrailState::default(),
310 ));
311}
312
313fn trail_run<'a>(schema: &'a CFGGraph, state: &mut TrailState<'a>) {
314 let (proposals, backrefs) = (&mut state.proposals, &mut state.backrefs);
315
316 for proposal in &*proposals {
317 let checkpoint = proposal.frame.last().unwrap();
318
319 let id = if let IdState::Set(uuid) = checkpoint.id {
320 uuid
321 } else {
322 unreachable!()
323 };
324
325 if let Symbol::TERMINAL { value, tags, .. } = &checkpoint.graph.nodes[&id] {
326 for tag in tags {
327 backrefs
328 .entry(tag.clone())
329 .or_insert_with(String::new)
330 .push_str(value);
331 }
332 }
333 }
334
335 let mut frames = if proposals.is_empty() {
336 let start = vec![TrailLayer {
337 graph: &schema[&SymbolVar::new("start")],
338 id: IdState::Unset,
339 }];
340
341 vec![start]
342 } else {
343 proposals.drain(..).map(|proposal| proposal.frame).collect()
344 };
345
346 while !frames.is_empty() {
347 let mut frame = frames.pop().unwrap();
348
349 let checkpoint = frame
350 .last()
351 .expect("Empty proposal states are neither processed, nor pushed to the state.");
352
353 let (id, graph) = (checkpoint.id, checkpoint.graph);
354
355 let nodes = &graph.nodes;
356
357 let successors = match id {
358 IdState::Set(value) => graph.edges.get(&value).cloned().unwrap_or_default(),
359 IdState::Unset => graph.heads.clone(),
360 };
361
362 if successors.is_empty() {
363 frame.pop();
364
365 if !frame.is_empty() {
366 frames.push(frame);
367 }
368
369 continue;
370 }
371
372 for successor in &successors {
373 match &nodes[successor] {
374 Symbol::TERMINAL { value, .. } => {
375 let mut next_frame = frame.clone();
376 next_frame.last_mut().unwrap().id = IdState::Set(*successor);
377
378 let next_value = value.clone();
379
380 proposals.push(TrailProposal {
381 frame: next_frame,
382 value: next_value,
383 });
384 }
385 Symbol::VARIABLE { value, .. } => {
386 let mut next_frame = frame.clone();
387 next_frame.last_mut().unwrap().id = IdState::Set(*successor);
388
389 let next_layer = TrailLayer {
390 graph: &schema[&value],
391 id: IdState::Unset,
392 };
393 next_frame.push(next_layer);
394
395 frames.push(next_frame);
396 }
397 Symbol::REFERENCE { value, .. } => {
398 let mut next_frame = frame.clone();
399 next_frame.last_mut().unwrap().id = IdState::Set(*successor);
400
401 let reference = &backrefs[value];
402 let next_value = reference.clone();
403
404 proposals.push(TrailProposal {
405 frame: next_frame,
406 value: next_value,
407 });
408 }
409 Symbol::END { .. } => {
410 let mut next_frame = frame.clone();
411 next_frame.last_mut().unwrap().id = IdState::Set(*successor);
412
413 proposals.push(TrailProposal {
414 frame: next_frame,
415 value: String::new(),
416 });
417 }
418 }
419 }
420 }
421}
422
423pub fn get_next_proposals<'a, 'b>(
424 schema: &'a CFGGraph,
425 state: &mut TrailState<'a>,
426 value: &String,
427) -> Result<Vec<String>, TrailError> {
428 let is_initial = state.proposals.is_empty();
429
430 state.proposals.retain(|p| p.value == *value);
431
432 if state.proposals.is_empty() && !is_initial {
433 return Err(TrailError(format!(
434 "Symbol `{}` has no previous state.",
435 value
436 )));
437 }
438
439 trail_run(schema, state);
440
441 Ok(state
442 .proposals
443 .iter()
444 .map(|p| String::from(&p.value))
445 .collect())
446}