use std::collections::{HashMap, HashSet, VecDeque};
use crate::build::{
Symbol, SymbolGraph, SymbolTag, SymbolVar, UUId, build_symbol_graph,
split_definition_into_lexemes,
};
use crate::helpers::{TrailError, bfs, contains_special_characters, format_error, is_escaped};
pub fn split_cfg_into_rows(grammar: &str) -> Vec<String> {
let mut rows: Vec<String> = Vec::new();
let mut in_quote = false;
let mut in_slash = false;
let mut row: Vec<char> = Vec::new();
let mut i = 0;
let chars: Vec<char> = grammar.chars().chain(['\n']).collect();
while i < chars.len() {
let curr = chars[i];
if curr == '"' {
if !in_quote && !in_slash {
in_quote = true;
} else if in_quote && !is_escaped(&chars, i) {
in_quote = false;
}
row.push(curr);
} else if curr == '/' {
if !in_quote && !in_slash {
in_slash = true;
} else if in_slash && !is_escaped(&chars, i) {
in_slash = false;
}
row.push(curr);
} else if curr == '\n' && !in_quote && !in_slash {
if !row.is_empty() {
rows.push(row.iter().collect());
row.clear();
}
} else {
row.push(curr);
}
i += 1;
}
return rows;
}
pub fn split_production(production: &str) -> Result<(&str, &str), TrailError> {
let mut indices: Vec<usize> = Vec::new();
let mut in_quote = false;
let mut in_slash = false;
let mut i = 0;
let chars: Vec<char> = production.chars().collect();
while i < chars.len() {
let curr = chars[i];
if curr == '"' {
if !in_quote && !in_slash {
in_quote = true;
} else if in_quote && !is_escaped(&chars, i) {
in_quote = false;
}
} else if curr == '/' {
if !in_quote && !in_slash {
in_slash = true;
} else if in_slash && !is_escaped(&chars, i) {
in_slash = false;
}
} else if curr == ':' {
if !in_quote && !in_slash {
indices.push(i);
}
}
i += 1;
}
return match indices.len() {
0 => Ok(("", "")),
1 => Ok((
&production[..indices[0]].trim(),
&production[indices[0] + 1..].trim(),
)),
_ => Err(TrailError(format_error(
"Duplicate separator `:`.",
&production[..indices[0]].trim(),
&production[indices[0]..indices[1] + 1].trim(),
))),
};
}
pub fn divide_cfg_into_productions(grammar: &str) -> Result<HashMap<String, String>, TrailError> {
let mut cfg: HashMap<String, String> = HashMap::new();
let mut current = String::new();
let rows = split_cfg_into_rows(grammar);
for row in rows {
let production = row.trim();
if production.is_empty() {
continue;
}
let (head, body) = split_production(production)?;
if head.is_empty() && body.is_empty() {
if current.is_empty() {
return Err(TrailError(format_error(
"Invalid production.",
"",
&production,
)));
}
*(cfg
.get_mut(¤t)
.expect("Key is not empty, thus it has already been added to the CFG.")) +=
&production;
} else {
if contains_special_characters(head) {
let header = format!("Name `{}` contains special characters.", head);
return Err(TrailError(format_error(&header, "", &production)));
}
if cfg.contains_key(head) {
return Err(TrailError(format_error(
"Duplicate production.",
"",
&production,
)));
}
cfg.insert(head.to_string(), body.to_string());
current = head.to_string();
}
}
if !cfg.contains_key("start") {
return Err(TrailError(format_error(
"`start` production rule has not been defined.",
"",
"",
)));
}
return Ok(cfg);
}
fn is_escapable_from_infinite_loop(graph: &SymbolGraph, prospect: &String) -> bool {
let mut visited: Vec<UUId> = Vec::new();
let mut queue: VecDeque<UUId> = graph.heads.iter().cloned().collect();
let (nodes, edges) = (&graph.nodes, &graph.edges);
while !queue.is_empty() {
let vertex = queue.pop_front().unwrap();
if let Symbol::VARIABLE { value, .. } = &nodes[&vertex]
&& value.0 == *prospect
{
continue;
}
let visited_from_vertex = bfs(graph, vec![vertex.clone()]);
if visited_from_vertex.into_iter().all(|x| {
if let Symbol::VARIABLE { value, .. } = &nodes[&x] {
value.0 != *prospect
} else {
true
}
}) {
return true;
}
if !visited.contains(&vertex) {
visited.push(vertex);
queue.extend(edges.get(&vertex).cloned().unwrap_or_default());
}
}
return false;
}
pub fn contains_infinite_loop(graph: &SymbolGraph, head: &String, body: &String) -> bool {
let lexemes =
split_definition_into_lexemes(&body).expect("Expected Vec<String>, but got a TrailError.");
return lexemes.contains(&head) && !is_escapable_from_infinite_loop(&graph, &head);
}
pub fn fetch_excluded_vars(graph: &SymbolGraph, heads: &HashSet<&String>) -> Option<String> {
let excluded = graph.nodes.iter().find_map(|(_, v)| {
if let Symbol::VARIABLE { value, .. } = v {
(!heads.contains(&value.0)).then_some(&value.0)
} else {
None
}
});
match excluded {
Some(content) => {
return Some(content.clone());
}
None => return None,
};
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IdState {
Set(UUId),
Unset,
}
#[derive(Debug, Clone)]
pub struct TrailLayer<'a> {
pub graph: &'a SymbolGraph,
pub id: IdState,
}
#[derive(Debug, Clone)]
pub struct TrailProposal<'a> {
frame: Vec<TrailLayer<'a>>,
value: String,
}
type TrailRefs = HashMap<SymbolTag, String>;
#[derive(Debug)]
pub struct State<T> {
pub proposals: Vec<T>,
pub backrefs: TrailRefs,
}
pub type TrailState<'a> = State<TrailProposal<'a>>;
impl<T> Default for State<T> {
fn default() -> Self {
Self {
proposals: Vec::new(),
backrefs: HashMap::new(),
}
}
}
pub type CFGGraph = HashMap<SymbolVar, SymbolGraph>;
pub type Trail<'a> = (CFGGraph, TrailState<'a>);
pub fn build_cfg_graph(grammar: &str) -> Result<CFGGraph, TrailError> {
let productions = divide_cfg_into_productions(grammar)?;
let (mut graphs, heads): (HashMap<SymbolVar, SymbolGraph>, HashSet<&String>) =
(HashMap::new(), productions.keys().collect());
productions.iter().try_for_each(|(head, body)| {
let graph = build_symbol_graph(body)?;
if contains_infinite_loop(&graph, head, body) {
let context = format!("{}: ", head);
return Err(TrailError(format_error(
"Infinite loop found.",
&context,
body,
)));
}
let excluded = fetch_excluded_vars(&graph, &heads);
match excluded {
Some(value) => {
let header = format!("Production has an undefined variable `{}`.", value);
return Err(TrailError(format_error(
&header,
&format!("{}: ", head),
body,
)));
}
None => (),
}
graphs.insert(SymbolVar(head.clone()), graph);
Ok(())
})?;
return Ok(graphs);
}
pub fn trail_cfg<'a>(cfg: &str) -> Result<Trail<'a>, TrailError> {
return Ok((build_cfg_graph(cfg)?, TrailState::default()));
}
pub fn trail_rex<'a>(rex: &str) -> Result<Trail<'a>, TrailError> {
return Ok((
build_cfg_graph(&format!("start: /{}/", rex))?,
TrailState::default(),
));
}
pub fn trail_exp<'a>(exp: &str) -> Result<Trail<'a>, TrailError> {
return Ok((
build_cfg_graph(&format!("start: {}", exp))?,
TrailState::default(),
));
}
fn trail_run<'a>(schema: &'a CFGGraph, state: &mut TrailState<'a>) {
let (proposals, backrefs) = (&mut state.proposals, &mut state.backrefs);
for proposal in &*proposals {
let checkpoint = proposal.frame.last().unwrap();
let id = if let IdState::Set(uuid) = checkpoint.id {
uuid
} else {
unreachable!()
};
if let Symbol::TERMINAL { value, tags, .. } = &checkpoint.graph.nodes[&id] {
for tag in tags {
backrefs
.entry(tag.clone())
.or_insert_with(String::new)
.push_str(value);
}
}
}
let mut frames = if proposals.is_empty() {
let start = vec![TrailLayer {
graph: &schema[&SymbolVar::new("start")],
id: IdState::Unset,
}];
vec![start]
} else {
proposals.drain(..).map(|proposal| proposal.frame).collect()
};
while !frames.is_empty() {
let mut frame = frames.pop().unwrap();
let checkpoint = frame
.last()
.expect("Empty proposal states are neither processed, nor pushed to the state.");
let (id, graph) = (checkpoint.id, checkpoint.graph);
let nodes = &graph.nodes;
let successors = match id {
IdState::Set(value) => graph.edges.get(&value).cloned().unwrap_or_default(),
IdState::Unset => graph.heads.clone(),
};
if successors.is_empty() {
frame.pop();
if !frame.is_empty() {
frames.push(frame);
}
continue;
}
for successor in &successors {
match &nodes[successor] {
Symbol::TERMINAL { value, .. } => {
let mut next_frame = frame.clone();
next_frame.last_mut().unwrap().id = IdState::Set(*successor);
let next_value = value.clone();
proposals.push(TrailProposal {
frame: next_frame,
value: next_value,
});
}
Symbol::VARIABLE { value, .. } => {
let mut next_frame = frame.clone();
next_frame.last_mut().unwrap().id = IdState::Set(*successor);
let next_layer = TrailLayer {
graph: &schema[&value],
id: IdState::Unset,
};
next_frame.push(next_layer);
frames.push(next_frame);
}
Symbol::REFERENCE { value, .. } => {
let mut next_frame = frame.clone();
next_frame.last_mut().unwrap().id = IdState::Set(*successor);
let reference = &backrefs[value];
let next_value = reference.clone();
proposals.push(TrailProposal {
frame: next_frame,
value: next_value,
});
}
Symbol::END { .. } => {
let mut next_frame = frame.clone();
next_frame.last_mut().unwrap().id = IdState::Set(*successor);
proposals.push(TrailProposal {
frame: next_frame,
value: String::new(),
});
}
}
}
}
}
pub fn get_next_proposals<'a, 'b>(
schema: &'a CFGGraph,
state: &mut TrailState<'a>,
value: &String,
) -> Result<Vec<String>, TrailError> {
let is_initial = state.proposals.is_empty();
state.proposals.retain(|p| p.value == *value);
if state.proposals.is_empty() && !is_initial {
return Err(TrailError(format!(
"Symbol `{}` has no previous state.",
value
)));
}
trail_run(schema, state);
Ok(state
.proposals
.iter()
.map(|p| String::from(&p.value))
.collect())
}