use crate::generator::cut::FirstSet;
use mpl::rules::Rule;
use mpl::symbols::{Metasymbol, TerminalSymbol, E};
use std::collections::{BTreeSet, HashMap};
fn extract_char_byte(raw: &str) -> Option<u8> {
let parsed: syn::Expr = syn::parse_str(raw).ok()?;
let call = match parsed {
syn::Expr::Call(c) => c,
_ => return None,
};
let path = match &*call.func {
syn::Expr::Path(p) => &p.path,
_ => return None,
};
if !path.is_ident("Char") {
return None;
}
let arg = call.args.first()?;
let lit = match arg {
syn::Expr::Lit(syn::ExprLit {
lit: syn::Lit::Char(c),
..
}) => c,
_ => return None,
};
let v = lit.value() as u32;
if v < 256 {
Some(v as u8)
} else {
None
}
}
fn is_empty_meta(e: &E<&str, &str>) -> bool {
matches!(e, E::T(TerminalSymbol::Metasymbol(Metasymbol::Empty)))
}
fn is_failure_meta(e: &E<&str, &str>) -> bool {
matches!(e, E::T(TerminalSymbol::Metasymbol(Metasymbol::Failure)))
}
fn try_resolve_byte_set<'a>(
e: &E<&'a str, &'a str>,
result: &HashMap<&'a str, BTreeSet<u8>>,
) -> Option<BTreeSet<u8>> {
match e {
E::T(TerminalSymbol::Original(raw)) => {
extract_char_byte(raw).map(|b| {
let mut s = BTreeSet::new();
s.insert(b);
s
})
}
E::V(name) => result.get(name).cloned(),
_ => None,
}
}
fn classify_one<'a>(
rule: &Rule<&'a str, &'a str>,
result: &HashMap<&'a str, BTreeSet<u8>>,
) -> Option<BTreeSet<u8>> {
if !is_empty_meta(&rule.equal.first.rhs) {
return None;
}
let mut bytes = try_resolve_byte_set(&rule.equal.first.lhs, result)?;
let second = &rule.equal.second.0;
if !is_failure_meta(second) {
let extra = try_resolve_byte_set(second, result)?;
bytes.extend(extra);
}
Some(bytes)
}
pub fn is_left_recursive(rule: &Rule<&str, &str>) -> bool {
matches!(&rule.equal.first.lhs, E::V(name) if *name == rule.value)
}
pub fn is_tail_loop(rule: &Rule<&str, &str>) -> bool {
let rhs_self = match &rule.equal.first.rhs {
E::V(name) => *name == rule.value,
_ => false,
};
if !rhs_self {
return false;
}
matches!(
&rule.equal.second.0,
E::T(TerminalSymbol::Metasymbol(Metasymbol::Empty))
)
}
pub fn analyze_char_classes<'a>(
rules: &[&'a Rule<&'a str, &'a str>],
) -> HashMap<&'a str, BTreeSet<u8>> {
let mut result: HashMap<&'a str, BTreeSet<u8>> = HashMap::new();
loop {
let mut changed = false;
for r in rules {
if result.contains_key(r.value) {
continue;
}
if let Some(set) = classify_one(r, &result) {
result.insert(r.value, set);
changed = true;
}
}
if !changed {
break;
}
}
result
}
pub struct DispatchCascade<'a> {
pub alternatives: Vec<DispatchAlt<'a>>,
}
pub struct DispatchAlt<'a> {
pub rule_name: &'a str,
pub first: FirstSet,
}
fn collect_cascade_alternatives<'a>(
rule_name: &'a str,
rules: &HashMap<&'a str, &'a Rule<&'a str, &'a str>>,
visited: &mut BTreeSet<&'a str>,
out: &mut Vec<&'a str>,
) -> bool {
if !visited.insert(rule_name) {
return false;
}
let rule = match rules.get(rule_name) {
Some(r) => r,
None => return false,
};
if !is_empty_meta(&rule.equal.first.rhs) {
return false;
}
let lhs_name = match &rule.equal.first.lhs {
E::V(v) => *v,
_ => return false,
};
out.push(lhs_name);
match &rule.equal.second.0 {
E::T(TerminalSymbol::Metasymbol(Metasymbol::Failure)) => true,
E::V(next) => collect_cascade_alternatives(next, rules, visited, out),
_ => false,
}
}
pub fn analyze_first_byte_dispatch<'a>(
rules: &[&'a Rule<&'a str, &'a str>],
firsts: &HashMap<String, FirstSet>,
char_classes: &HashMap<&'a str, BTreeSet<u8>>,
) -> HashMap<&'a str, DispatchCascade<'a>> {
let rule_map: HashMap<&str, &Rule<&str, &str>> =
rules.iter().map(|r| (r.value, *r)).collect();
let mut result = HashMap::new();
for r in rules {
if char_classes.contains_key(r.value) {
continue;
}
let mut alts = Vec::new();
let mut visited = BTreeSet::new();
if !collect_cascade_alternatives(r.value, &rule_map, &mut visited, &mut alts) {
continue;
}
if alts.len() < 5 {
continue;
}
let alt_firsts: Vec<DispatchAlt> = alts
.into_iter()
.filter_map(|name| {
let f = firsts.get(name)?.clone();
if f.any_byte {
return None;
}
Some(DispatchAlt {
rule_name: name,
first: f,
})
})
.collect();
let any_class = alt_firsts.iter().any(|a| a.first.byte_count() >= 3);
if !any_class {
continue;
}
if alt_firsts.iter().any(|a| a.first.byte_count() == 0) {
continue;
}
result.insert(
r.value,
DispatchCascade {
alternatives: alt_firsts,
},
);
}
result
}