use mpl::rules::Rule;
use mpl::symbols::{Metasymbol, TerminalSymbol, E};
use std::collections::HashMap;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FirstSet {
bytes: [bool; 256],
pub any_byte: bool,
pub nullable: bool,
}
impl Default for FirstSet {
fn default() -> Self {
Self {
bytes: [false; 256],
any_byte: false,
nullable: false,
}
}
}
impl FirstSet {
pub fn iter_bytes(&self) -> impl Iterator<Item = u8> + '_ {
(0u32..256).filter_map(move |i| {
if self.bytes[i as usize] {
Some(i as u8)
} else {
None
}
})
}
pub fn byte_count(&self) -> usize {
self.bytes.iter().filter(|&&b| b).count()
}
fn add_byte(&mut self, b: u8) -> bool {
if !self.bytes[b as usize] {
self.bytes[b as usize] = true;
true
} else {
false
}
}
fn union_in(&mut self, other: &FirstSet) -> bool {
let mut changed = false;
for i in 0..256 {
if other.bytes[i] && !self.bytes[i] {
self.bytes[i] = true;
changed = true;
}
}
if other.any_byte && !self.any_byte {
self.any_byte = true;
changed = true;
}
if other.nullable && !self.nullable {
self.nullable = true;
changed = true;
}
changed
}
pub fn is_disjoint(&self, other: &FirstSet) -> bool {
if self.nullable && other.nullable {
return false;
}
if self.any_byte && (other.any_byte || other.bytes.iter().any(|&b| b)) {
return false;
}
if other.any_byte && self.bytes.iter().any(|&b| b) {
return false;
}
for i in 0..256 {
if self.bytes[i] && other.bytes[i] {
return false;
}
}
true
}
}
fn first_of_e(e: &E<&str, &str>, firsts: &HashMap<String, FirstSet>) -> FirstSet {
match e {
E::V(v) => firsts.get(*v).cloned().unwrap_or_default(),
E::T(TerminalSymbol::Metasymbol(m)) => match m {
Metasymbol::Empty => FirstSet {
nullable: true,
..Default::default()
},
Metasymbol::Failure => FirstSet::default(),
Metasymbol::Any(n) => FirstSet {
any_byte: *n > 0,
nullable: *n == 0,
..Default::default()
},
Metasymbol::All => FirstSet {
any_byte: true,
nullable: true,
..Default::default()
},
Metasymbol::Omit => FirstSet::default(),
},
E::T(TerminalSymbol::Original(raw)) => first_of_original(raw),
}
}
fn first_of_original(raw: &str) -> FirstSet {
let parsed = match syn::parse_str::<syn::Expr>(raw) {
Ok(e) => e,
Err(_) => {
return FirstSet {
any_byte: true,
..Default::default()
};
}
};
if let syn::Expr::Call(call) = &parsed {
if let syn::Expr::Path(p) = &*call.func {
if p.path.is_ident("Char") {
if let Some(syn::Expr::Lit(syn::ExprLit {
lit: syn::Lit::Char(c),
..
})) = call.args.first()
{
let mut fs = FirstSet::default();
fs.add_byte(c.value() as u32 as u8);
return fs;
}
} else if p.path.is_ident("Str") {
if let Some(syn::Expr::Lit(syn::ExprLit {
lit: syn::Lit::Str(s),
..
})) = call.args.first()
{
let bytes = s.value();
let mut fs = FirstSet::default();
if let Some(&b) = bytes.as_bytes().first() {
fs.add_byte(b);
} else {
fs.nullable = true;
}
return fs;
}
}
}
}
FirstSet {
any_byte: true,
..Default::default()
}
}
fn first_of_seq(
b: &E<&str, &str>,
c: &E<&str, &str>,
firsts: &HashMap<String, FirstSet>,
) -> FirstSet {
let fb = first_of_e(b, firsts);
let fc = first_of_e(c, firsts);
let mut out = fb.clone();
if fb.nullable {
out.union_in(&fc);
}
out.nullable = fb.nullable && fc.nullable;
out
}
pub fn compute_first_sets<'a>(rules: &[&'a Rule<&'a str, &'a str>]) -> HashMap<String, FirstSet> {
let mut firsts: HashMap<String, FirstSet> = HashMap::new();
for r in rules {
firsts.insert(r.value.to_string(), FirstSet::default());
}
loop {
let mut changed = false;
for r in rules {
let first_choice = first_of_seq(&r.equal.first.lhs, &r.equal.first.rhs, &firsts);
let second_choice = first_of_e(&r.equal.second.0, &firsts);
let mut union = first_choice;
union.union_in(&second_choice);
let cur = firsts.get_mut(r.value).unwrap();
if cur.union_in(&union) {
changed = true;
}
}
if !changed {
break;
}
}
firsts
}
pub fn classify_rules<'a>(
rules: &[&'a Rule<&'a str, &'a str>],
firsts: &HashMap<String, FirstSet>,
) -> Vec<(String, bool)> {
rules
.iter()
.map(|r| {
let first_choice = first_of_seq(&r.equal.first.lhs, &r.equal.first.rhs, firsts);
let second_choice = first_of_e(&r.equal.second.0, firsts);
let needs_memo = !first_choice.is_disjoint(&second_choice);
(r.value.to_string(), needs_memo)
})
.collect()
}