use std::collections::{BTreeSet, HashMap};
use crate::analysis::{self, AnalyzedGrammar, EOF_MARKER};
use crate::grammar::ir::*;
use crate::lowering::{
FirstSet, FirstSetId, FirstSetPool, LookaheadSeq, SyncSet, SyncSetId, SyncSetPool, TokenInfo,
};
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct BlockId(
pub u32,
);
#[derive(Clone, Debug)]
pub enum Target {
Rule(String),
Block(BlockId),
}
#[derive(Clone, Debug)]
pub enum Op {
Enter {
kind: u16,
},
Exit {
kind: u16,
},
Expect {
kind: i16,
token_name: String,
sync: SyncSetId,
},
Call {
target: Target,
},
Opt {
first: FirstSetId,
body: Target,
},
Star {
first: FirstSetId,
body: Target,
},
Dispatch {
arms: Vec<(FirstSetId, Target)>,
has_eps: bool,
sync: SyncSetId,
},
}
#[derive(Clone)]
pub struct Block {
pub label_prefix: String,
pub ops: Vec<Op>,
pub op_labels: Vec<String>,
}
pub struct Program {
pub blocks: Vec<Block>,
pub rule_entry: HashMap<String, BlockId>,
pub rule_order: Vec<String>,
pub public_rule_names: BTreeSet<String>,
pub tokens: Vec<TokenInfo>,
pub rules: Vec<String>,
pub first_sets: FirstSetPool,
pub sync_sets: SyncSetPool,
pub eof_id: i16,
pub error_id: i16,
}
struct Builder<'a> {
ag: &'a AnalyzedGrammar,
blocks: Vec<Block>,
first_sets: FirstSetPool,
first_intern: HashMap<FirstSet, FirstSetId>,
sync_sets: SyncSetPool,
sync_intern: HashMap<SyncSet, SyncSetId>,
current_sync: SyncSetId,
current_symbol: String,
}
pub fn build(ag: &AnalyzedGrammar) -> Program {
let g = &ag.grammar;
let rules = collect_rules(g);
let tokens: Vec<TokenInfo> = g
.tokens
.iter()
.filter(|t| !t.is_fragment)
.enumerate()
.map(|(i, t)| TokenInfo {
name: t.name.clone(),
pattern: resolve_pattern(&t.pattern, g),
skip: t.skip,
kind: (i + 1) as i16,
})
.collect();
let error_id: i16 = parsuna_rt::TOKEN_ERROR;
let eof_id: i16 = parsuna_rt::TOKEN_EOF;
let mut b = Builder {
ag,
blocks: Vec::new(),
first_sets: Vec::new(),
first_intern: HashMap::new(),
sync_sets: Vec::new(),
sync_intern: HashMap::new(),
current_sync: 0,
current_symbol: String::new(),
};
let mut rule_entry: HashMap<String, BlockId> = HashMap::new();
let mut rule_order: Vec<String> = Vec::with_capacity(g.rules.len());
for rule in &g.rules {
let sync = compute_sync(ag, &rule.name, eof_id);
b.current_sync = b.intern_sync(sync);
b.current_symbol = rule.name.clone();
let bid = b.new_block(rule.name.clone());
rule_entry.insert(rule.name.clone(), bid);
rule_order.push(rule.name.clone());
let rule_tail = b.ag.follow_k.get(&rule.name).cloned().unwrap_or_default();
if !rule.is_fragment {
let kind = rule_kind_id(&rules, &rule.name);
let enter_label = format!("{}:enter", rule.name);
let exit_label = format!("{}:exit", rule.name);
b.push_op(bid, Op::Enter { kind }, enter_label);
b.emit_expr(&rule.body, bid, &rule_tail);
b.push_op(bid, Op::Exit { kind }, exit_label);
} else {
b.emit_expr(&rule.body, bid, &rule_tail);
}
}
let public_rule_names: BTreeSet<String> = g
.rules
.iter()
.filter(|r| !r.is_fragment)
.map(|r| r.name.clone())
.collect();
let Builder {
blocks,
first_sets,
sync_sets,
..
} = b;
Program {
blocks,
rule_entry,
rule_order,
public_rule_names,
tokens,
rules,
first_sets,
sync_sets,
eof_id,
error_id,
}
}
impl Builder<'_> {
fn new_block(&mut self, label_prefix: String) -> BlockId {
let id = BlockId(self.blocks.len() as u32);
self.blocks.push(Block {
label_prefix,
ops: Vec::new(),
op_labels: Vec::new(),
});
id
}
fn push_op(&mut self, bid: BlockId, op: Op, label: String) {
let block = &mut self.blocks[bid.0 as usize];
block.ops.push(op);
block.op_labels.push(label);
}
fn intern_first(&mut self, mut seqs: FirstSet) -> FirstSetId {
seqs.sort();
seqs.dedup();
if let Some(id) = self.first_intern.get(&seqs) {
return *id;
}
let id = self.first_sets.len() as FirstSetId;
self.first_intern.insert(seqs.clone(), id);
self.first_sets.push(seqs);
id
}
fn intern_sync(&mut self, mut toks: SyncSet) -> SyncSetId {
toks.sort();
toks.dedup();
if let Some(id) = self.sync_intern.get(&toks) {
return *id;
}
let id = self.sync_sets.len() as SyncSetId;
self.sync_intern.insert(toks.clone(), id);
self.sync_sets.push(toks);
id
}
fn first_of_expr(&self, e: &Expr) -> analysis::FirstSet {
analysis::first_follow::first_of(e, &self.ag.nullable, &self.ag.first, self.ag.k)
}
fn first_ids(&mut self, first: &analysis::FirstSet) -> FirstSetId {
let seqs: FirstSet = first
.iter()
.filter(|seq| !seq.is_empty())
.map(|seq| seq.iter().map(|n| token_kind(self.ag, n)).collect::<LookaheadSeq>())
.collect();
self.intern_first(seqs)
}
fn build_sub(
&mut self,
sub_suffix: &str,
body_expr: &Expr,
tail: &analysis::FirstSet,
) -> BlockId {
let sub_label = format!("{}:{}", self.current_symbol, sub_suffix);
let sub = self.new_block(sub_label.clone());
let saved = std::mem::replace(&mut self.current_symbol, sub_label);
self.emit_expr(body_expr, sub, tail);
self.current_symbol = saved;
sub
}
fn emit_expr(&mut self, e: &Expr, cur: BlockId, tail: &analysis::FirstSet) {
let k = self.ag.k;
match e {
Expr::Empty => {}
Expr::Token(name) => {
let kind = token_kind(self.ag, name);
let label = format!("{}:expect:{}", self.current_symbol, name);
self.push_op(
cur,
Op::Expect {
kind,
token_name: name.clone(),
sync: self.current_sync,
},
label,
);
}
Expr::Rule(name) => {
let label = format!("{}:call:{}", self.current_symbol, name);
self.push_op(
cur,
Op::Call {
target: Target::Rule(name.clone()),
},
label,
);
}
Expr::Seq(xs) => {
let n = xs.len();
let mut succ: Vec<analysis::FirstSet> = vec![tail.clone(); n + 1];
for i in (0..n).rev() {
let fx = self.first_of_expr(&xs[i]);
succ[i] = analysis::first_follow::concat_k(&fx, &succ[i + 1], k);
}
for i in 0..n {
self.emit_expr(&xs[i], cur, &succ[i + 1]);
}
}
Expr::Alt(xs) => {
let firsts: Vec<analysis::FirstSet> =
xs.iter().map(|x| self.first_of_expr(x)).collect();
let has_eps = firsts.iter().any(|f| f.iter().any(|seq| seq.is_empty()));
let mut arms: Vec<(FirstSetId, Target)> = Vec::new();
for (idx, (arm_expr, first)) in xs.iter().zip(firsts.iter()).enumerate() {
if first.iter().all(|seq| seq.is_empty()) {
continue;
}
let predict = analysis::first_follow::concat_k(first, tail, k);
let (non_eps, _) = analysis::first_follow::split_nullable(&predict);
let fid = self.first_ids(&non_eps);
let sub = self.build_sub(&format!("alt{}", idx), arm_expr, tail);
arms.push((fid, Target::Block(sub)));
}
let label = format!("{}:dispatch", self.current_symbol);
self.push_op(
cur,
Op::Dispatch {
arms,
has_eps,
sync: self.current_sync,
},
label,
);
}
Expr::Opt(x) => {
let first = self.first_of_expr(x);
let fid = self.first_ids(&first);
let sub = self.build_sub("opt-body", x, tail);
let label = format!("{}:opt", self.current_symbol);
self.push_op(
cur,
Op::Opt {
first: fid,
body: Target::Block(sub),
},
label,
);
}
Expr::Star(x) => {
let first = self.first_of_expr(x);
let fid = self.first_ids(&first);
let body_tail = analysis::first_follow::concat_k(&first, tail, k);
let sub = self.build_sub("star-body", x, &body_tail);
let label = format!("{}:star", self.current_symbol);
self.push_op(
cur,
Op::Star {
first: fid,
body: Target::Block(sub),
},
label,
);
}
Expr::Plus(x) => {
let first = self.first_of_expr(x);
let fid = self.first_ids(&first);
let body_tail = analysis::first_follow::concat_k(&first, tail, k);
let sub = self.build_sub("plus-body", x, &body_tail);
let call_label = format!("{}:plus-first", self.current_symbol);
self.push_op(
cur,
Op::Call {
target: Target::Block(sub),
},
call_label,
);
let star_label = format!("{}:plus-star", self.current_symbol);
self.push_op(
cur,
Op::Star {
first: fid,
body: Target::Block(sub),
},
star_label,
);
}
}
}
}
fn collect_rules(g: &Grammar) -> Vec<String> {
let mut set: BTreeSet<String> = BTreeSet::new();
for r in &g.rules {
if r.is_fragment {
continue;
}
set.insert(r.name.clone());
}
set.into_iter().collect()
}
fn compute_sync(ag: &AnalyzedGrammar, rule_name: &str, eof_id: i16) -> SyncSet {
let follow = ag.follow.get(rule_name).cloned().unwrap_or_default();
let mut names: Vec<&str> = follow
.iter()
.filter(|t| t.as_str() != EOF_MARKER)
.map(|t| t.as_str())
.collect();
names.sort();
names.dedup();
let mut ids: SyncSet = names.iter().map(|n| token_kind(ag, n)).collect();
ids.push(eof_id);
ids
}
fn token_kind(ag: &AnalyzedGrammar, name: &str) -> i16 {
ag.grammar
.tokens
.iter()
.filter(|t| !t.is_fragment)
.position(|t| t.name == name)
.map(|i| (i + 1) as i16)
.unwrap_or(0)
}
fn resolve_pattern(p: &TokenPattern, g: &Grammar) -> TokenPattern {
match p {
TokenPattern::Empty | TokenPattern::Literal(_) | TokenPattern::Class(_) => p.clone(),
TokenPattern::Ref(n) => match g.token(n) {
Some(td) => resolve_pattern(&td.pattern, g),
None => TokenPattern::Empty,
},
TokenPattern::Seq(xs) => {
TokenPattern::Seq(xs.iter().map(|x| resolve_pattern(x, g)).collect())
}
TokenPattern::Alt(xs) => {
TokenPattern::Alt(xs.iter().map(|x| resolve_pattern(x, g)).collect())
}
TokenPattern::Opt(x) => TokenPattern::Opt(Box::new(resolve_pattern(x, g))),
TokenPattern::Star(x) => TokenPattern::Star(Box::new(resolve_pattern(x, g))),
TokenPattern::Plus(x) => TokenPattern::Plus(Box::new(resolve_pattern(x, g))),
}
}
fn rule_kind_id(rules: &[String], name: &str) -> u16 {
rules
.iter()
.position(|n| n == name)
.map(|i| i as u16)
.unwrap_or(0)
}