use std::collections::{HashMap, BTreeMap, BTreeSet};
use std::cmp::Ordering;
use std::mem;
use std::iter;
use bit_matrix::BitMatrix;
use gearley::grammar::{Grammar, BinarizedGrammar, History};
use cfg::classification::cyclical::Cycles;
use cfg::remap::{Remap, Mapping};
use cfg::rule::RuleRef;
use cfg::rule::container::RuleContainer;
use cfg::classification::useful::Usefulness;
use cfg::*;
use cfg_regex::{RegexTranslation, Class};
use input::{ast, LEXER_START_FRAGMENT, LexerId, FragmentId};
use input::attr_arguments::AttrArguments;
use middle::error::{TransformationError, CycleWithCauses};
use middle::trace::Trace;
use middle::rule_rewrite::{RuleValue, Rule, Sym, RuleRewrite, LowerRuleRewriteResult, RuleRewriteResult};
use middle::flatten_stmts::{FlattenStmts, Path, Position};
use middle::type_collector::{Type, TypeCollector};
use middle::warn::{WarningCauses, WarningsWithContext};
use middle::embedded_string::EmbeddedString;
/// Final intermediate representation of a processed grammar, ready for
/// code generation. Built from `IrMapped` via the `From` impl below.
pub struct Ir {
    /// The main binarized grammar, with nulling rules eliminated and
    /// symbols remapped to the internal numbering.
    pub grammar: BinarizedGrammar,
    /// Rules that derive the empty string, kept as a separate grammar.
    pub nulling_grammar: BinarizedGrammar,
    /// True when some nulling rule's lhs is the start symbol, i.e. the
    /// grammar trivially derives the empty string.
    pub trivial_derivation: bool,
    /// The lowered rules produced by rule rewriting.
    pub rules: LowerRuleRewriteResult,
    /// Collected types, keyed by source path.
    pub type_map: BTreeMap<Path, BTreeSet<Type>>,
    /// Two-way mapping between internal and external symbols, plus the
    /// named-`Sym` table.
    pub maps: InternalExternalNameMap,
    /// Arguments taken from grammar-level attributes.
    pub attr_arguments: AttrArguments,
    /// How the generated parser obtains terminals (see `LexerLayer`).
    pub lexer_layer: LexerLayer,
    /// Source-location tracing information.
    pub trace: Trace,
    /// Errors accumulated during the transformation pipeline.
    pub errors: Vec<TransformationError>,
}
/// Two-way association between internal `Sym` names and `cfg` grammar symbols.
#[derive(Clone)]
pub struct SymMap {
    /// Forward map: internal name -> grammar symbol.
    pub sym_map: HashMap<Sym, Symbol>,
    /// Reverse map, indexed by grammar symbol id; `None` marks symbols
    /// that have no recorded internal name.
    pub sym_vec: Vec<Option<Sym>>,
}
/// Symbol tables produced after remapping: the internal/external symbol
/// mapping together with the named-`Sym` table.
pub struct InternalExternalNameMap {
    /// Mapping between external and internal symbol numberings.
    internal_external: Mapping,
    /// Named-`Sym` table (keyed by external symbols — see `sym_of_external`).
    sym_map: SymMap,
}
impl SymMap {
    /// Creates an empty two-way mapping.
    fn new() -> Self {
        SymMap {
            sym_map: HashMap::new(),
            sym_vec: Vec::new(),
        }
    }
    /// Returns the grammar symbol for `internal_sym`, allocating a fresh
    /// symbol in `grammar` the first time the name is seen.
    pub fn intern(&mut self, grammar: &mut Grammar, internal_sym: &Sym) -> Symbol {
        match self.sym_map.get(internal_sym) {
            Some(&existing) => existing,
            None => {
                let allocated: Symbol = grammar.sym();
                self.insert(allocated, internal_sym.clone());
                allocated
            }
        }
    }
    /// Records the association in both directions.
    fn insert(&mut self, sym: Symbol, internal_sym: Sym) {
        self.insert_padding(sym);
        self.sym_vec[sym.usize()] = Some(internal_sym.clone());
        self.sym_map.insert(internal_sym, sym);
    }
    /// Grows the reverse table with `None` entries so that `symbol`
    /// becomes a valid index into it.
    fn insert_padding(&mut self, symbol: Symbol) {
        let needed_len = symbol.usize() + 1;
        if self.sym_vec.len() < needed_len {
            self.sym_vec.resize(needed_len, None);
        }
    }
    /// Looks up the internal name previously recorded for `sym`, if any.
    fn get(&self, sym: Symbol) -> Option<Sym> {
        self.sym_vec.get(sym.usize()).cloned().unwrap_or(None)
    }
    /// The whole reverse table, indexed by grammar symbol id.
    pub fn syms(&self) -> &[Option<Sym>] {
        &self.sym_vec[..]
    }
}
impl InternalExternalNameMap {
    /// Bundles a symbol remapping with the named-`Sym` table.
    fn new(internal_external: Mapping, sym_map: SymMap) -> Self {
        InternalExternalNameMap { internal_external, sym_map }
    }
    /// Maps a symbol to its internal counterpart; `None` when the symbol
    /// has no internal entry (e.g. it was removed during remapping).
    pub fn internalize(&self, sym: Symbol) -> Option<Symbol> {
        self.internal_external.to_internal[sym.usize()]
    }
    /// Maps an internal symbol back to its external counterpart.
    pub fn externalize(&self, sym: Symbol) -> Symbol {
        self.internal_external.to_external[sym.usize()]
    }
    /// Looks up the named `Sym` recorded for an external symbol.
    pub fn sym_of_external(&self, sym: Symbol) -> Option<Sym> {
        self.sym_map.get(sym)
    }
    /// Read-only access to the named-`Sym` table.
    fn sym_map(&self) -> &SymMap {
        &self.sym_map
    }
}
/// How the generated parser obtains its terminals.
#[derive(Debug)]
pub enum LexerLayer {
    /// Delegate tokenization to another lexer level.
    Invoke {
        /// Identifier of the lexer to invoke.
        lexer_id: LexerId,
        /// String literals embedded in the grammar, carried along for the
        /// invoked lexer.
        embedded_strings: Vec<String>,
    },
    /// Classify input characters directly into symbols.
    CharClassifier(Vec<(Class, Symbol)>),
    /// No lexer layer at all.
    None
}
/// Pipeline stage 1: parsed statements plus the start fragment.
struct IrStmts {
    /// The parsed grammar statements (possibly extended with synthetic
    /// lexer-start rules — see `IrStmts::compute`).
    stmts: ast::Stmts,
    /// Fragment id of the grammar's start symbol (lhs of the first statement).
    start: FragmentId,
}
/// Pipeline stage 2: statements flattened and rewritten into plain rules.
struct IrWithRules {
    /// Rules produced by `RuleRewrite`.
    rules: RuleRewriteResult,
    /// Start fragment carried over from stage 1.
    start: FragmentId,
    /// Arguments taken from grammar-level attributes.
    attr_arguments: AttrArguments,
    /// Errors found so far (e.g. type mismatches).
    errors: Vec<TransformationError>,
    /// Embedded string literals; empty at this stage.
    embedded_strings: Vec<String>,
    /// Collected type information for later type checking.
    type_collector: TypeCollector,
    /// Preliminary lexer layer decision (refined in `IrPrepared::compute`).
    lexer_layer: LexerLayer,
    /// Source-location tracing built from the statements.
    trace: Trace,
}
/// Pipeline stage 3: rules lowered into a `cfg` grammar.
struct IrPrepared {
    /// The grammar with all rules and sequences added.
    grammar: Grammar,
    /// Data shared by all later stages.
    common: CommonPrepared,
}
/// Pipeline stage 4: the grammar after binarization.
struct IrBinarized {
    /// Binarized form of the prepared grammar.
    bin_grammar: BinarizedGrammar,
    /// Data shared by all later stages.
    common: CommonPrepared,
}
/// Pipeline stage 5: nulling rules separated out.
struct IrNormalized {
    /// The ε-free main grammar.
    bin_grammar: BinarizedGrammar,
    /// Grammar holding only the nulling rules.
    nulling_grammar: BinarizedGrammar,
    /// Warnings collected during normalization (e.g. nullable cycles).
    warnings: WarningCauses,
    /// True when the start symbol derives the empty string.
    trivial_derivation: bool,
    /// Data shared by all later stages.
    common: CommonPrepared,
}
/// Pipeline stage 6: useless rules and cycles removed from the main grammar.
struct IrProperNormalized {
    /// The ε-free main grammar, now proper (useful, cycle-free).
    bin_grammar: BinarizedGrammar,
    /// Grammar holding only the nulling rules.
    nulling_grammar: BinarizedGrammar,
    /// Warnings collected so far (unreachable/unproductive rules, cycles).
    warnings: WarningCauses,
    /// True when the start symbol derives the empty string.
    trivial_derivation: bool,
    /// Data shared by all later stages.
    common: CommonPrepared,
}
/// Pipeline stage 7 (final): symbols remapped to a dense internal numbering.
pub struct IrMapped {
    /// The main grammar rebuilt over the remapped symbol space.
    bin_mapped_grammar: BinarizedGrammar,
    /// The nulling grammar rebuilt over the remapped symbol space.
    nulling_grammar: BinarizedGrammar,
    /// Internal/external symbol mapping plus the named-`Sym` table.
    maps: InternalExternalNameMap,
    /// Warnings collected by the whole pipeline.
    warnings: WarningCauses,
    /// True when the start symbol derives the empty string.
    trivial_derivation: bool,
    /// Data shared across pipeline stages.
    common: CommonPrepared,
}
/// Data produced by `IrPrepared::compute` and threaded unchanged through
/// all subsequent pipeline stages.
struct CommonPrepared {
    /// The lowered rules.
    rules: LowerRuleRewriteResult,
    /// Arguments taken from grammar-level attributes.
    attr_arguments: AttrArguments,
    /// Errors accumulated so far.
    errors: Vec<TransformationError>,
    /// Source-location tracing information.
    trace: Trace,
    /// The chosen lexer layer.
    lexer_layer: LexerLayer,
    /// Two-way `Sym` <-> `Symbol` table built during lowering.
    sym_map: SymMap,
    /// Collected types, keyed by source path.
    type_map: BTreeMap<Path, BTreeSet<Type>>,
}
impl IrStmts {
    /// Wraps the parsed statements, determining the start fragment and —
    /// when lexer arguments are present — appending one synthetic
    /// `LEXER_START_FRAGMENT` rule per lexer-level terminal.
    fn compute(mut stmts: ast::Stmts) -> Result<Self, TransformationError> {
        // The start symbol is the lhs of the first statement; an empty
        // statement list means there is no grammar at all.
        let start = match stmts.stmts.first() {
            Some(first_stmt) => first_stmt.lhs,
            None => return Err(TransformationError::GrammarIsEmpty),
        };
        // For a lexer level, synthesize `LEXER_START_FRAGMENT ::= terminal`
        // for every terminal exposed by the lexer arguments.
        let mut lower_start_stmts = vec![];
        if let Some(ref lexer_arguments) = stmts.attr_arguments.lexer_arguments {
            for &terminal in &lexer_arguments.terminals {
                let rhs = ast::Rhs(vec![
                    ast::RhsElement {
                        bind: None,
                        elem: ast::RhsAst::Fragment(terminal),
                    }
                ]);
                lower_start_stmts.push(ast::Stmt {
                    lhs: LEXER_START_FRAGMENT,
                    body: vec![(0, rhs, ast::Action { expr: None })],
                    ty: None,
                });
            }
        }
        stmts.stmts.extend(lower_start_stmts);
        Ok(IrStmts { stmts, start })
    }
}
impl IrWithRules {
    /// Flattens the statements into paths, rewrites them into plain rules,
    /// and collects type information, turning any detected type
    /// inequalities into `TypeMismatch` errors.
    fn compute(ir: IrStmts) -> Result<Self, TransformationError> {
        let mut trace = Trace::from_stmts(&ir.stmts);
        // Rewrite must see the flattened paths; type collection runs on the
        // joined statements afterwards.
        let (rules, type_collector) = {
            let mut rule_rewrite = RuleRewrite::new(&mut trace);
            let mut flatten = FlattenStmts::new();
            flatten.flatten_stmts(&ir.stmts);
            rule_rewrite.rewrite(flatten.paths.clone());
            let mut type_collector = TypeCollector::new();
            type_collector.collect(flatten.join_stmts());
            type_collector.simplify_tuples();
            (rule_rewrite.result(), type_collector)
        };
        // Every detected type inequality becomes one error.
        let mut errors = Vec::new();
        if let Some(inequalities) = type_collector.check_type_equality(&trace) {
            errors.extend(inequalities.into_iter().map(TransformationError::TypeMismatch));
        }
        // A declared lexer means terminals come from an invoked lexer;
        // embedded strings are filled in later (see `IrPrepared::compute`).
        let lexer_layer = match ir.stmts.lexer {
            Some(lexer_id) => LexerLayer::Invoke {
                lexer_id,
                embedded_strings: vec![],
            },
            None => LexerLayer::None,
        };
        Ok(IrWithRules {
            rules,
            start: ir.start,
            trace,
            lexer_layer,
            attr_arguments: ir.stmts.attr_arguments,
            embedded_strings: vec![],
            errors,
            type_collector,
        })
    }
}
impl IrPrepared {
    /// Lowers the rewritten rules into a `cfg` `Grammar`, interning symbols
    /// and finalizing which lexer layer the generated parser will use.
    fn compute(mut ir: IrWithRules) -> Self {
        let mut grammar = Grammar::new();
        let mut sym_map = SymMap::new();
        // Finalize the lexer layer. When embedded strings are present they
        // are handed to an invoked lexer; `ir` is owned, so the vec is
        // moved instead of cloned (the original cloned it needlessly).
        let lexer_layer = if ir.embedded_strings.is_empty() {
            ir.lexer_layer
        } else {
            let embedded_strings = ir.embedded_strings;
            match ir.lexer_layer {
                LexerLayer::Invoke { lexer_id, .. } => {
                    LexerLayer::Invoke {
                        embedded_strings,
                        lexer_id,
                    }
                }
                LexerLayer::None => {
                    if ir.attr_arguments.lexer_arguments.is_some() {
                        // This grammar is itself a lexer level; the char
                        // classification is filled in elsewhere.
                        LexerLayer::CharClassifier(vec![])
                    } else {
                        // No lexer declared: invoke the default lexer 0.
                        LexerLayer::Invoke {
                            lexer_id: 0,
                            embedded_strings,
                        }
                    }
                }
                // A classifier never coexists with embedded strings here.
                LexerLayer::CharClassifier(_) => unreachable!()
            }
        };
        // Intern the start fragment and register it as the grammar start.
        let start_sym = Sym::Fragment(ir.start);
        let start = sym_map.intern(&mut grammar, &start_sym);
        grammar.set_start(start);
        let grammar_rules = ir.rules.lower(&mut grammar, &mut sym_map);
        for rule in &grammar_rules.rules {
            IrPrepared::add_rule(&mut grammar, rule);
        }
        // Sequence rules are expanded into ordinary rules.
        grammar.rewrite_sequences();
        IrPrepared {
            grammar,
            common: CommonPrepared {
                rules: grammar_rules,
                attr_arguments: ir.attr_arguments,
                errors: ir.errors,
                trace: ir.trace,
                lexer_layer,
                sym_map,
                type_map: ir.type_collector.types,
            }
        }
    }
    /// Adds one lowered rule to the grammar: either a sequence rule
    /// (`min..=max` repetitions of a single rhs symbol) or a plain rule.
    /// (The unused `sym_map` parameter of the original was dropped.)
    fn add_rule(grammar: &mut Grammar, rule: &Rule) {
        match rule {
            &Rule { lhs, ref rhs, sequence: Some((min, max)), .. } => {
                // A sequence rule is built from exactly one rhs symbol.
                let rhs = rhs.first().cloned().expect("sequence rule with empty rhs");
                grammar.sequence(lhs).inclusive(min, max).rhs(rhs);
            }
            &Rule { lhs, ref rhs, sequence: None, .. } => {
                grammar.rule(lhs).rhs(&rhs[..]);
            }
        }
    }
}
impl IrBinarized {
fn compute(mut ir: IrPrepared) -> Result<Self, TransformationError> {
let bin_grammar = ir.grammar.binarize();
Ok(IrBinarized {
bin_grammar: bin_grammar,
common: ir.common,
})
}
}
impl IrNormalized {
    /// Splits the binarized grammar into an ε-free main grammar and a
    /// separate nulling grammar, then removes nulling rules that
    /// participate in cycles, recording a warning for each removal.
    /// (A dead `let _start = ...` binding from the original was removed.)
    fn compute(ir: IrBinarized) -> Self {
        // Move nulling rules out; the main grammar becomes ε-free.
        let (bin_grammar, mut nulling_grammar) = ir.bin_grammar.eliminate_nulling();
        let start = bin_grammar.start();
        // The empty string is derivable iff a nulling rule derives the start.
        let trivial_derivation = nulling_grammar.rules().any(|rule| rule.lhs() == start);
        // Relation lhs -> rhs-symbol over nulling rules; after transitive
        // closure, a set diagonal bit marks a symbol that lies on a cycle.
        let mut null_cycle_matrix = BitMatrix::new(bin_grammar.num_syms(), bin_grammar.num_syms());
        for rule in nulling_grammar.rules() {
            for sym in rule.rhs() {
                null_cycle_matrix.set(rule.lhs().usize(), sym.usize(), true);
            }
        }
        null_cycle_matrix.transitive_closure();
        let mut warnings = WarningCauses::new();
        // Drop any nulling rule whose lhs or rhs symbol is on a cycle.
        nulling_grammar.retain(|lhs, rhs, history| {
            let is_in_cycle = |sym: &Symbol| null_cycle_matrix[(sym.usize(), sym.usize())];
            let lhs_once = [lhs];
            let mut syms = lhs_once.iter().chain(rhs.iter());
            if syms.any(is_in_cycle) {
                // Only user-visible rules carry an origin; an internal rule
                // here would be a pipeline bug.
                warnings.cycles_among_nullable.push(history.origin().expect("internal rule with a cycle"));
                false
            } else {
                true
            }
        });
        IrNormalized {
            bin_grammar,
            nulling_grammar,
            warnings,
            trivial_derivation,
            common: ir.common,
        }
    }
}
impl IrProperNormalized {
    /// Removes useless (unreachable or unproductive) rules and cycles from
    /// the ε-free grammar, recording a warning for each removal.
    fn compute(mut ir: IrNormalized) -> Self {
        {
            let start = ir.bin_grammar.start();
            // Reachability/productivity analysis seeded from the start symbol.
            let mut usefulness = Usefulness::new(&mut *ir.bin_grammar).reachable([start]);
            for rule_uselessness in usefulness.useless_rules() {
                if rule_uselessness.unreachable {
                    // Only rules with a recorded origin (i.e. traceable to
                    // user input) produce a warning.
                    if let Some(origin) = rule_uselessness.rule.history().origin() {
                        ir.warnings.unreachable_rules.push(origin);
                    }
                } else {
                    // Unproductive rule: locate the rhs symbol(s) that cannot
                    // derive anything and report their traced position.
                    let rule = rule_uselessness.rule;
                    for (pos, &sym) in rule.rhs().iter().enumerate() {
                        if !usefulness.productivity(sym) {
                            // Dots on either side of rhs position `pos`.
                            let (dot_a, dot_b) = (rule.history().dot(pos), rule.history().dot(pos + 1));
                            match (dot_a.trace(), dot_b.trace()) {
                                (Some(dot1), Some(dot2)) => {
                                    // Warn only when both dots trace to the same
                                    // rule and to adjacent positions in it.
                                    if dot1.0 == dot2.0 && dot1.1 + 1 == dot2.1 {
                                        ir.warnings.unproductive_rules.push((dot1.0, dot1.1));
                                    }
                                }
                                _ => {}
                            }
                        }
                    }
                }
            }
            usefulness.remove_useless_rules();
        };
        let to_rule_origin = |rule: RuleRef<History>| {
            rule.history().origin().expect("internal rule in a cycle")
        };
        {
            // Detect rules participating in derivation cycles, warn, and
            // remove them.
            let mut cycle_analysis = Cycles::new(&mut *ir.bin_grammar);
            ir.warnings.cycles = cycle_analysis.cycle_participants().map(to_rule_origin).collect();
            cycle_analysis.remove_cycles();
        };
        IrProperNormalized {
            bin_grammar: ir.bin_grammar,
            nulling_grammar: ir.nulling_grammar,
            warnings: ir.warnings,
            trivial_derivation: ir.trivial_derivation,
            common: ir.common,
        }
    }
}
impl IrMapped {
    /// Remaps symbols to a dense internal numbering — ordering the two
    /// symbols of each unary rule relative to each other — then rebuilds
    /// both grammars over the remapped symbol space.
    fn compute(mut ir: IrProperNormalized) -> Result<Self, TransformationError> {
        // Partial move: take only `bin_grammar` out of `ir`; the remaining
        // fields (`nulling_grammar`, `warnings`, `common`, ...) stay usable.
        let IrProperNormalized { mut bin_grammar, .. } = ir;
        // For each unary rule, record a preferred order between its lhs and
        // rhs symbol, keyed with the smaller-numbered symbol first.
        let mut ordering = HashMap::new();
        for rule in bin_grammar.rules() {
            if rule.rhs().len() == 1 {
                let mut left = rule.lhs();
                let mut right = rule.rhs()[0];
                let ord;
                if left.usize() > right.usize() {
                    mem::swap(&mut left, &mut right);
                    ord = Ordering::Less;
                } else {
                    ord = Ordering::Greater;
                }
                ordering.insert((left, right), ord);
            }
        }
        let maps = {
            // Reorder symbols per `ordering`, then drop unused symbols; the
            // resulting `Mapping` translates between old and new numberings.
            let mut remap = Remap::new(&mut *bin_grammar);
            remap.reorder_symbols(|left, right| {
                // Normalize the lookup key to (smaller, larger); reverse the
                // stored ordering when the query came in the opposite order.
                let should_swap = left.usize() > right.usize();
                let ord = if should_swap {
                    ordering.get(&(right, left)).cloned().map(|ord| ord.reverse())
                } else {
                    ordering.get(&(left, right)).cloned()
                };
                ord.unwrap_or(Ordering::Equal)
            });
            remap.remove_unused_symbols();
            remap.get_mapping()
        };
        let maps = InternalExternalNameMap::new(maps, ir.common.sym_map.clone());
        if let Some(start) = maps.internalize(bin_grammar.start()) {
            bin_grammar.set_start(start);
        } else {
            // The start symbol did not survive remapping: nothing remains.
            ir.common.errors.push(TransformationError::GrammarIsEmpty);
        };
        // Rebuild the main grammar: allocate the same number of symbols,
        // then copy every rule over.
        let mut bin = BinarizedGrammar::new();
        for _ in 0 .. bin_grammar.num_syms() {
            let _: Symbol = bin.sym();
        }
        for rule in bin_grammar.rules() {
            let mut history = rule.history().clone();
            // Nullable markers in histories still carry pre-remap symbols;
            // internalize them too.
            history.nullable = history.nullable.map(|(sym, pos)| (maps.internalize(sym).unwrap(), pos));
            bin.rule(rule.lhs()).rhs_with_history(rule.rhs(), history);
        }
        // Rebuild the nulling grammar with internalized symbols.
        let mut remapped_nulling_grammar = BinarizedGrammar::new();
        if let Some(start) = maps.internalize(ir.nulling_grammar.start()) {
            remapped_nulling_grammar.set_start(start);
        }
        for _ in 0 .. bin_grammar.num_syms() {
            let _: Symbol = remapped_nulling_grammar.sym();
        }
        bin.set_start(bin_grammar.start());
        for rule in ir.nulling_grammar.rules() {
            let lhs = maps.internalize(rule.lhs()).unwrap();
            let rhs: Vec<_>;
            rhs = rule.rhs().iter().map(|sym| maps.internalize(*sym).unwrap()).collect();
            let history = rule.history().clone();
            remapped_nulling_grammar.rule(lhs).rhs_with_history(&rhs, history);
        }
        Ok(IrMapped {
            bin_mapped_grammar: bin,
            nulling_grammar: remapped_nulling_grammar,
            maps: maps,
            warnings: ir.warnings,
            trivial_derivation: ir.trivial_derivation,
            common: ir.common,
        })
    }
    /// Runs the whole pipeline from parsed statements to the mapped IR.
    pub fn transform_from_stmts(stmts: ast::Stmts) -> Result<Self, TransformationError> {
        let ir = IrStmts::compute(stmts)?;
        let ir = IrWithRules::compute(ir)?;
        let ir = IrPrepared::compute(ir);
        let ir = IrBinarized::compute(ir)?;
        let ir = IrNormalized::compute(ir);
        let ir = IrProperNormalized::compute(ir);
        IrMapped::compute(ir)
    }
    /// Reports the collected warnings. Not implemented yet: always panics.
    pub fn report_warnings(&self) {
        unimplemented!()
    }
    /// Errors accumulated by the pipeline, or `None` when there are none.
    pub fn get_errors(&self) -> Option<&[TransformationError]> {
        if self.common.errors.is_empty() {
            None
        } else {
            Some(&self.common.errors[..])
        }
    }
}
impl From<IrMapped> for Ir {
fn from(ir: IrMapped) -> Self {
Ir {
grammar: ir.bin_mapped_grammar,
nulling_grammar: ir.nulling_grammar,
trivial_derivation: ir.trivial_derivation,
rules: ir.common.rules,
type_map: ir.common.type_map,
maps: ir.maps,
errors: ir.common.errors,
lexer_layer: ir.common.lexer_layer,
attr_arguments: ir.common.attr_arguments,
trace: ir.common.trace,
}
}
}
impl Ir {
    /// Runs the whole lowering pipeline over parsed statements and
    /// converts the result into the public `Ir`.
    pub fn transform(stmts: ast::Stmts) -> Result<Self, TransformationError> {
        let mapped = IrMapped::transform_from_stmts(stmts)?;
        Ok(mapped.into())
    }
    /// Delegates to `InternalExternalNameMap::internalize`.
    pub fn internalize(&self, symbol: Symbol) -> Option<Symbol> {
        self.maps.internalize(symbol)
    }
    /// Delegates to `InternalExternalNameMap::externalize`.
    pub fn externalize(&self, symbol: Symbol) -> Symbol {
        self.maps.externalize(symbol)
    }
    /// Delegates to `InternalExternalNameMap::sym_of_external`.
    pub fn sym_of_external(&self, symbol: Symbol) -> Option<Sym> {
        self.maps.sym_of_external(symbol)
    }
    /// Read-only access to the named-`Sym` table.
    pub fn sym_map(&self) -> &SymMap {
        self.maps.sym_map()
    }
}