use super::{
AltRef as _AltRef, Alternative, Grammar, IdlVersion, Production, ProductionId, RepeatKind,
SpecRef, Symbol, TokenKind, TokenRule,
};
#[must_use]
pub fn compile(grammar: &Grammar) -> CompiledGrammar {
CompileState::new(grammar).compile()
}
#[derive(Debug, Clone)]
pub struct CompiledGrammar {
pub name: &'static str,
pub version: IdlVersion,
pub productions: Vec<Production>,
pub start: ProductionId,
pub token_rules: &'static [TokenRule],
}
impl CompiledGrammar {
#[must_use]
pub fn production(&self, id: ProductionId) -> Option<&Production> {
if let Some(p) = self.productions.get(id.0 as usize) {
if p.id == id {
return Some(p);
}
}
self.productions.iter().find(|p| p.id == id)
}
#[must_use]
pub fn start_production(&self) -> Option<&Production> {
self.production(self.start)
}
#[must_use]
pub fn production_count(&self) -> usize {
self.productions.len()
}
pub fn productions_iter(&self) -> impl Iterator<Item = &Production> {
self.productions.iter()
}
}
impl super::GrammarLike for CompiledGrammar {
fn production(&self, id: ProductionId) -> Option<&Production> {
self.production(id)
}
fn start(&self) -> ProductionId {
self.start
}
fn productions_slice(&self) -> &[Production] {
&self.productions
}
}
struct CompileState<'g> {
base: &'g Grammar,
productions: Vec<Production>,
next_id: u32,
}
impl<'g> CompileState<'g> {
fn new(grammar: &'g Grammar) -> Self {
Self {
base: grammar,
productions: Vec::with_capacity(grammar.productions.len() * 2),
next_id: grammar.productions.len() as u32,
}
}
fn compile(mut self) -> CompiledGrammar {
let mut transformed: Vec<Production> = Vec::with_capacity(self.base.productions.len());
for prod in self.base.productions {
let new_alts: Vec<Alternative> = prod
.alternatives
.iter()
.map(|alt| Alternative {
name: alt.name,
symbols: leak_symbols(self.transform_symbols(alt.symbols)),
note: alt.note,
})
.collect();
transformed.push(Production {
id: prod.id,
name: prod.name,
spec_ref: prod.spec_ref,
alternatives: leak_alternatives(new_alts),
ast_hint: prod.ast_hint,
});
}
let mut all = transformed;
all.append(&mut self.productions);
CompiledGrammar {
name: self.base.name,
version: self.base.version,
productions: all,
start: self.base.start,
token_rules: self.base.token_rules,
}
}
fn transform_symbols(&mut self, symbols: &[Symbol]) -> Vec<Symbol> {
let mut out = Vec::with_capacity(symbols.len());
for sym in symbols {
out.push(self.transform_symbol(sym));
}
out
}
fn transform_symbol(&mut self, sym: &Symbol) -> Symbol {
match sym {
Symbol::Terminal(_) | Symbol::Nonterminal(_) => *sym,
Symbol::Repeat(kind, inner) => {
let inner_transformed = self.transform_symbols(inner);
let new_id = self.alloc_id();
let synth = self.synth_repeat(new_id, *kind, inner_transformed);
self.productions.push(synth);
Symbol::Nonterminal(new_id)
}
Symbol::Choice(branches) => {
let mut alts: Vec<Alternative> = Vec::with_capacity(branches.len());
for branch in *branches {
let bt = self.transform_symbols(branch);
alts.push(Alternative {
name: None,
symbols: leak_symbols(bt),
note: None,
});
}
let new_id = self.alloc_id();
self.productions.push(Production {
id: new_id,
name: leak_name(format!("_chc_{}", new_id.0)),
spec_ref: SYNTHESIZED_SPEC,
alternatives: leak_alternatives(alts),
ast_hint: None,
});
Symbol::Nonterminal(new_id)
}
}
}
fn alloc_id(&mut self) -> ProductionId {
let id = ProductionId(self.next_id);
self.next_id += 1;
id
}
fn synth_repeat(
&mut self,
id: ProductionId,
kind: RepeatKind,
inner: Vec<Symbol>,
) -> Production {
let inner_static: &'static [Symbol] = leak_symbols(inner);
let alts: Vec<Alternative> = match kind {
RepeatKind::ZeroOrMore => vec![
Alternative {
name: Some("empty"),
symbols: &[],
note: None,
},
Alternative {
name: Some("cons"),
symbols: leak_symbols({
let mut v = vec![Symbol::Nonterminal(id)];
v.extend_from_slice(inner_static);
v
}),
note: None,
},
],
RepeatKind::OneOrMore => vec![
Alternative {
name: Some("single"),
symbols: inner_static,
note: None,
},
Alternative {
name: Some("cons"),
symbols: leak_symbols({
let mut v = vec![Symbol::Nonterminal(id)];
v.extend_from_slice(inner_static);
v
}),
note: None,
},
],
RepeatKind::Optional => vec![
Alternative {
name: Some("empty"),
symbols: &[],
note: None,
},
Alternative {
name: Some("present"),
symbols: inner_static,
note: None,
},
],
};
Production {
id,
name: leak_name(format!("_rep_{}", id.0)),
spec_ref: SYNTHESIZED_SPEC,
alternatives: leak_alternatives(alts),
ast_hint: None,
}
}
}
const SYNTHESIZED_SPEC: SpecRef = SpecRef {
doc: "synthesized",
section: "compile",
};
fn leak_symbols(syms: Vec<Symbol>) -> &'static [Symbol] {
Box::leak(syms.into_boxed_slice())
}
fn leak_alternatives(alts: Vec<Alternative>) -> &'static [Alternative] {
Box::leak(alts.into_boxed_slice())
}
fn leak_name(name: String) -> &'static str {
Box::leak(name.into_boxed_str())
}
#[allow(dead_code)]
const _ALT_REF_TYPE: Option<_AltRef> = None;
#[allow(dead_code)]
const _TOKEN_KIND_UNUSED: Option<TokenKind> = None;
#[cfg(test)]
mod tests {
#![allow(clippy::expect_used, clippy::panic)]
use super::*;
use crate::grammar::{Alternative, Grammar};
fn dummy_grammar(productions: &'static [Production]) -> Grammar {
Grammar {
name: "test",
version: IdlVersion::V4_2,
productions,
start: ProductionId(0),
token_rules: &[],
}
}
#[test]
fn compile_grammar_without_repeat_is_structural_copy() {
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[Symbol::Terminal(TokenKind::Keyword("x"))],
note: None,
}],
ast_hint: None,
}];
let compiled = compile(&dummy_grammar(PRODS));
assert_eq!(compiled.production_count(), 1);
assert_eq!(compiled.start, ProductionId(0));
}
#[test]
fn compile_repeat_one_or_more_creates_synth_production() {
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[Symbol::Repeat(
RepeatKind::OneOrMore,
&[Symbol::Terminal(TokenKind::Keyword("x"))],
)],
note: None,
}],
ast_hint: None,
}];
let compiled = compile(&dummy_grammar(PRODS));
assert_eq!(compiled.production_count(), 2);
let a = compiled.production(ProductionId(0)).expect("a");
assert_eq!(a.alternatives[0].symbols.len(), 1);
assert_eq!(
a.alternatives[0].symbols[0],
Symbol::Nonterminal(ProductionId(1))
);
let rep = compiled.production(ProductionId(1)).expect("rep");
assert_eq!(rep.alternatives.len(), 2);
assert_eq!(rep.name, "_rep_1");
}
#[test]
fn compile_repeat_zero_or_more_creates_epsilon_alt() {
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[Symbol::Repeat(
RepeatKind::ZeroOrMore,
&[Symbol::Terminal(TokenKind::Keyword("x"))],
)],
note: None,
}],
ast_hint: None,
}];
let compiled = compile(&dummy_grammar(PRODS));
let rep = compiled.production(ProductionId(1)).expect("rep");
assert_eq!(rep.alternatives.len(), 2);
assert_eq!(rep.alternatives[0].symbols, &[] as &[Symbol]);
assert_eq!(rep.alternatives[0].name, Some("empty"));
}
#[test]
fn compile_repeat_optional_creates_two_alts() {
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[Symbol::Repeat(
RepeatKind::Optional,
&[Symbol::Terminal(TokenKind::Keyword("x"))],
)],
note: None,
}],
ast_hint: None,
}];
let compiled = compile(&dummy_grammar(PRODS));
let rep = compiled.production(ProductionId(1)).expect("rep");
assert_eq!(rep.alternatives.len(), 2);
assert_eq!(rep.alternatives[0].name, Some("empty"));
assert_eq!(rep.alternatives[1].name, Some("present"));
}
#[test]
fn compile_choice_creates_one_alt_per_branch() {
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[Symbol::Choice(&[
&[Symbol::Terminal(TokenKind::Keyword("x"))],
&[Symbol::Terminal(TokenKind::Keyword("y"))],
])],
note: None,
}],
ast_hint: None,
}];
let compiled = compile(&dummy_grammar(PRODS));
let chc = compiled.production(ProductionId(1)).expect("chc");
assert_eq!(chc.alternatives.len(), 2);
assert_eq!(chc.name, "_chc_1");
}
#[test]
fn engine_recognizes_grammar_with_one_or_more_repeat() {
use crate::engine::Engine;
use crate::lexer::Token;
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[Symbol::Repeat(
RepeatKind::OneOrMore,
&[Symbol::Terminal(TokenKind::Keyword("x"))],
)],
note: None,
}],
ast_hint: None,
}];
let g = dummy_grammar(PRODS);
let engine = Engine::new(&g);
let one = [Token::synthetic(TokenKind::Keyword("x"))];
assert!(engine.recognize(&one).is_ok());
let five = [Token::synthetic(TokenKind::Keyword("x")); 5];
assert!(engine.recognize(&five).is_ok());
let zero: [Token<'static>; 0] = [];
assert!(engine.recognize(&zero).is_err());
}
#[test]
fn engine_recognizes_grammar_with_zero_or_more_repeat() {
use crate::engine::Engine;
use crate::lexer::Token;
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[Symbol::Repeat(
RepeatKind::ZeroOrMore,
&[Symbol::Terminal(TokenKind::Keyword("x"))],
)],
note: None,
}],
ast_hint: None,
}];
let g = dummy_grammar(PRODS);
let engine = Engine::new(&g);
let zero: [Token<'static>; 0] = [];
assert!(engine.recognize(&zero).is_ok());
let three = [Token::synthetic(TokenKind::Keyword("x")); 3];
assert!(engine.recognize(&three).is_ok());
}
#[test]
fn engine_recognizes_grammar_with_optional_repeat() {
use crate::engine::Engine;
use crate::lexer::Token;
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[
Symbol::Repeat(
RepeatKind::Optional,
&[Symbol::Terminal(TokenKind::Keyword("x"))],
),
Symbol::Terminal(TokenKind::Keyword("y")),
],
note: None,
}],
ast_hint: None,
}];
let g = dummy_grammar(PRODS);
let engine = Engine::new(&g);
let y_only = [Token::synthetic(TokenKind::Keyword("y"))];
assert!(engine.recognize(&y_only).is_ok());
let xy = [
Token::synthetic(TokenKind::Keyword("x")),
Token::synthetic(TokenKind::Keyword("y")),
];
assert!(engine.recognize(&xy).is_ok());
}
#[test]
fn engine_recognizes_grammar_with_choice() {
use crate::engine::Engine;
use crate::lexer::Token;
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[
Symbol::Choice(&[
&[Symbol::Terminal(TokenKind::Keyword("x"))],
&[Symbol::Terminal(TokenKind::Keyword("y"))],
]),
Symbol::Terminal(TokenKind::Keyword("z")),
],
note: None,
}],
ast_hint: None,
}];
let g = dummy_grammar(PRODS);
let engine = Engine::new(&g);
let xz = [
Token::synthetic(TokenKind::Keyword("x")),
Token::synthetic(TokenKind::Keyword("z")),
];
assert!(engine.recognize(&xz).is_ok());
let yz = [
Token::synthetic(TokenKind::Keyword("y")),
Token::synthetic(TokenKind::Keyword("z")),
];
assert!(engine.recognize(&yz).is_ok());
}
#[test]
fn compile_nested_repeat_creates_chained_synthetics() {
const PRODS: &[Production] = &[Production {
id: ProductionId(0),
name: "a",
spec_ref: SpecRef {
doc: "T",
section: "0",
},
alternatives: &[Alternative {
name: None,
symbols: &[Symbol::Repeat(
RepeatKind::ZeroOrMore,
&[Symbol::Repeat(
RepeatKind::OneOrMore,
&[Symbol::Terminal(TokenKind::Keyword("x"))],
)],
)],
note: None,
}],
ast_hint: None,
}];
let compiled = compile(&dummy_grammar(PRODS));
assert_eq!(compiled.production_count(), 3);
}
}