#[cfg(test)]
use crate::ast::Repetition;
use crate::ast::{Alternatives, BaseFactor, Factor, IxmlGrammar, Mark, Rule, Sequence};
use std::collections::{HashMap, HashSet};
pub fn normalize_grammar(grammar: &IxmlGrammar) -> IxmlGrammar {
let rule_map: HashMap<String, &Rule> =
grammar.rules.iter().map(|r| (r.name.clone(), r)).collect();
let recursive_rules = find_recursive_rules(grammar, &rule_map);
println!(
"[normalize] Found {} recursive rules: {:?}",
recursive_rules.len(),
recursive_rules
);
let mut normalized_rules = Vec::new();
for rule in &grammar.rules {
let mut normalized_rule = rule.clone();
inline_in_alternatives(
&mut normalized_rule.alternatives,
&rule_map,
&recursive_rules,
);
normalized_rules.push(normalized_rule);
}
let start_rule_name = grammar.rules.first().map(|r| r.name.clone());
normalized_rules
.retain(|r| recursive_rules.contains(&r.name) || start_rule_name.as_ref() == Some(&r.name));
println!(
"[normalize] Reduced from {} rules to {} rules",
grammar.rules.len(),
normalized_rules.len()
);
IxmlGrammar::new(normalized_rules)
}
fn find_recursive_rules(
grammar: &IxmlGrammar,
rule_map: &HashMap<String, &Rule>,
) -> HashSet<String> {
let mut recursive = HashSet::new();
for rule in &grammar.rules {
let mut visited = HashSet::new();
if is_recursive(&rule.name, rule_map, &mut visited) {
recursive.insert(rule.name.clone());
}
}
recursive
}
fn is_recursive(
rule_name: &str,
rule_map: &HashMap<String, &Rule>,
visited: &mut HashSet<String>,
) -> bool {
if visited.contains(rule_name) {
return true;
}
let rule = match rule_map.get(rule_name) {
Some(r) => r,
None => return false, };
visited.insert(rule_name.to_string());
let is_rec = check_alternatives_for_recursion(&rule.alternatives, rule_name, rule_map, visited);
visited.remove(rule_name);
is_rec
}
fn check_alternatives_for_recursion(
alternatives: &Alternatives,
target_rule: &str,
rule_map: &HashMap<String, &Rule>,
visited: &mut HashSet<String>,
) -> bool {
for seq in &alternatives.alts {
if check_sequence_for_recursion(seq, target_rule, rule_map, visited) {
return true;
}
}
false
}
fn check_sequence_for_recursion(
seq: &Sequence,
target_rule: &str,
rule_map: &HashMap<String, &Rule>,
visited: &mut HashSet<String>,
) -> bool {
for factor in &seq.factors {
if check_factor_for_recursion(factor, target_rule, rule_map, visited) {
return true;
}
}
false
}
fn check_factor_for_recursion(
factor: &Factor,
target_rule: &str,
rule_map: &HashMap<String, &Rule>,
visited: &mut HashSet<String>,
) -> bool {
match &factor.base {
BaseFactor::Nonterminal { name, .. } => {
if name == target_rule {
return true;
}
is_recursive(name, rule_map, visited)
}
BaseFactor::Group { alternatives } => {
check_alternatives_for_recursion(alternatives, target_rule, rule_map, visited)
}
_ => false, }
}
fn inline_in_alternatives(
alternatives: &mut Alternatives,
rule_map: &HashMap<String, &Rule>,
recursive_rules: &HashSet<String>,
) {
for seq in &mut alternatives.alts {
inline_in_sequence(seq, rule_map, recursive_rules);
}
}
fn inline_in_sequence(
seq: &mut Sequence,
rule_map: &HashMap<String, &Rule>,
recursive_rules: &HashSet<String>,
) {
let mut new_factors = Vec::new();
for factor in &seq.factors {
match inline_factor(factor, rule_map, recursive_rules) {
InlineResult::Keep(f) => new_factors.push(f),
InlineResult::Replace(factors) => new_factors.extend(factors),
}
}
seq.factors = new_factors;
}
enum InlineResult {
Keep(Factor), #[allow(dead_code)]
Replace(Vec<Factor>), }
fn inline_factor(
factor: &Factor,
rule_map: &HashMap<String, &Rule>,
recursive_rules: &HashSet<String>,
) -> InlineResult {
match &factor.base {
BaseFactor::Nonterminal { name, mark } => {
if recursive_rules.contains(name) {
return InlineResult::Keep(factor.clone());
}
let target_rule = match rule_map.get(name) {
Some(r) => r,
None => return InlineResult::Keep(factor.clone()), };
let mut inlined_alternatives = target_rule.alternatives.clone();
inline_in_alternatives(&mut inlined_alternatives, rule_map, recursive_rules);
let mut inlined_base = BaseFactor::Group {
alternatives: Box::new(inlined_alternatives),
};
if *mark != Mark::None {
inlined_base = apply_mark_to_base(inlined_base, *mark);
}
let inlined_factor = Factor::new(inlined_base, factor.repetition.clone());
InlineResult::Keep(inlined_factor)
}
BaseFactor::Group { alternatives } => {
let mut inlined_alternatives = (**alternatives).clone();
inline_in_alternatives(&mut inlined_alternatives, rule_map, recursive_rules);
let inlined_factor = Factor::new(
BaseFactor::Group {
alternatives: Box::new(inlined_alternatives),
},
factor.repetition.clone(),
);
InlineResult::Keep(inlined_factor)
}
_ => InlineResult::Keep(factor.clone()), }
}
fn apply_mark_to_base(base: BaseFactor, _mark: Mark) -> BaseFactor {
match base {
BaseFactor::Group { alternatives } => {
BaseFactor::Group { alternatives }
}
_ => base, }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_direct_recursion_detection() {
let grammar = IxmlGrammar::new(vec![
Rule::new(
"expr".to_string(),
Mark::None,
Alternatives::new(vec![
Sequence::new(vec![
Factor::simple(BaseFactor::nonterminal("expr".to_string())),
Factor::simple(BaseFactor::literal("+".to_string())),
Factor::simple(BaseFactor::nonterminal("term".to_string())),
]),
Sequence::new(vec![Factor::simple(BaseFactor::nonterminal(
"term".to_string(),
))]),
]),
),
Rule::new(
"term".to_string(),
Mark::None,
Alternatives::single(Sequence::new(vec![Factor::simple(BaseFactor::literal(
"x".to_string(),
))])),
),
]);
let rule_map: HashMap<_, _> = grammar.rules.iter().map(|r| (r.name.clone(), r)).collect();
let recursive = find_recursive_rules(&grammar, &rule_map);
assert!(recursive.contains("expr"));
assert!(!recursive.contains("term"));
}
#[test]
fn test_indirect_recursion_detection() {
let grammar = IxmlGrammar::new(vec![
Rule::new(
"a".to_string(),
Mark::None,
Alternatives::single(Sequence::new(vec![Factor::simple(
BaseFactor::nonterminal("b".to_string()),
)])),
),
Rule::new(
"b".to_string(),
Mark::None,
Alternatives::single(Sequence::new(vec![Factor::simple(
BaseFactor::nonterminal("c".to_string()),
)])),
),
Rule::new(
"c".to_string(),
Mark::None,
Alternatives::single(Sequence::new(vec![Factor::simple(
BaseFactor::nonterminal("a".to_string()),
)])),
),
]);
let rule_map: HashMap<_, _> = grammar.rules.iter().map(|r| (r.name.clone(), r)).collect();
let recursive = find_recursive_rules(&grammar, &rule_map);
assert!(recursive.contains("a"));
assert!(recursive.contains("b"));
assert!(recursive.contains("c"));
}
#[test]
fn test_simple_inlining() {
let grammar = IxmlGrammar::new(vec![
Rule::new(
"number".to_string(),
Mark::None,
Alternatives::single(Sequence::new(vec![Factor::new(
BaseFactor::nonterminal("digit".to_string()),
Repetition::OneOrMore,
)])),
),
Rule::new(
"digit".to_string(),
Mark::None,
Alternatives::single(Sequence::new(vec![Factor::simple(BaseFactor::charclass(
"\"0\"-\"9\"".to_string(),
))])),
),
]);
let normalized = normalize_grammar(&grammar);
assert_eq!(normalized.rules.len(), 1);
assert_eq!(normalized.rules[0].name, "number");
let first_alt = &normalized.rules[0].alternatives.alts[0];
let first_factor = &first_alt.factors[0];
assert_eq!(first_factor.repetition, Repetition::OneOrMore);
match &first_factor.base {
BaseFactor::Group { alternatives } => {
assert_eq!(alternatives.alts.len(), 1);
}
_ => panic!("Expected a Group after inlining"),
}
}
}