use std::sync::Arc;
use adze::glr_lexer::GLRLexer;
use adze::glr_parser::GLRParser;
use adze::glr_tree_bridge::{GLRTree, subtree_to_tree};
use adze::subtree::Subtree;
use adze_glr_core::{FirstFollowSets, build_lr1_automaton};
use adze_ir::{Grammar, ProductionId, Rule, Symbol, SymbolId, Token, TokenPattern};
use criterion::{Criterion, black_box, criterion_group, criterion_main};
fn create_arithmetic_grammar() -> Grammar {
let mut grammar = Grammar::new("expression".to_string());
let expr_id = SymbolId(0);
let number_id = SymbolId(1);
let plus_id = SymbolId(2);
let mult_id = SymbolId(3);
let lparen_id = SymbolId(4);
let rparen_id = SymbolId(5);
grammar.tokens.insert(
number_id,
Token {
name: "number".into(),
pattern: TokenPattern::Regex(r"\d+".into()),
fragile: false,
},
);
grammar.tokens.insert(
plus_id,
Token {
name: "plus".into(),
pattern: TokenPattern::String("+".into()),
fragile: false,
},
);
grammar.tokens.insert(
mult_id,
Token {
name: "multiply".into(),
pattern: TokenPattern::String("*".into()),
fragile: false,
},
);
grammar.tokens.insert(
lparen_id,
Token {
name: "lparen".into(),
pattern: TokenPattern::String("(".into()),
fragile: false,
},
);
grammar.tokens.insert(
rparen_id,
Token {
name: "rparen".into(),
pattern: TokenPattern::String(")".into()),
fragile: false,
},
);
let rules = vec![
Rule {
lhs: expr_id,
rhs: vec![
Symbol::NonTerminal(expr_id),
Symbol::Terminal(plus_id),
Symbol::NonTerminal(expr_id),
],
production_id: ProductionId(0),
precedence: None,
associativity: None,
fields: vec![],
},
Rule {
lhs: expr_id,
rhs: vec![
Symbol::NonTerminal(expr_id),
Symbol::Terminal(mult_id),
Symbol::NonTerminal(expr_id),
],
production_id: ProductionId(1),
precedence: None,
associativity: None,
fields: vec![],
},
Rule {
lhs: expr_id,
rhs: vec![
Symbol::Terminal(lparen_id),
Symbol::NonTerminal(expr_id),
Symbol::Terminal(rparen_id),
],
production_id: ProductionId(2),
precedence: None,
associativity: None,
fields: vec![],
},
Rule {
lhs: expr_id,
rhs: vec![Symbol::Terminal(number_id)],
production_id: ProductionId(3),
precedence: None,
associativity: None,
fields: vec![],
},
];
grammar.rules.insert(expr_id, rules);
grammar.rule_names.insert(expr_id, "expression".into());
grammar
}
fn small_input() -> String {
"1 + 2 * 3 + 4".to_string()
}
fn medium_input() -> String {
let parts: Vec<String> = (1..=250).map(|i| i.to_string()).collect();
parts.join(" + ")
}
fn large_input() -> String {
let parts: Vec<String> = (1..=2500).map(|i| i.to_string()).collect();
parts.join(" + ")
}
fn glr_parse(
grammar: &Grammar,
parse_table: &adze_glr_core::ParseTable,
input: &str,
) -> Result<Arc<Subtree>, String> {
let mut lexer = GLRLexer::new(grammar, input.to_string()).map_err(|e| format!("{e:?}"))?;
let mut parser = GLRParser::new(parse_table.clone(), grammar.clone());
let mut tokens = Vec::new();
while let Some(token) = lexer.next_token() {
tokens.push(token);
}
for token in &tokens {
parser.process_token(token.symbol_id, &token.text, token.byte_offset);
}
let total_bytes = tokens
.last()
.map(|t| t.byte_offset + t.text.len())
.unwrap_or(0);
parser.process_eof(total_bytes);
parser.finish()
}
fn serialize_subtree_json(subtree: &Subtree, source: &[u8]) -> serde_json::Value {
let start = subtree.node.byte_range.start;
let end = subtree.node.byte_range.end;
let text = if subtree.children.is_empty() {
std::str::from_utf8(&source[start..end.min(source.len())])
.ok()
.map(|s| serde_json::Value::String(s.to_string()))
} else {
None
};
let children: Vec<serde_json::Value> = subtree
.children
.iter()
.map(|e| serialize_subtree_json(&e.subtree, source))
.collect();
serde_json::json!({
"symbol": subtree.node.symbol_id.0,
"start_byte": start,
"end_byte": end,
"is_error": subtree.node.is_error,
"text": text,
"children": children,
})
}
fn serialize_subtree_sexpr(subtree: &Subtree, source: &[u8]) -> String {
let start = subtree.node.byte_range.start;
let end = subtree.node.byte_range.end;
if subtree.children.is_empty() {
let text = std::str::from_utf8(&source[start..end.min(source.len())]).unwrap_or("");
format!("\"{}\"", text.replace('"', "\\\""))
} else {
let children: Vec<String> = subtree
.children
.iter()
.map(|e| serialize_subtree_sexpr(&e.subtree, source))
.collect();
format!(
"(symbol_{} {})",
subtree.node.symbol_id.0,
children.join(" ")
)
}
}
fn count_nodes(subtree: &Subtree) -> (usize, usize) {
let mut nodes = 1usize;
let mut leaves = if subtree.children.is_empty() {
1usize
} else {
0
};
for edge in &subtree.children {
let (n, l) = count_nodes(&edge.subtree);
nodes += n;
leaves += l;
}
(nodes, leaves)
}
fn bench_parse_small(c: &mut Criterion) {
let grammar = create_arithmetic_grammar();
let ff = FirstFollowSets::compute(&grammar).unwrap();
let table = build_lr1_automaton(&grammar, &ff).unwrap();
let input = small_input();
c.bench_function("rt_parse_small_input", |b| {
b.iter(|| {
let _ = glr_parse(&grammar, &table, black_box(&input));
});
});
}
fn bench_parse_medium(c: &mut Criterion) {
let grammar = create_arithmetic_grammar();
let ff = FirstFollowSets::compute(&grammar).unwrap();
let table = build_lr1_automaton(&grammar, &ff).unwrap();
let input = medium_input();
c.bench_function("rt_parse_medium_input", |b| {
b.iter(|| {
let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
glr_parse(&grammar, &table, black_box(&input))
}));
});
});
}
fn bench_parse_large(c: &mut Criterion) {
let grammar = create_arithmetic_grammar();
let ff = FirstFollowSets::compute(&grammar).unwrap();
let table = build_lr1_automaton(&grammar, &ff).unwrap();
let input = large_input();
c.bench_function("rt_parse_large_input", |b| {
b.iter(|| {
let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
glr_parse(&grammar, &table, black_box(&input))
}));
});
});
}
fn bench_serialize_json(c: &mut Criterion) {
let grammar = create_arithmetic_grammar();
let ff = FirstFollowSets::compute(&grammar).unwrap();
let table = build_lr1_automaton(&grammar, &ff).unwrap();
let input = small_input();
let source = input.as_bytes();
let subtree = glr_parse(&grammar, &table, &input).expect("parse should succeed");
c.bench_function("rt_serialize_json", |b| {
b.iter(|| {
let json = serialize_subtree_json(black_box(&subtree), source);
black_box(json);
});
});
}
fn bench_serialize_sexpr(c: &mut Criterion) {
let grammar = create_arithmetic_grammar();
let ff = FirstFollowSets::compute(&grammar).unwrap();
let table = build_lr1_automaton(&grammar, &ff).unwrap();
let input = small_input();
let source = input.as_bytes();
let subtree = glr_parse(&grammar, &table, &input).expect("parse should succeed");
c.bench_function("rt_serialize_sexpr", |b| {
b.iter(|| {
let sexpr = serialize_subtree_sexpr(black_box(&subtree), source);
black_box(sexpr);
});
});
}
fn bench_visitor_traversal(c: &mut Criterion) {
let grammar = create_arithmetic_grammar();
let ff = FirstFollowSets::compute(&grammar).unwrap();
let table = build_lr1_automaton(&grammar, &ff).unwrap();
let input = small_input();
let subtree = glr_parse(&grammar, &table, &input).expect("parse should succeed");
c.bench_function("rt_visitor_traversal", |b| {
b.iter(|| {
let (nodes, leaves) = count_nodes(black_box(&subtree));
black_box((nodes, leaves));
});
});
}
fn bench_tree_construction(c: &mut Criterion) {
let grammar = create_arithmetic_grammar();
let ff = FirstFollowSets::compute(&grammar).unwrap();
let table = build_lr1_automaton(&grammar, &ff).unwrap();
let input = small_input();
let subtree = glr_parse(&grammar, &table, &input).expect("parse should succeed");
c.bench_function("rt_tree_construction", |b| {
b.iter(|| {
let tree: GLRTree = subtree_to_tree(
Arc::clone(black_box(&subtree)),
input.as_bytes().to_vec(),
grammar.clone(),
);
black_box(&tree);
});
});
}
criterion_group!(
parsing,
bench_parse_small,
bench_parse_medium,
bench_parse_large,
);
criterion_group!(serialization, bench_serialize_json, bench_serialize_sexpr,);
criterion_group!(traversal, bench_visitor_traversal, bench_tree_construction,);
criterion_main!(parsing, serialization, traversal);