#![forbid(unsafe_op_in_unsafe_fn)]
#![deny(private_interfaces)]
#![cfg_attr(feature = "strict_api", deny(unreachable_pub))]
#![cfg_attr(not(feature = "strict_api"), warn(unreachable_pub))]
#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
#![cfg_attr(feature = "strict_docs", deny(missing_docs))]
#![cfg_attr(not(feature = "strict_docs"), allow(missing_docs))]
mod test_helpers;
mod util;
#[cfg(test)]
pub use crate::test_helpers::test::{make_empty_table, make_minimal_table};
pub mod abi;
pub mod abi_builder;
pub mod compress;
pub mod compression;
pub mod error;
pub mod external_scanner;
pub mod external_scanner_v2;
pub mod generate;
pub mod helpers;
pub mod language_gen;
pub mod lexer_gen;
pub mod node_types;
pub mod parser;
#[cfg(feature = "serialization")]
pub mod parsetable_writer;
pub mod schema;
pub mod serializer;
pub mod validation;
pub use error::{Result, TableGenError};
pub use helpers::{collect_token_indices, eof_accepts_or_reduces};
pub use abi_builder::AbiLanguageBuilder;
pub use compress::{
ActionEntry, CompressedActionEntry, CompressedActionTable, CompressedGotoEntry,
CompressedGotoTable, CompressedParseTable, CompressedTables, GotoEntry, TableCompressor,
};
pub use external_scanner::ExternalScannerGenerator;
pub use generate::LanguageBuilder;
pub use node_types::NodeTypesGenerator;
#[cfg(feature = "serialization")]
pub use parsetable_writer::{
FORMAT_VERSION, FeatureFlags, GenerationInfo, GovernanceMetadata, GrammarInfo, MAGIC_NUMBER,
METADATA_SCHEMA_VERSION, ParserFeatureProfileSnapshot, ParsetableError, ParsetableMetadata,
ParsetableWriter, TableStatistics,
};
pub use validation::{LanguageValidator, ValidationError};
use adze_glr_core::*;
use adze_ir::*;
use proc_macro2::TokenStream;
use quote::quote;
/// Generates static Rust source (as `proc_macro2::TokenStream`s) for a
/// language definition from a grammar and its LR parse table.
pub struct StaticLanguageGenerator {
    /// Grammar the tables were built from.
    pub grammar: Grammar,
    /// Uncompressed LR parse table for `grammar`.
    pub parse_table: ParseTable,
    /// Populated by `compress_tables`; `None` until compression has run.
    pub compressed_tables: Option<CompressedTables>,
    /// Whether the start symbol can derive empty input; consulted during
    /// table compression (EOF handling).
    pub start_can_be_empty: bool,
}
impl StaticLanguageGenerator {
pub fn new(grammar: Grammar, parse_table: ParseTable) -> Self {
Self {
grammar,
parse_table,
compressed_tables: None,
start_can_be_empty: false,
}
}
/// Overrides the "start symbol can derive empty input" flag used by
/// [`Self::compress_tables`].
pub fn set_start_can_be_empty(&mut self, value: bool) {
    self.start_can_be_empty = value;
}
/// Emits the complete language definition as a token stream by delegating
/// to [`crate::language_gen::LanguageGenerator`].
pub fn generate_language_code(&self) -> TokenStream {
    crate::language_gen::LanguageGenerator::new(&self.grammar, &self.parse_table).generate()
}
/// Renders a `node-types.json`-style description of the grammar as a
/// pretty-printed JSON string.
///
/// Hidden symbols (names starting with `_`) are skipped. If serialization
/// fails, an empty JSON array (`"[]"`) is returned.
pub fn generate_node_types(&self) -> String {
    use serde_json::json;
    let mut types = Vec::new();
    for (symbol_id, rules) in &self.grammar.rules {
        // Prefer the user-visible rule name; fall back to the synthetic
        // "rule_<id>" label when no name was recorded. (Previously the
        // synthetic label was always used, which made the hidden-rule
        // check below unreachable: "rule_..." never starts with '_'.)
        let rule_name = self
            .grammar
            .rule_names
            .get(symbol_id)
            .cloned()
            .unwrap_or_else(|| format!("rule_{}", symbol_id.0));
        if rule_name.starts_with('_') {
            continue;
        }
        let mut node_type = json!({
            "type": rule_name,
            "named": true
        });
        let mut all_fields = serde_json::Map::new();
        let mut has_children = false;
        for rule in rules {
            for (field_id, _position) in &rule.fields {
                if let Some(field_name) = self.grammar.fields.get(field_id) {
                    // NOTE(review): cardinality ("multiple"/"required") and
                    // member "types" are fixed placeholders; they are not yet
                    // derived from the grammar — confirm downstream consumers
                    // tolerate this.
                    all_fields.insert(
                        field_name.clone(),
                        json!({
                            "multiple": false,
                            "required": true,
                            "types": []
                        }),
                    );
                }
            }
            if !rule.rhs.is_empty() {
                has_children = true;
            }
        }
        if !all_fields.is_empty() {
            node_type["fields"] = json!(all_fields);
        }
        if has_children {
            let mut children = serde_json::Map::new();
            children.insert("multiple".to_string(), json!(false));
            children.insert("required".to_string(), json!(true));
            children.insert("types".to_string(), json!([]));
            node_type["children"] = json!(children);
        }
        if self.grammar.supertypes.contains(symbol_id) {
            node_type["subtypes"] = json!([]);
        }
        types.push(node_type);
    }
    // Named tokens: only regex-based, non-hidden tokens appear as nodes.
    for (_, token) in &self.grammar.tokens {
        if !token.name.starts_with('_') && matches!(&token.pattern, TokenPattern::Regex(_)) {
            types.push(json!({
                "type": token.name,
                "named": true
            }));
        }
    }
    // External (scanner-provided) tokens are named unless hidden.
    for external in &self.grammar.externals {
        if !external.name.starts_with('_') {
            types.push(json!({
                "type": external.name,
                "named": true
            }));
        }
    }
    serde_json::to_string_pretty(&json!(types)).unwrap_or_else(|_| "[]".to_string())
}
#[allow(dead_code)]
/// Collects display names for every symbol, in table order: tokens first,
/// then rules (as synthetic `rule_<id>` labels), then external tokens.
fn generate_symbol_names(&self) -> Vec<String> {
    let token_names = self
        .grammar
        .tokens
        .iter()
        .map(|(_, token)| token.name.clone());
    let rule_labels = self
        .grammar
        .rules
        .iter()
        .map(|(symbol_id, _)| format!("rule_{}", symbol_id.0));
    let external_names = self.grammar.externals.iter().map(|ext| ext.name.clone());
    token_names.chain(rule_labels).chain(external_names).collect()
}
#[allow(dead_code)]
/// Builds one `TSSymbolMetadata` initializer per symbol, in the same order
/// as [`Self::generate_symbol_names`]: tokens, rules, externals.
fn generate_symbol_metadata(&self) -> Vec<TokenStream> {
    // All three symbol kinds emit the same struct literal; share the template.
    let entry = |visible: bool, named: bool, supertype: bool| {
        quote! {
            adze::ffi::TSSymbolMetadata {
                visible: #visible,
                named: #named,
                supertype: #supertype,
            }
        }
    };
    let mut metadata = Vec::new();
    for (_, token) in &self.grammar.tokens {
        let visible = !token.name.starts_with('_');
        // Only regex tokens count as named, and hidden tokens never do.
        let named = matches!(&token.pattern, TokenPattern::Regex(_)) && visible;
        metadata.push(entry(visible, named, false));
    }
    for (symbol_id, _rule) in &self.grammar.rules {
        // Synthetic "rule_<id>" labels never begin with '_', so this is
        // effectively always visible; kept for parity with the label scheme.
        let visible = !format!("rule_{}", symbol_id.0).starts_with('_');
        let supertype = self.grammar.supertypes.contains(symbol_id);
        metadata.push(entry(visible, visible, supertype));
    }
    for external in &self.grammar.externals {
        let visible = !external.name.starts_with('_');
        metadata.push(entry(visible, visible, false));
    }
    metadata
}
#[allow(dead_code)]
/// Returns the declared field names in the field map's iteration order.
fn generate_field_names(&self) -> Vec<String> {
    let mut names = Vec::with_capacity(self.grammar.fields.len());
    for name in self.grammar.fields.values() {
        names.push(name.clone());
    }
    names
}
#[allow(dead_code)]
/// Emits the uncompressed `ACTION_TABLE` / `GOTO_TABLE` static definitions
/// as a `(action, goto)` pair of token streams.
fn generate_uncompressed_tables(&self) -> (TokenStream, TokenStream) {
    let actions = self.generate_action_table_entries();
    let gotos = self.generate_goto_table_entries();
    (
        quote! {
            static ACTION_TABLE: &[&[adze::ffi::TSParseActionEntry]] = &[#(#actions),*];
        },
        quote! {
            static GOTO_TABLE: &[&[u16]] = &[#(#gotos),*];
        },
    )
}
#[allow(dead_code)]
/// Emits compressed table statics, choosing the small or large layout based
/// on the table's state count versus the compressor's threshold.
///
/// NOTE(review): the "large" branch currently reuses the small layout; see
/// `generate_large_compressed_tables`.
fn generate_compressed_tables(
    &self,
    compressed: &CompressedTables,
) -> (TokenStream, TokenStream) {
    if self.parse_table.state_count < compressed.small_table_threshold {
        self.generate_small_compressed_tables(compressed)
    } else {
        self.generate_large_compressed_tables(compressed)
    }
}
#[allow(dead_code)]
/// Emits the `SMALL_PARSE_TABLE` / `GOTO_TABLE` statics for the small
/// (u16-encoded) compressed layout.
fn generate_small_compressed_tables(
    &self,
    compressed: &CompressedTables,
) -> (TokenStream, TokenStream) {
    let action_entries = self.generate_small_action_entries(&compressed.action_table);
    let goto_entries = self.generate_small_goto_entries(&compressed.goto_table);
    // Each encodable action contributes TWO u16 words (symbol + encoded
    // action), and unencodable actions are skipped entirely, so the declared
    // array length must come from the emitted entries. The previous code used
    // `compressed.action_table.data.len()`, declaring the static with half
    // the actual element count and producing uncompilable generated code.
    let action_count = action_entries.len();
    let goto_count = self.count_goto_entries(&compressed.goto_table);
    let action_table = quote! {
        static SMALL_PARSE_TABLE: &[u16; #action_count] = &[#(#action_entries),*];
        static SMALL_PARSE_TABLE_MAP: &[u16] = &[];
    };
    let goto_table = quote! {
        static GOTO_TABLE: &[u16; #goto_count] = &[#(#goto_entries),*];
    };
    (action_table, goto_table)
}
#[allow(dead_code)]
/// Emits compressed tables for grammars at or above the small-table
/// threshold.
///
/// NOTE(review): currently an alias for the small layout — a dedicated
/// large (wider-encoding) layout has not been implemented yet. Confirm this
/// is intentional before relying on it for very large tables.
fn generate_large_compressed_tables(
    &self,
    compressed: &CompressedTables,
) -> (TokenStream, TokenStream) {
    self.generate_small_compressed_tables(compressed)
}
#[allow(dead_code)]
/// Encodes the compressed action table into a flat stream of u16 pairs:
/// `[symbol, encoded_action, symbol, encoded_action, ...]`.
fn generate_small_action_entries(
    &self,
    action_table: &CompressedActionTable,
) -> Vec<TokenStream> {
    let mut entries = Vec::new();
    let compressor = TableCompressor::new();
    for entry in &action_table.data {
        // NOTE(review): actions that do not fit the small encoding are
        // silently dropped rather than reported. The (symbol, action) pair is
        // skipped as a unit so pairing stays intact, but the state's action
        // list ends up incomplete — confirm this is intended.
        if let Ok(encoded) = compressor.encode_action_small(&entry.action) {
            let symbol = entry.symbol;
            entries.push(quote! { #symbol });
            entries.push(quote! { #encoded });
        }
    }
    entries
}
#[allow(dead_code)]
/// Expands the compressed goto entries back into one token per goto slot,
/// materializing run-length entries as `count` repeated states.
fn generate_small_goto_entries(&self, goto_table: &CompressedGotoTable) -> Vec<TokenStream> {
    goto_table
        .data
        .iter()
        .flat_map(|entry| -> Vec<TokenStream> {
            match entry {
                CompressedGotoEntry::Single(state) => vec![quote! { #state }],
                CompressedGotoEntry::RunLength { state, count } => {
                    (0..*count).map(|_| quote! { #state }).collect()
                }
            }
        })
        .collect()
}
#[allow(dead_code)]
/// Number of goto slots the compressed table expands to; a run-length entry
/// counts as `count` slots.
fn count_goto_entries(&self, goto_table: &CompressedGotoTable) -> usize {
    let mut total = 0usize;
    for entry in &goto_table.data {
        total += match entry {
            CompressedGotoEntry::Single(_) => 1,
            CompressedGotoEntry::RunLength { count, .. } => *count as usize,
        };
    }
    total
}
#[allow(dead_code)]
fn generate_action_table_entries(&self) -> Vec<TokenStream> {
let mut entries = Vec::new();
for state_actions in &self.parse_table.action_table {
let actions: Vec<TokenStream> = state_actions
.iter()
.flat_map(|action_cell| {
action_cell.iter().map(|action| {
match action {
Action::Shift(state) => {
let state_id = state.0;
quote! {
adze::ffi::TSParseActionEntry {
type_: adze::ffi::TSParseActionType::Shift,
state: #state_id,
symbol: 0,
child_count: 0,
dynamic_precedence: 0,
fragile: false,
}
}
}
Action::Reduce(rule) => {
let rule_id = rule.0;
quote! {
adze::ffi::TSParseActionEntry {
type_: adze::ffi::TSParseActionType::Reduce,
state: 0,
symbol: #rule_id,
child_count: 0, dynamic_precedence: 0,
fragile: false,
}
}
}
Action::Accept => {
quote! {
adze::ffi::TSParseActionEntry {
type_: adze::ffi::TSParseActionType::Accept,
state: 0,
symbol: 0,
child_count: 0,
dynamic_precedence: 0,
fragile: false,
}
}
}
Action::Error => {
quote! {
adze::ffi::TSParseActionEntry {
type_: adze::ffi::TSParseActionType::Error,
state: 0,
symbol: 0,
child_count: 0,
dynamic_precedence: 0,
fragile: false,
}
}
}
Action::Recover => {
quote! {
adze::ffi::TSParseActionEntry {
type_: adze::ffi::TSParseActionType::Error,
state: 0,
symbol: 0,
child_count: 0,
dynamic_precedence: 0,
fragile: false,
}
}
}
Action::Fork(actions) => {
if let Some(Action::Shift(state)) = actions.first() {
let state_id = state.0;
quote! {
adze::ffi::TSParseActionEntry {
type_: adze::ffi::TSParseActionType::Shift,
state: #state_id,
symbol: 0,
child_count: 0,
dynamic_precedence: 0,
fragile: false,
}
}
} else {
quote! {
adze::ffi::TSParseActionEntry {
type_: adze::ffi::TSParseActionType::Error,
state: 0,
symbol: 0,
child_count: 0,
dynamic_precedence: 0,
fragile: false,
}
}
}
}
_ => {
quote! {
adze::ffi::TSParseActionEntry {
type_: adze::ffi::TSParseActionType::Error,
state: 0,
symbol: 0,
child_count: 0,
dynamic_precedence: 0,
fragile: false,
}
}
}
}
})
})
.collect();
entries.push(quote! { &[#(#actions),*] });
}
entries
}
#[allow(dead_code)]
/// Builds one `&[u16]` slice initializer per parser state from the goto
/// table (each entry is the destination state id).
fn generate_goto_table_entries(&self) -> Vec<TokenStream> {
    self.parse_table
        .goto_table
        .iter()
        .map(|state_gotos| {
            let gotos: Vec<u16> = state_gotos.iter().map(|state| state.0).collect();
            quote! { &[#(#gotos),*] }
        })
        .collect()
}
/// Compresses the parse table and stores the result in
/// `self.compressed_tables`.
///
/// If `start_can_be_empty` was not set explicitly, it is derived from
/// whether EOF accepts or reduces in the table before compressing.
pub fn compress_tables(&mut self) -> Result<()> {
    let start_can_be_empty =
        self.start_can_be_empty || helpers::eof_accepts_or_reduces(&self.parse_table);
    self.start_can_be_empty = start_can_be_empty;
    let token_indices = helpers::collect_token_indices(&self.grammar, &self.parse_table);
    let tables =
        TableCompressor::new().compress(&self.parse_table, &token_indices, start_can_be_empty)?;
    self.compressed_tables = Some(tables);
    Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Release builds compile debug_trace! away entirely.
    #[cfg(not(debug_assertions))]
    macro_rules! debug_trace {
        ($($arg:tt)*) => {};
    }

    // Debug builds forward to stderr, but only when RUST_LOG contains "debug".
    #[cfg(debug_assertions)]
    macro_rules! debug_trace {
        ($($arg:tt)*) => {
            if std::env::var("RUST_LOG")
                .ok()
                .unwrap_or_default()
                .contains("debug")
            {
                eprintln!($($arg)*);
            }
        };
    }

    // A fresh generator keeps its inputs and has no compressed tables yet.
    #[test]
    fn test_static_language_generator_creation() {
        let grammar = Grammar::new("test".to_string());
        let parse_table = crate::empty_table!(states: 1, terms: 0, nonterms: 0);
        let generator = StaticLanguageGenerator::new(grammar, parse_table);
        assert_eq!(generator.grammar.name, "test");
        assert_eq!(generator.parse_table.state_count, 1);
        assert!(generator.compressed_tables.is_none());
    }

    // Small-table u16 encoding: shifts stay below 0x8000, reduces set the
    // high bit, Accept/Error use the top sentinel values.
    #[test]
    fn test_action_encoding_small_table() {
        let compressor = TableCompressor::new();
        let shift_action = Action::Shift(StateId(42));
        let encoded = compressor.encode_action_small(&shift_action).unwrap();
        assert_eq!(encoded, 42);
        assert!(encoded < 0x8000);
        let reduce_action = Action::Reduce(RuleId(17));
        let encoded = compressor.encode_action_small(&reduce_action).unwrap();
        assert_eq!(encoded, 32786);
        assert!(encoded >= 0x8000);
        let accept_action = Action::Accept;
        let encoded = compressor.encode_action_small(&accept_action).unwrap();
        assert_eq!(encoded, 0xFFFF);
        let error_action = Action::Error;
        let encoded = compressor.encode_action_small(&error_action).unwrap();
        assert_eq!(encoded, 0xFFFE);
    }

    // Ids too large for the small encoding must be rejected, not truncated.
    #[test]
    fn test_action_encoding_overflow() {
        let compressor = TableCompressor::new();
        let shift_action = Action::Shift(StateId(0x8000));
        let result = compressor.encode_action_small(&shift_action);
        assert!(result.is_err());
        let reduce_action = Action::Reduce(RuleId(0x4000));
        let result = compressor.encode_action_small(&reduce_action);
        assert!(result.is_err());
    }

    // Smoke test: construction must not panic.
    #[test]
    fn test_table_compressor_creation() {
        let compressor = TableCompressor::new();
        let _ = compressor;
    }

    // Symbol names combine token names with synthetic "rule_<id>" labels.
    #[test]
    fn test_symbol_names_generation() {
        let mut grammar = Grammar::new("test".to_string());
        let token = Token {
            name: "NUMBER".to_string(),
            pattern: TokenPattern::Regex(r"\d+".to_string()),
            fragile: false,
        };
        grammar.tokens.insert(SymbolId(0), token);
        let rule = Rule {
            lhs: SymbolId(1),
            rhs: vec![Symbol::Terminal(SymbolId(0))],
            precedence: None,
            associativity: None,
            fields: vec![],
            production_id: ProductionId(0),
        };
        grammar.add_rule(rule);
        let parse_table = crate::empty_table!(states: 1, terms: 0, nonterms: 0);
        let generator = StaticLanguageGenerator::new(grammar, parse_table);
        let symbol_names = generator.generate_symbol_names();
        assert_eq!(symbol_names.len(), 2);
        assert!(symbol_names.contains(&"NUMBER".to_string()));
        assert!(symbol_names.contains(&"rule_1".to_string()));
    }

    // Field names come back in map order.
    #[test]
    fn test_field_names_generation() {
        let mut grammar = Grammar::new("test".to_string());
        grammar.fields.insert(FieldId(0), "left".to_string());
        grammar.fields.insert(FieldId(1), "right".to_string());
        let parse_table = crate::empty_table!(states: 1, terms: 0, nonterms: 0);
        let generator = StaticLanguageGenerator::new(grammar, parse_table);
        let field_names = generator.generate_field_names();
        assert_eq!(field_names, vec!["left", "right"]);
    }

    // The node-types output must always be valid JSON, even for an empty grammar.
    #[test]
    fn test_node_types_generation() {
        let grammar = Grammar::new("test".to_string());
        let parse_table = crate::empty_table!(states: 1, terms: 0, nonterms: 0);
        let generator = StaticLanguageGenerator::new(grammar, parse_table);
        let node_types = generator.generate_node_types();
        assert!(serde_json::from_str::<serde_json::Value>(&node_types).is_ok());
    }

    // A tiny two-state table compresses and records the small-table threshold.
    #[test]
    fn test_table_compression_small_table() {
        let grammar = Grammar::new("test".to_string());
        let mut parse_table = crate::test_helpers::test::make_minimal_table(
            vec![
                vec![vec![Action::Shift(StateId(1))], vec![Action::Error]],
                vec![vec![Action::Reduce(RuleId(0))], vec![Action::Accept]],
            ],
            vec![vec![StateId(0), StateId(1)], vec![StateId(2), StateId(0)]],
            vec![],
            SymbolId(1),
            SymbolId(1),
            0,
        );
        parse_table.eof_symbol = SymbolId(0);
        parse_table.symbol_to_index.clear();
        parse_table.symbol_to_index.insert(SymbolId(0), 0);
        parse_table.symbol_to_index.insert(SymbolId(1), 1);
        let mut generator = StaticLanguageGenerator::new(grammar, parse_table);
        assert!(generator.compress_tables().is_ok());
        assert!(generator.compressed_tables.is_some());
        let compressed = generator.compressed_tables.as_ref().unwrap();
        assert_eq!(compressed.small_table_threshold, 32768);
    }

    // A 40k-state table exceeds the small-table threshold but must still compress.
    #[test]
    fn test_table_compression_large_table() {
        let _grammar = Grammar::new("large_test".to_string());
        let mut parse_table = crate::test_helpers::test::make_minimal_table(
            vec![vec![vec![Action::Error]; 10]; 40000],
            vec![vec![StateId(0); 10]; 40000],
            vec![],
            SymbolId(1),
            SymbolId(1),
            0,
        );
        parse_table.eof_symbol = SymbolId(0);
        parse_table.symbol_to_index.clear();
        parse_table.symbol_to_index.insert(SymbolId(0), 0);
        parse_table.action_table[0][0] = vec![Action::Accept];
        let compressor = TableCompressor::new();
        let grammar = Grammar::default();
        let token_indices = helpers::collect_token_indices(&grammar, &parse_table);
        let start_can_be_empty = true;
        let result = compressor.compress(&parse_table, &token_indices, start_can_be_empty);
        let compressed = result.expect("large table should compress");
        assert_eq!(compressed.small_table_threshold, 32768);
        assert!(parse_table.state_count >= compressed.small_table_threshold);
    }

    // Rows dominated by Error should use Error as the per-row default action.
    #[test]
    fn test_compressed_action_table_small() {
        let compressor = TableCompressor::new();
        let action_table = vec![
            vec![
                vec![Action::Shift(StateId(1))],
                vec![Action::Error],
                vec![Action::Error],
            ],
            vec![
                vec![Action::Error],
                vec![Action::Reduce(RuleId(0))],
                vec![Action::Error],
            ],
        ];
        let symbol_to_index = std::collections::BTreeMap::new();
        let compressed = compressor.compress_action_table_small(&action_table, &symbol_to_index);
        assert!(compressed.is_ok());
        let compressed = compressed.unwrap();
        assert_eq!(compressed.default_actions.len(), 2);
        // row_offsets has one extra trailing offset (end sentinel).
        assert_eq!(compressed.row_offsets.len(), 3);
        match &compressed.default_actions[0] {
            Action::Error => {}
            _ => panic!("Expected Error as default for first row"),
        }
        match &compressed.default_actions[1] {
            Action::Error => {}
            _ => panic!("Expected Error as default for second row"),
        }
    }

    // Default-reduction folding is disabled: identical reduces are still
    // encoded explicitly rather than becoming the row default.
    #[test]
    fn test_compressed_action_table_with_default_reduction() {
        let compressor = TableCompressor::new();
        let action_table = vec![vec![
            vec![Action::Reduce(RuleId(1))],
            vec![Action::Reduce(RuleId(1))],
            vec![Action::Reduce(RuleId(1))],
        ]];
        let symbol_to_index = std::collections::BTreeMap::new();
        let compressed = compressor.compress_action_table_small(&action_table, &symbol_to_index);
        assert!(compressed.is_ok());
        let compressed = compressed.unwrap();
        match &compressed.default_actions[0] {
            Action::Error => {}
            _ => panic!("Expected Error as default (optimization disabled)"),
        }
        let entries_for_state_0 = compressed.row_offsets[1] - compressed.row_offsets[0];
        assert_eq!(
            entries_for_state_0, 3,
            "All reduce actions should be explicitly encoded"
        );
    }

    // Distinct states stay as singles; a whole row of one state run-length encodes.
    #[test]
    fn test_compressed_goto_table_small() {
        let compressor = TableCompressor::new();
        let goto_table = vec![
            vec![StateId(0), StateId(0), StateId(1)],
            vec![StateId(2), StateId(2), StateId(2)],
        ];
        let compressed = compressor.compress_goto_table_small(&goto_table);
        assert!(compressed.is_ok());
        let compressed = compressed.unwrap();
        assert_eq!(compressed.row_offsets.len(), 3);
        assert!(!compressed.data.is_empty());
        let first_row_start = compressed.row_offsets[0] as usize;
        let first_row_end = compressed.row_offsets[1] as usize;
        let first_row_entries = &compressed.data[first_row_start..first_row_end];
        assert_eq!(first_row_entries.len(), 3);
        let second_row_start = compressed.row_offsets[1] as usize;
        let second_row_end = compressed.row_offsets[2] as usize;
        let second_row_entries = &compressed.data[second_row_start..second_row_end];
        assert_eq!(second_row_entries.len(), 1);
        match &second_row_entries[0] {
            CompressedGotoEntry::RunLength { state: 2, count: 3 } => {}
            _ => panic!("Expected run-length encoding for second row"),
        }
    }

    // Runs shorter than 3 stay as singles; a run of 3 is the threshold for RLE.
    #[test]
    fn test_goto_table_run_length_threshold() {
        let compressor = TableCompressor::new();
        let goto_table = vec![vec![
            StateId(1),
            StateId(2),
            StateId(2),
            StateId(3),
            StateId(3),
            StateId(3),
        ]];
        let compressed = compressor.compress_goto_table_small(&goto_table);
        assert!(compressed.is_ok());
        let compressed = compressed.unwrap();
        let entries = &compressed.data;
        assert_eq!(entries.len(), 4);
        match &entries[0] {
            CompressedGotoEntry::Single(1) => {}
            _ => panic!("Expected single entry for StateId(1)"),
        }
        match &entries[1] {
            CompressedGotoEntry::Single(2) => {}
            _ => panic!("Expected single entry for first StateId(2)"),
        }
        match &entries[2] {
            CompressedGotoEntry::Single(2) => {}
            _ => panic!("Expected single entry for second StateId(2)"),
        }
        match &entries[3] {
            CompressedGotoEntry::RunLength { state: 3, count: 3 } => {}
            _ => panic!("Expected run-length for StateId(3)"),
        }
    }

    // Generated code must expose the language entry point and version constant.
    #[test]
    fn test_language_code_generation() {
        let grammar = Grammar::new("test_lang".to_string());
        let parse_table = crate::test_helpers::test::make_minimal_table(
            vec![vec![vec![], vec![Action::Accept]]],
            vec![vec![StateId(0), StateId(0)]],
            vec![],
            SymbolId(1),
            SymbolId(1),
            0,
        );
        let generator = StaticLanguageGenerator::new(grammar, parse_table);
        let code = generator.generate_language_code();
        let code_str = code.to_string();
        debug_trace!("Generated code: {}", code_str);
        assert!(code_str.contains("pub fn language"));
        assert!(code_str.contains("tree_sitter_test_lang"));
        assert!(code_str.contains("LANGUAGE_VERSION"));
    }

    // Compression output must round-trip through its own validation check.
    #[test]
    fn test_compressed_tables_validation() {
        let mut parse_table = crate::test_helpers::test::make_minimal_table(
            vec![
                vec![vec![Action::Shift(StateId(1))], vec![Action::Error]],
                vec![vec![Action::Reduce(RuleId(0))], vec![Action::Accept]],
            ],
            vec![vec![StateId(0), StateId(1)], vec![StateId(2), StateId(0)]],
            vec![],
            SymbolId(1),
            SymbolId(1),
            0,
        );
        parse_table.eof_symbol = SymbolId(0);
        parse_table.symbol_to_index.clear();
        parse_table.symbol_to_index.insert(SymbolId(0), 0);
        parse_table.symbol_to_index.insert(SymbolId(1), 1);
        let compressor = TableCompressor::new();
        let grammar = Grammar::default();
        let token_indices = helpers::collect_token_indices(&grammar, &parse_table);
        let start_can_be_empty = false;
        let compressed = compressor
            .compress(&parse_table, &token_indices, start_can_be_empty)
            .unwrap();
        assert!(compressed.validate(&parse_table).is_ok());
    }

    // Pin the exact encoded values expected by the tree-sitter table format.
    #[test]
    fn test_tree_sitter_compatibility() {
        let compressor = TableCompressor::new();
        let shift = Action::Shift(StateId(42));
        assert_eq!(compressor.encode_action_small(&shift).unwrap(), 0x002A);
        let reduce = Action::Reduce(RuleId(17));
        assert_eq!(compressor.encode_action_small(&reduce).unwrap(), 32786);
        let accept = Action::Accept;
        assert_eq!(compressor.encode_action_small(&accept).unwrap(), 0xFFFF);
        let error = Action::Error;
        assert_eq!(compressor.encode_action_small(&error).unwrap(), 0xFFFE);
    }

    // Constructor stores the symbol and action unchanged.
    #[test]
    fn test_compressed_action_entry() {
        let entry = CompressedActionEntry::new(5, Action::Shift(StateId(10)));
        assert_eq!(entry.symbol, 5);
        match entry.action {
            Action::Shift(StateId(10)) => {}
            _ => panic!("Wrong action type"),
        }
    }

    // After compression, generated code uses one of the two table layouts.
    #[test]
    fn test_generated_small_table_format() {
        let mut grammar = Grammar::new("small_test".to_string());
        let token = Token {
            name: "A".to_string(),
            pattern: TokenPattern::String("a".to_string()),
            fragile: false,
        };
        grammar.tokens.insert(SymbolId(0), token);
        let mut parse_table = crate::test_helpers::test::make_minimal_table(
            vec![
                vec![vec![Action::Shift(StateId(1))], vec![]],
                vec![vec![], vec![Action::Accept]],
            ],
            vec![vec![StateId(1), StateId(0)], vec![StateId(0), StateId(0)]],
            vec![],
            SymbolId(2),
            SymbolId(1),
            0,
        );
        parse_table.symbol_to_index.insert(SymbolId(0), 0);
        let mut generator = StaticLanguageGenerator::new(grammar, parse_table);
        generator.compress_tables().unwrap();
        let code = generator.generate_language_code();
        let code_str = code.to_string();
        assert!(code_str.contains("SMALL_PARSE_TABLE") || code_str.contains("ACTION_TABLE"));
    }

    // Regression guard: a small arithmetic grammar must produce a healthy
    // LR(1) automaton (several states, state 0 has real actions), not a
    // collapsed table.
    #[test]
    fn arithmetic_has_many_states() {
        let mut grammar = Grammar::new("arithmetic".to_string());
        let number_token = Token {
            name: "number".to_string(),
            pattern: TokenPattern::Regex(r"\d+".to_string()),
            fragile: false,
        };
        let plus_token = Token {
            name: "plus".to_string(),
            pattern: TokenPattern::String("+".to_string()),
            fragile: false,
        };
        let times_token = Token {
            name: "times".to_string(),
            pattern: TokenPattern::String("*".to_string()),
            fragile: false,
        };
        grammar.tokens.insert(SymbolId(3), number_token);
        grammar.tokens.insert(SymbolId(4), plus_token);
        grammar.tokens.insert(SymbolId(5), times_token);
        grammar
            .rule_names
            .insert(SymbolId(0), "source_file".to_string());
        grammar
            .rule_names
            .insert(SymbolId(1), "expression".to_string());
        grammar.rule_names.insert(SymbolId(2), "term".to_string());
        // source_file -> expression
        grammar.add_rule(Rule {
            lhs: SymbolId(0),
            rhs: vec![Symbol::NonTerminal(SymbolId(1))],
            precedence: None,
            associativity: None,
            fields: vec![],
            production_id: ProductionId(0),
        });
        // expression -> expression '+' term
        grammar.add_rule(Rule {
            lhs: SymbolId(1),
            rhs: vec![
                Symbol::NonTerminal(SymbolId(1)),
                Symbol::Terminal(SymbolId(4)),
                Symbol::NonTerminal(SymbolId(2)),
            ],
            precedence: None,
            associativity: None,
            fields: vec![],
            production_id: ProductionId(1),
        });
        // expression -> term
        grammar.add_rule(Rule {
            lhs: SymbolId(1),
            rhs: vec![Symbol::NonTerminal(SymbolId(2))],
            precedence: None,
            associativity: None,
            fields: vec![],
            production_id: ProductionId(2),
        });
        // term -> term '*' number
        grammar.add_rule(Rule {
            lhs: SymbolId(2),
            rhs: vec![
                Symbol::NonTerminal(SymbolId(2)),
                Symbol::Terminal(SymbolId(5)),
                Symbol::Terminal(SymbolId(3)),
            ],
            precedence: None,
            associativity: None,
            fields: vec![],
            production_id: ProductionId(3),
        });
        // term -> number
        grammar.add_rule(Rule {
            lhs: SymbolId(2),
            rhs: vec![Symbol::Terminal(SymbolId(3))],
            precedence: None,
            associativity: None,
            fields: vec![],
            production_id: ProductionId(4),
        });
        let first_follow = FirstFollowSets::compute(&grammar).unwrap();
        let parse_table = build_lr1_automaton(&grammar, &first_follow).unwrap();
        assert!(
            parse_table.state_count >= 9,
            "automaton collapsed ({} states), expected >= 9",
            parse_table.state_count
        );
        assert!(
            parse_table.action_table[0]
                .iter()
                .any(|action_cell| action_cell.iter().any(|a| !matches!(a, Action::Error))),
            "state-0 has no valid actions"
        );
    }
}