1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
// Test helpers for tablegen tests
#![cfg_attr(feature = "strict_docs", allow(missing_docs))]
//! Test helper utilities for tablegen unit tests.
#[cfg(test)]
pub(crate) mod test {
use adze_glr_core::{Action, GotoIndexing, LexMode, ParseRule, ParseTable};
use adze_ir::{Grammar, StateId, SymbolId};
use std::collections::BTreeMap;
/// Sentinel used throughout the tests for "no goto".
///
/// Defined as `StateId(u16::MAX)`. The table builders in this module fill every
/// missing goto cell with this value, and a column is treated as a non-terminal
/// as soon as any state holds a goto different from it.
pub(crate) const INVALID: StateId = StateId(u16::MAX);
/// Build a minimal but fully-formed ParseTable suitable for unit tests.
///
/// The two input grids are normalized into a rectangular
/// `state_count x symbol_count` shape before the table is assembled:
/// - `state_count` is the larger of `actions.len()` and `gotos.len()`, and at least 1.
/// - `symbol_count` is the widest row found in *either* grid, widened further so
///   that the `start_symbol` and `eof_symbol` columns always exist.
/// - Missing action cells become empty action lists; missing goto cells become `INVALID`.
///
/// Conventions expected by the project:
/// - Symbol layout: ERROR(0), terminals `[1..]`, EOF (= token_count + external_token_count), then non-terminals.
/// - `actions` is indexed by `[state][symbol_index]` and `gotos` by `[state][symbol_index]`.
///
/// Every column holding a non-`INVALID` goto in at least one state is registered in
/// `nonterminal_to_index`; `start_symbol` is always registered as well.
/// `symbol_to_index` / `index_to_symbol` are identity mappings over the columns.
///
/// # Panics
/// - If `external_token_count` is larger than the EOF column index, because
///   `token_count` (= EOF index - `external_token_count`) would be negative.
/// - In builds with debug assertions, if `eof_symbol` is column 0.
pub fn make_minimal_table(
    mut actions: Vec<Vec<Vec<Action>>>,
    mut gotos: Vec<Vec<StateId>>,
    rules: Vec<ParseRule>,
    start_symbol: SymbolId,
    eof_symbol: SymbolId,
    external_token_count: usize,
) -> ParseTable {
    let start_idx = usize::from(start_symbol.0);
    let eof_idx = usize::from(eof_symbol.0);

    // Dimensions: consider every row of both grids, not just the first one, so
    // ragged input can never leave a row wider or a grid taller than advertised.
    let state_count = actions.len().max(gotos.len()).max(1);
    let widest_action_row = actions.iter().map(Vec::len).max().unwrap_or(0);
    let widest_goto_row = gotos.iter().map(Vec::len).max().unwrap_or(0);
    // Cover the columns referenced by start_symbol and eof_symbol too.
    let symbol_count = widest_action_row
        .max(widest_goto_row)
        .max(start_idx + 1)
        .max(eof_idx + 1);

    // Normalize shapes. Both targets are >= the current sizes, so these calls
    // only ever pad and never truncate caller-provided cells.
    actions.resize_with(state_count, Vec::new);
    for row in &mut actions {
        row.resize_with(symbol_count, Vec::new);
    }
    gotos.resize_with(state_count, Vec::new);
    for row in &mut gotos {
        row.resize(symbol_count, INVALID);
    }

    // Build symbol maps: column `i` is symbol `i` in both directions.
    // `SymbolId` wraps a u16; test tables are assumed to stay well below that limit.
    let index_to_symbol: Vec<SymbolId> = (0..symbol_count)
        .map(|col| SymbolId(col as u16))
        .collect();
    let symbol_to_index: BTreeMap<SymbolId, usize> = index_to_symbol
        .iter()
        .enumerate()
        .map(|(col, &symbol)| (symbol, col))
        .collect();

    // A column counts as a non-terminal when some state has a real goto in it.
    let mut nonterminal_to_index: BTreeMap<SymbolId, usize> = (0..symbol_count)
        .filter(|&col| gotos.iter().any(|row| row[col] != INVALID))
        .map(|col| (SymbolId(col as u16), col))
        .collect();
    // The start symbol must be resolvable even when no goto targets it.
    nonterminal_to_index.entry(start_symbol).or_insert(start_idx);

    // Invariants on EOF / token_count
    debug_assert!(
        eof_idx > 0 && eof_idx < symbol_count,
        "EOF column must be within 1..symbol_count (got {eof_idx} of {symbol_count})"
    );
    assert!(
        external_token_count <= eof_idx,
        "external_token_count ({external_token_count}) must not exceed the EOF column index ({eof_idx})"
    );
    // By project convention: EOF index == token_count + external_token_count.
    // (token_count includes EOF; examples set token_count == eof_idx when externals==0)
    let token_count = eof_idx - external_token_count;

    // Minimal lexing configuration (one mode per state)
    let lex_modes = vec![
        LexMode {
            lex_state: 0,
            external_lex_state: 0
        };
        state_count
    ];

    ParseTable {
        // core grids
        action_table: actions,
        goto_table: gotos,
        // grammar rules
        rules,
        // shapes
        state_count,
        symbol_count,
        // symbol bookkeeping
        symbol_to_index,
        index_to_symbol,
        nonterminal_to_index,
        symbol_metadata: vec![], // tests don't need metadata
        // token layout / sentinels
        token_count,
        external_token_count,
        eof_symbol,
        start_symbol,
        // parsing config
        initial_state: StateId(0),
        // lexing config
        lex_modes,
        extras: vec![],
        external_scanner_states: vec![],
        // advanced features (unused in hand tests)
        dynamic_prec_by_rule: vec![],
        rule_assoc_by_rule: vec![],
        alias_sequences: vec![],
        field_names: vec![],
        field_map: BTreeMap::new(),
        // display / provenance (defaults are fine for tests)
        grammar: Grammar::default(),
        // GOTO indexing mode
        goto_indexing: GotoIndexing::NonterminalMap,
    }
}
/// Construct a valid table whose action lists are all empty and whose gotos are
/// all `INVALID`, for tests that only need a well-shaped table.
///
/// `terms` counts the real terminals (EOF not included) and `nonterms` counts the
/// non-terminals. Columns are laid out as:
/// 0 = ERROR, `1..=terms` = terminals, `terms + externals + 1` = EOF, everything
/// after EOF = non-terminals.
///
/// `states` and `nonterms` are each raised to at least 1, and the start symbol is
/// the first column after EOF.
pub fn make_empty_table(
    states: usize,
    terms: usize,
    nonterms: usize,
    externals: usize,
) -> ParseTable {
    // ERROR occupies column 0, so EOF lands one past the terminals and externals.
    let eof_column = terms + externals + 1;
    let first_nonterminal = eof_column + 1;
    // The start symbol needs a column of its own even when `nonterms` is 0.
    let total_columns = first_nonterminal + nonterms.max(1);
    let row_count = states.max(1);

    make_minimal_table(
        vec![vec![Vec::new(); total_columns]; row_count],
        vec![vec![INVALID; total_columns]; row_count],
        Vec::new(),
        SymbolId(first_nonterminal as u16),
        SymbolId(eof_column as u16),
        externals,
    )
}
#[cfg(test)]
mod smoke {
    use super::*;

    // Shorthand for `make_empty_table` with keyword-style arguments; the trailing
    // `externals:` argument is optional and counts as 0 when left out.
    #[macro_export]
    macro_rules! empty_table {
        (states: $states:expr, terms: $terms:expr, nonterms: $nonterms:expr $(, externals: $externals:expr)? ) => {{
            // Evaluated first, before the other arguments are passed along.
            let externals = 0 $(+ $externals)?;
            $crate::test_helpers::test::make_empty_table($states, $terms, $nonterms, externals)
        }};
    }

    /// Building an empty table with zero non-terminals must not panic.
    #[test]
    fn empty_is_constructible() {
        drop(make_empty_table(2, 1, 0, 0));
    }
}
}