1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
// GLR (Generalized LR) parsing support
// This module implements fork/merge handling for ambiguous grammars
use crate::parser_v3::{ParseNode, ParserState};
use adze_glr_core::Action;
use adze_ir::{StateId, SymbolId};
use anyhow::Result;
use std::collections::{HashMap, VecDeque};
/// A GLR parser stack that can represent multiple parse paths
#[derive(Debug, Clone)]
pub struct GLRStack {
/// Active parse stacks (each represents a different parse path)
pub stacks: Vec<ParseStack>,
/// Merge points where stacks converge
pub merge_points: HashMap<(StateId, usize), Vec<usize>>, // (state, position) -> stack indices
}
/// A single parse stack in the GLR forest
#[derive(Debug, Clone)]
pub struct ParseStack {
/// Stack ID for tracking
pub id: usize,
/// Parser state stack
pub state_stack: Vec<ParserState>,
/// Parse node stack
pub node_stack: Vec<ParseNode>,
/// Current position in input
pub position: usize,
/// Whether this stack is still active
pub active: bool,
}
/// Result of a GLR fork operation
#[derive(Debug)]
pub struct ForkResult {
/// New stacks created by the fork
pub new_stacks: Vec<ParseStack>,
/// Indices of stacks that should be deactivated
pub deactivate: Vec<usize>,
}
impl GLRStack {
/// Create a new GLR stack with an initial parse stack
pub fn new(initial_state: StateId) -> Self {
let initial_stack = ParseStack {
id: 0,
state_stack: vec![ParserState {
state: initial_state,
symbol: None,
position: 0,
}],
node_stack: Vec::new(),
position: 0,
active: true,
};
Self {
stacks: vec![initial_stack],
merge_points: HashMap::new(),
}
}
/// Handle a fork action by creating new parse stacks
pub fn fork(&mut self, stack_idx: usize, actions: &[Action]) -> Result<ForkResult> {
if stack_idx >= self.stacks.len() {
anyhow::bail!(
"Invalid stack index for fork: {} (only {} stacks exist)",
stack_idx,
self.stacks.len()
);
}
let source_stack = &self.stacks[stack_idx];
if !source_stack.active {
anyhow::bail!(
"Cannot fork from inactive stack {} (it was previously merged or pruned)",
stack_idx
);
}
let mut new_stacks = Vec::new();
let next_id = self.stacks.len();
// Create a new stack for each action
for (i, _action) in actions.iter().enumerate() {
let mut new_stack = source_stack.clone();
new_stack.id = next_id + i;
new_stacks.push(new_stack);
}
Ok(ForkResult {
new_stacks,
deactivate: vec![stack_idx], // Deactivate the original stack
})
}
/// Check if stacks can be merged at the current state
pub fn check_merge(&mut self, state: StateId, position: usize) -> Vec<Vec<usize>> {
let _key = (state, position);
let mut stacks_at_state = Vec::new();
// Find all active stacks at this state and position
for (idx, stack) in self.stacks.iter().enumerate() {
if stack.active
&& stack.position == position
&& stack.state_stack.last().map(|s| s.state) == Some(state)
{
stacks_at_state.push(idx);
}
}
// Group stacks that can be merged
let mut merge_groups = Vec::new();
if stacks_at_state.len() > 1 {
// For now, merge all stacks at the same state
// In a more sophisticated implementation, we'd check compatibility
merge_groups.push(stacks_at_state);
}
merge_groups
}
/// Merge multiple stacks into one
pub fn merge(&mut self, stack_indices: &[usize]) -> Result<usize> {
if stack_indices.len() < 2 {
anyhow::bail!(
"Need at least 2 stacks to merge, got {}",
stack_indices.len()
);
}
// Use the first stack as the base
let base_idx = stack_indices[0];
// Create ambiguity nodes for different parse trees
let mut ambiguous_nodes = Vec::new();
for &idx in stack_indices {
if let Some(node) = self.stacks[idx].node_stack.last() {
ambiguous_nodes.push(node.clone());
}
}
// Create an ambiguity node if parse trees differ
if ambiguous_nodes.len() > 1 && !Self::nodes_equal(&ambiguous_nodes) {
let ambiguity_node = ParseNode {
symbol: SymbolId(0xFFFF), // Special ambiguity marker
children: ambiguous_nodes,
start_byte: self.stacks[base_idx].position,
end_byte: self.stacks[base_idx].position,
field_name: Some("ambiguous".to_string()),
};
// Replace top node with ambiguity node
if let Some(stack) = self.stacks.get_mut(base_idx) {
stack.node_stack.pop();
stack.node_stack.push(ambiguity_node);
}
}
// Deactivate all but the base stack
for &idx in &stack_indices[1..] {
if let Some(stack) = self.stacks.get_mut(idx) {
stack.active = false;
}
}
Ok(base_idx)
}
/// Check if nodes are structurally equal
fn nodes_equal(nodes: &[ParseNode]) -> bool {
if nodes.is_empty() {
return true;
}
let first = &nodes[0];
nodes[1..]
.iter()
.all(|node| node.symbol == first.symbol && node.children.len() == first.children.len())
}
/// Get all active stacks
pub fn active_stacks(&self) -> Vec<&ParseStack> {
self.stacks.iter().filter(|s| s.active).collect()
}
/// Get mutable references to active stacks
pub fn active_stacks_mut(&mut self) -> Vec<&mut ParseStack> {
self.stacks.iter_mut().filter(|s| s.active).collect()
}
}
/// GLR parser coordinator that manages multiple parse stacks
pub struct GLRParser {
/// The GLR stack forest
pub stack: GLRStack,
/// Queue of stacks to process
pub work_queue: VecDeque<usize>,
}
impl GLRParser {
/// Create a new GLR parser
pub fn new(initial_state: StateId) -> Self {
Self {
stack: GLRStack::new(initial_state),
work_queue: VecDeque::from(vec![0]),
}
}
/// Process all active stacks for the current token
pub fn process_token(
&mut self,
token_symbol: SymbolId,
get_action: impl Fn(StateId, SymbolId) -> Action,
) -> Result<()> {
let mut new_work = Vec::new();
while let Some(stack_idx) = self.work_queue.pop_front() {
let Some(stack) = self.stack.stacks.get(stack_idx) else {
continue;
};
if !stack.active {
continue;
}
let current_state = stack
.state_stack
.last()
.ok_or_else(|| {
anyhow::anyhow!("Empty state stack in GLR step for stack {}", stack_idx)
})?
.state;
let action = get_action(current_state, token_symbol);
match action {
Action::Fork(ref actions) => {
// Handle fork by creating new stacks
let fork_result = self.stack.fork(stack_idx, actions)?;
// Add new stacks to the GLR forest
for new_stack in fork_result.new_stacks {
let new_idx = self.stack.stacks.len();
self.stack.stacks.push(new_stack);
new_work.push(new_idx);
}
// Deactivate forked stack
for idx in fork_result.deactivate {
if let Some(stack) = self.stack.stacks.get_mut(idx) {
stack.active = false;
}
}
}
_ => {
// Non-fork actions are handled by the regular parser
new_work.push(stack_idx);
}
}
}
// Check for merge opportunities
let mut merge_data = Vec::new();
for stack in self.stack.active_stacks() {
if let Some(state) = stack.state_stack.last() {
merge_data.push((state.state, stack.position));
}
}
for (state, position) in merge_data {
let merge_groups = self.stack.check_merge(state, position);
for group in merge_groups {
if group.len() > 1 {
self.stack.merge(&group)?;
}
}
}
// Add remaining work to queue
self.work_queue.extend(new_work);
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use adze_ir::RuleId;
#[test]
fn test_glr_fork() {
let mut glr = GLRStack::new(StateId(0));
// Create a fork with 3 actions
let actions = vec![
Action::Shift(StateId(1)),
Action::Shift(StateId(2)),
Action::Reduce(RuleId(1)),
];
let fork_result = glr.fork(0, &actions).unwrap();
assert_eq!(fork_result.new_stacks.len(), 3);
assert_eq!(fork_result.deactivate, vec![0]);
}
#[test]
fn test_glr_merge() {
let mut glr = GLRStack::new(StateId(0));
// Add another stack at the same state
let mut stack2 = glr.stacks[0].clone();
stack2.id = 1;
glr.stacks.push(stack2);
// Both stacks at state 0, position 0
let merge_groups = glr.check_merge(StateId(0), 0);
assert_eq!(merge_groups.len(), 1);
assert_eq!(merge_groups[0].len(), 2);
// Merge them
let merged_idx = glr.merge(&merge_groups[0]).unwrap();
assert_eq!(merged_idx, 0);
assert!(!glr.stacks[1].active);
}
}