1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
//! GLR parse forest representation and manipulation.
#![cfg_attr(feature = "strict_docs", allow(missing_docs))]
// GLR Parse Forest implementation
// This module implements a Shared Packed Parse Forest (SPPF) for efficient
// representation of ambiguous parse trees.
use crate::parser_v4::ParseNode;
use crate::stack_pool::StackPool;
use adze_ir::{RuleId, SymbolId};
use std::collections::HashMap;
use std::rc::Rc;
/// A node in the parse forest that can represent multiple parse trees
#[derive(Debug, Clone)]
pub enum ForestNode {
/// Terminal node (leaf)
Terminal {
symbol: SymbolId,
start: usize,
end: usize,
text: Vec<u8>,
},
/// Non-terminal node with one or more interpretations
NonTerminal {
symbol: SymbolId,
start: usize,
end: usize,
/// Alternative interpretations of this non-terminal
alternatives: Vec<PackedNode>,
},
}
/// A packed node represents one way to derive a non-terminal
#[derive(Debug, Clone)]
pub struct PackedNode {
/// The rule that was used for this derivation
pub rule_id: RuleId,
/// The children in this derivation
pub children: Vec<Rc<ForestNode>>,
}
/// Graph-Structured Stack (GSS) for GLR parsing
/// This allows sharing of common prefixes between different parse paths
#[derive(Debug)]
pub struct GSSNode {
/// Parser state
pub state: usize,
/// Links to parent nodes (can have multiple parents in GLR)
pub parents: Vec<GSSLink>,
/// Unique identifier
pub id: usize,
}
/// A link in the GSS representing a reduction path
#[derive(Debug, Clone)]
pub struct GSSLink {
/// Parent GSS node
pub parent: usize, // Index into GSS node pool
/// Parse tree node created by this link
pub tree_node: Rc<ForestNode>,
}
/// Statistics for GLR parsing
#[derive(Debug, Default, Clone)]
pub struct GLRStats {
/// Total number of GSS nodes created
pub total_nodes_created: usize,
/// Maximum number of active heads at any point
pub max_active_heads: usize,
/// Total number of forks performed
pub total_forks: usize,
/// Total number of merges performed
pub total_merges: usize,
/// Number of shared forest nodes (cache hits)
pub forest_cache_hits: usize,
}
/// GLR parser state that supports forking
pub struct GLRParserState {
/// Pool of GSS nodes
pub gss_nodes: Vec<GSSNode>,
/// Active GSS node heads (frontier of parsing)
pub active_heads: Vec<usize>,
/// Next GSS node ID
pub next_gss_id: usize,
/// Cache for sharing forest nodes
pub forest_cache: HashMap<(SymbolId, usize, usize), Rc<ForestNode>>,
/// Statistics for performance monitoring
pub stats: GLRStats,
/// Stack pool for efficient memory reuse
pub stack_pool: Rc<StackPool<usize>>,
}
impl Default for GLRParserState {
fn default() -> Self {
Self::new()
}
}
impl GLRParserState {
pub fn new() -> Self {
let mut state = Self {
gss_nodes: Vec::new(),
active_heads: Vec::new(),
next_gss_id: 0,
forest_cache: HashMap::new(),
stats: GLRStats::default(),
stack_pool: Rc::new(StackPool::new(64)), // Default pool size
};
// Create initial GSS node for state 0
let initial_node = GSSNode {
state: 0,
parents: Vec::new(),
id: 0,
};
state.gss_nodes.push(initial_node);
state.active_heads.push(0);
state.next_gss_id = 1;
state.stats.total_nodes_created = 1;
state.stats.max_active_heads = 1;
state
}
/// Fork the parser state for handling ambiguity
pub fn fork(&mut self, _gss_node_idx: usize, new_state: usize) -> usize {
// Check if a node with this state already exists at this position
for &head in &self.active_heads {
if self.gss_nodes[head].state == new_state {
// Reuse existing node
return head;
}
}
// Create new GSS node
let new_node = GSSNode {
state: new_state,
parents: Vec::new(),
id: self.next_gss_id,
};
self.next_gss_id += 1;
let new_idx = self.gss_nodes.len();
self.gss_nodes.push(new_node);
self.active_heads.push(new_idx);
// Update statistics
self.stats.total_nodes_created += 1;
self.stats.total_forks += 1;
self.stats.max_active_heads = self.stats.max_active_heads.max(self.active_heads.len());
new_idx
}
/// Create or retrieve a cached forest node
pub fn get_or_create_forest_node(
&mut self,
symbol: SymbolId,
start: usize,
end: usize,
create_fn: impl FnOnce() -> ForestNode,
) -> Rc<ForestNode> {
let key = (symbol, start, end);
if let Some(node) = self.forest_cache.get(&key) {
self.stats.forest_cache_hits += 1;
return node.clone();
}
let node = Rc::new(create_fn());
self.forest_cache.insert(key, node.clone());
node
}
/// Merge parse trees when multiple derivations lead to the same state
pub fn merge_trees(
&mut self,
symbol: SymbolId,
start: usize,
end: usize,
new_alternative: PackedNode,
) -> Rc<ForestNode> {
let key = (symbol, start, end);
if let Some(existing) = self.forest_cache.get(&key) {
// Cannot mutate through Rc, need to create new node with merged alternatives
if let ForestNode::NonTerminal { alternatives, .. } = existing.as_ref() {
let mut new_alts = alternatives.clone();
new_alts.push(new_alternative);
let merged = Rc::new(ForestNode::NonTerminal {
symbol,
start,
end,
alternatives: new_alts,
});
self.forest_cache.insert(key, merged.clone());
self.stats.total_merges += 1;
merged
} else {
existing.clone()
}
} else {
// Create new non-terminal with single alternative
let node = Rc::new(ForestNode::NonTerminal {
symbol,
start,
end,
alternatives: vec![new_alternative],
});
self.forest_cache.insert(key, node.clone());
node
}
}
/// Get a reference to the parser statistics
pub fn get_stats(&self) -> &GLRStats {
&self.stats
}
}
/// Convert a forest node to a single parse tree (picking first alternative)
pub fn forest_to_parse_tree(forest: &ForestNode) -> ParseNode {
match forest {
ForestNode::Terminal {
symbol, start, end, ..
} => ParseNode {
symbol: *symbol,
symbol_id: *symbol,
start_byte: *start,
end_byte: *end,
children: Vec::new(),
field_name: None,
},
ForestNode::NonTerminal {
symbol,
start,
end,
alternatives,
} => {
// For now, just pick the first alternative if present
let children = alternatives
.first()
.map(|first_alt| {
first_alt
.children
.iter()
.map(|child| forest_to_parse_tree(child))
.collect()
})
.unwrap_or_else(Vec::new);
ParseNode {
symbol: *symbol,
symbol_id: *symbol,
start_byte: *start,
end_byte: *end,
children,
field_name: None,
}
}
}
}
/// Convert a forest node to parse trees for all alternatives.
pub fn forest_to_parse_trees(forest: &ForestNode) -> Vec<ParseNode> {
match forest {
ForestNode::Terminal {
symbol, start, end, ..
} => vec![ParseNode {
symbol: *symbol,
symbol_id: *symbol,
start_byte: *start,
end_byte: *end,
children: Vec::new(),
field_name: None,
}],
ForestNode::NonTerminal {
symbol,
start,
end,
alternatives,
} => {
if alternatives.is_empty() {
vec![ParseNode {
symbol: *symbol,
symbol_id: *symbol,
start_byte: *start,
end_byte: *end,
children: Vec::new(),
field_name: None,
}]
} else {
alternatives
.iter()
.map(|alt| ParseNode {
symbol: *symbol,
symbol_id: *symbol,
start_byte: *start,
end_byte: *end,
children: alt
.children
.iter()
.map(|child| forest_to_parse_tree(child))
.collect(),
field_name: None,
})
.collect()
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn forest_to_parse_tree_with_no_alternatives_is_stable() {
let forest = ForestNode::NonTerminal {
symbol: SymbolId(7),
start: 0,
end: 2,
alternatives: vec![],
};
let node = forest_to_parse_tree(&forest);
let alternatives = forest_to_parse_trees(&forest);
assert_eq!(node.symbol, SymbolId(7));
assert!(node.children.is_empty());
assert_eq!(alternatives.len(), 1);
assert_eq!(alternatives[0].symbol, SymbolId(7));
assert!(alternatives[0].children.is_empty());
}
}