sipha_memo/lib.rs
1//! Memoization support for packrat parsing.
2//!
3//! This module provides memoization tables for caching parse results at specific
4//! positions, enabling packrat parsing which can handle left-recursive grammars
5//! and improve performance for ambiguous grammars.
6
7use std::collections::HashMap;
8
9use sipha_core::traits::{NodeId, RuleId, TokenKind};
10use sipha_error::ParseError;
11
12/// Memo entry storing a cached parse result.
13#[derive(Clone, Debug)]
14pub enum MemoEntry<K: TokenKind, R: RuleId, N: NodeId> {
15 /// Successful parse result.
16 Success {
17 /// The resulting node.
18 node: N,
19 /// Number of tokens consumed.
20 consumed: usize,
21 },
22 /// Failed parse (with error).
23 Failure {
24 /// The parse error.
25 error: ParseError<K, R>,
26 /// Number of tokens consumed before failure.
27 consumed: usize,
28 },
29}
30
31/// Memoization table for packrat parsing.
32///
33/// This table caches parse results keyed by (rule_id, position) pairs.
34/// When the same rule is parsed at the same position, the cached result
35/// is returned instead of re-parsing.
36///
37/// # When to Use
38///
39/// Use `MemoTable` when:
40/// - Parsing left-recursive grammars (required)
41/// - Parsing ambiguous grammars (improves performance)
42/// - Parsing large inputs with repeated patterns
43/// - Optimizing parser performance
44///
45/// # Usage
46///
47/// ```rust
48/// use sipha_memo::MemoTable;
49/// use sipha_tree::RawNodeId;
50///
51/// let mut memo: MemoTable<Token, Rule, RawNodeId> = MemoTable::new();
52///
53/// // Store successful parse
54/// memo.store_success(Rule::Expr, 0, RawNodeId(42), 5);
55///
56/// // Retrieve cached result
57/// if let Some(entry) = memo.get(Rule::Expr, 0) {
58/// match entry {
59/// MemoEntry::Success { node, consumed } => {
60/// // Use cached result
61/// }
62/// MemoEntry::Failure { error, consumed } => {
63/// // Use cached failure
64/// }
65/// }
66/// }
67/// ```
68///
69/// # Performance
70///
71/// - **Memory**: ~64 bytes per cache entry
72/// - **Lookup**: O(1) average case
73/// - **Storage**: O(n) where n is number of unique (rule, position) pairs
74///
75/// # Enabling in Parser
76///
77/// Enable memoization when creating `ParserState`:
78///
79/// ```rust
80/// let mut state = ParserState::new(&tokens, &mut arena, true, ());
81/// // ^^^
82/// // Enable memoization
83/// ```
84pub struct MemoTable<K: TokenKind, R: RuleId, N: NodeId> {
85 /// Cache mapping (rule_id, position) -> MemoEntry
86 cache: HashMap<(R, usize), MemoEntry<K, R, N>>,
87 /// Whether memoization is enabled.
88 enabled: bool,
89}
90
91impl<K: TokenKind, R: RuleId, N: NodeId> MemoTable<K, R, N> {
92 /// Create a new empty memo table.
93 pub fn new() -> Self {
94 Self {
95 cache: HashMap::new(),
96 enabled: true,
97 }
98 }
99
100 /// Create a new memo table with memoization disabled.
101 pub fn disabled() -> Self {
102 Self {
103 cache: HashMap::new(),
104 enabled: false,
105 }
106 }
107
108 /// Enable or disable memoization.
109 pub fn set_enabled(&mut self, enabled: bool) {
110 self.enabled = enabled;
111 if !enabled {
112 self.clear();
113 }
114 }
115
116 /// Check if memoization is enabled.
117 pub fn is_enabled(&self) -> bool {
118 self.enabled
119 }
120
121 /// Look up a cached result for a rule at a specific position.
122 pub fn get(&self, rule_id: R, position: usize) -> Option<&MemoEntry<K, R, N>> {
123 if !self.enabled {
124 return None;
125 }
126 self.cache.get(&(rule_id, position))
127 }
128
129 /// Store a successful parse result in the cache.
130 pub fn store_success(&mut self, rule_id: R, position: usize, node: N, consumed: usize) {
131 if !self.enabled {
132 return;
133 }
134 self.cache
135 .insert((rule_id, position), MemoEntry::Success { node, consumed });
136 }
137
138 /// Store a failed parse result in the cache.
139 pub fn store_failure(
140 &mut self,
141 rule_id: R,
142 position: usize,
143 error: ParseError<K, R>,
144 consumed: usize,
145 ) {
146 if !self.enabled {
147 return;
148 }
149 self.cache
150 .insert((rule_id, position), MemoEntry::Failure { error, consumed });
151 }
152
153 /// Clear all cached entries.
154 pub fn clear(&mut self) {
155 self.cache.clear();
156 }
157
158 /// Get the number of cached entries.
159 pub fn len(&self) -> usize {
160 self.cache.len()
161 }
162
163 /// Check if the cache is empty.
164 pub fn is_empty(&self) -> bool {
165 self.cache.is_empty()
166 }
167
168 /// Get statistics about memoization performance.
169 ///
170 /// Returns a struct containing cache hit/miss information and
171 /// memory usage estimates.
172 pub fn statistics(&self) -> MemoStatistics {
173 let total_entries = self.cache.len();
174 let success_count = self
175 .cache
176 .values()
177 .filter(|entry| matches!(entry, MemoEntry::Success { .. }))
178 .count();
179 let failure_count = total_entries - success_count;
180
181 MemoStatistics {
182 total_entries,
183 success_count,
184 failure_count,
185 enabled: self.enabled,
186 }
187 }
188}
189
190/// Statistics about memoization performance.
191#[derive(Clone, Debug)]
192pub struct MemoStatistics {
193 /// Total number of cached entries.
194 pub total_entries: usize,
195 /// Number of successful parse results cached.
196 pub success_count: usize,
197 /// Number of failed parse results cached.
198 pub failure_count: usize,
199 /// Whether memoization is currently enabled.
200 pub enabled: bool,
201}
202
203impl MemoStatistics {
204 /// Get the hit rate as a percentage (0.0 to 100.0).
205 ///
206 /// Note: This is a simplified metric. In practice, you'd track
207 /// actual hits vs misses during parsing.
208 pub fn estimated_memory_bytes(&self) -> usize {
209 // Rough estimate: each entry is ~64 bytes (rule_id + position key + entry overhead)
210 self.total_entries * 64
211 }
212}
213
214impl<K: TokenKind, R: RuleId, N: NodeId> Default for MemoTable<K, R, N> {
215 fn default() -> Self {
216 Self::new()
217 }
218}
219
220/// Prelude module containing commonly used types.
221pub mod prelude {
222 pub use crate::{MemoEntry, MemoTable};
223}
224
225#[cfg(test)]
226mod tests {
227 use super::*;
228 use sipha_core::traits::TokenKind;
229 use sipha_tree::RawNodeId;
230
231 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
232 #[allow(dead_code)]
233 enum TestToken {
234 A,
235 }
236
237 impl TokenKind for TestToken {
238 fn is_trivia(&self) -> bool {
239 false
240 }
241 }
242
243 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
244 enum TestRule {
245 Expr,
246 }
247
248 #[test]
249 fn test_memo_basic() {
250 let mut memo: MemoTable<TestToken, TestRule, RawNodeId> = MemoTable::new();
251 memo.store_success(TestRule::Expr, 0, RawNodeId(0), 5);
252 assert_eq!(memo.len(), 1);
253 assert!(memo.get(TestRule::Expr, 0).is_some());
254 }
255}