sipha_memo/
lib.rs

//! Memoization support for packrat parsing.
//!
//! This module provides memoization tables for caching parse results at specific
//! positions, enabling packrat parsing, which can handle left-recursive grammars
//! and improve performance for ambiguous grammars.

use std::collections::HashMap;

use sipha_core::traits::{NodeId, RuleId, TokenKind};
use sipha_error::ParseError;

/// Memo entry storing a cached parse result.
#[derive(Clone, Debug)]
pub enum MemoEntry<K: TokenKind, R: RuleId, N: NodeId> {
    /// Successful parse result.
    Success {
        /// The resulting node.
        node: N,
        /// Number of tokens consumed.
        consumed: usize,
    },
    /// Failed parse (with error).
    Failure {
        /// The parse error.
        error: ParseError<K, R>,
        /// Number of tokens consumed before failure.
        consumed: usize,
    },
}

/// Memoization table for packrat parsing.
///
/// This table caches parse results keyed by (rule_id, position) pairs.
/// When the same rule is parsed at the same position, the cached result
/// is returned instead of re-parsing.
///
/// # When to Use
///
/// Use `MemoTable` when:
/// - Parsing left-recursive grammars (required)
/// - Parsing ambiguous grammars (improves performance)
/// - Parsing large inputs with repeated patterns
/// - Optimizing parser performance
///
/// # Usage
///
/// ```rust,ignore
/// use sipha_memo::{MemoEntry, MemoTable};
/// use sipha_tree::RawNodeId;
///
/// // `Token` and `Rule` are your crate's token-kind and rule-id enums.
/// let mut memo: MemoTable<Token, Rule, RawNodeId> = MemoTable::new();
///
/// // Store a successful parse.
/// memo.store_success(Rule::Expr, 0, RawNodeId(42), 5);
///
/// // Retrieve the cached result.
/// if let Some(entry) = memo.get(Rule::Expr, 0) {
///     match entry {
///         MemoEntry::Success { node, consumed } => {
///             // Use the cached result.
///         }
///         MemoEntry::Failure { error, consumed } => {
///             // Use the cached failure.
///         }
///     }
/// }
/// ```
///
/// # Performance
///
/// - **Memory**: ~64 bytes per cache entry
/// - **Lookup**: O(1) average case
/// - **Storage**: O(n) where n is the number of unique (rule, position) pairs
///
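/// As a rough illustration of these figures (an estimate, not a measurement):
/// memoizing 10 rules over a 100,000-token input can create up to 1,000,000
/// unique (rule, position) entries, or on the order of 64 MB of cache.
///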
/// # Enabling in Parser
///
/// Enable memoization when creating `ParserState`:
///
/// ```rust,ignore
/// let mut state = ParserState::new(&tokens, &mut arena, true, ());
/// //                                                    ^^^
/// //                                              Enable memoization
/// ```
pub struct MemoTable<K: TokenKind, R: RuleId, N: NodeId> {
    /// Cache mapping (rule_id, position) -> MemoEntry
    cache: HashMap<(R, usize), MemoEntry<K, R, N>>,
    /// Whether memoization is enabled.
    enabled: bool,
}

impl<K: TokenKind, R: RuleId, N: NodeId> MemoTable<K, R, N> {
    /// Create a new empty memo table.
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
            enabled: true,
        }
    }

    /// Create a new memo table with memoization disabled.
    pub fn disabled() -> Self {
        Self {
            cache: HashMap::new(),
            enabled: false,
        }
    }

    /// Enable or disable memoization.
    ///
    /// Disabling memoization also clears any entries currently in the cache.
    pub fn set_enabled(&mut self, enabled: bool) {
        self.enabled = enabled;
        if !enabled {
            self.clear();
        }
    }

    /// Check if memoization is enabled.
    pub fn is_enabled(&self) -> bool {
        self.enabled
    }

    /// Look up a cached result for a rule at a specific position.
    ///
    /// Returns `None` if memoization is disabled or if no entry is cached
    /// for this (rule, position) pair.
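    ///
    /// # Example
    ///
    /// A minimal sketch of the usual packrat lookup-or-parse flow, assuming
    /// `memo` is a `MemoTable`; the `parse_expr` helper, `Rule` enum, and `pos`
    /// variable are hypothetical stand-ins for your parser's own items.
    ///
    /// ```rust,ignore
    /// // Check the cache before parsing `Rule::Expr` at `pos`.
    /// if memo.get(Rule::Expr, pos).is_none() {
    ///     // Cache miss: parse, then record the outcome for next time.
    ///     match parse_expr(pos) {
    ///         Ok((node, consumed)) => memo.store_success(Rule::Expr, pos, node, consumed),
    ///         Err(error) => memo.store_failure(Rule::Expr, pos, error, 0),
    ///     }
    /// }
    /// // Either way, the entry is now available via `memo.get(Rule::Expr, pos)`.
    /// ```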
    pub fn get(&self, rule_id: R, position: usize) -> Option<&MemoEntry<K, R, N>> {
        if !self.enabled {
            return None;
        }
        self.cache.get(&(rule_id, position))
    }

    /// Store a successful parse result in the cache.
    pub fn store_success(&mut self, rule_id: R, position: usize, node: N, consumed: usize) {
        if !self.enabled {
            return;
        }
        self.cache
            .insert((rule_id, position), MemoEntry::Success { node, consumed });
    }

    /// Store a failed parse result in the cache.
    pub fn store_failure(
        &mut self,
        rule_id: R,
        position: usize,
        error: ParseError<K, R>,
        consumed: usize,
    ) {
        if !self.enabled {
            return;
        }
        self.cache
            .insert((rule_id, position), MemoEntry::Failure { error, consumed });
    }

    /// Clear all cached entries.
    pub fn clear(&mut self) {
        self.cache.clear();
    }

    /// Get the number of cached entries.
    pub fn len(&self) -> usize {
        self.cache.len()
    }

    /// Check if the cache is empty.
    pub fn is_empty(&self) -> bool {
        self.cache.is_empty()
    }

    /// Get statistics about the current contents of the memo table.
    ///
    /// Returns a struct with the number of cached successes and failures,
    /// whether memoization is enabled, and an estimate of memory usage.
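    ///
    /// # Example
    ///
    /// A small usage sketch; the `memo` table is assumed to have been populated
    /// during an earlier parse.
    ///
    /// ```rust,ignore
    /// let stats = memo.statistics();
    /// println!(
    ///     "{} cached entries ({} successes, {} failures), ~{} bytes",
    ///     stats.total_entries,
    ///     stats.success_count,
    ///     stats.failure_count,
    ///     stats.estimated_memory_bytes(),
    /// );
    /// ```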
    pub fn statistics(&self) -> MemoStatistics {
        let total_entries = self.cache.len();
        let success_count = self
            .cache
            .values()
            .filter(|entry| matches!(entry, MemoEntry::Success { .. }))
            .count();
        let failure_count = total_entries - success_count;

        MemoStatistics {
            total_entries,
            success_count,
            failure_count,
            enabled: self.enabled,
        }
    }
}

/// Statistics about memoization performance.
#[derive(Clone, Debug)]
pub struct MemoStatistics {
    /// Total number of cached entries.
    pub total_entries: usize,
    /// Number of successful parse results cached.
    pub success_count: usize,
    /// Number of failed parse results cached.
    pub failure_count: usize,
    /// Whether memoization is currently enabled.
    pub enabled: bool,
}

impl MemoStatistics {
    /// Estimate the memory used by the cached entries, in bytes.
    ///
    /// Note: this is a rough approximation; actual usage depends on the size
    /// of the key and entry types and on `HashMap` overhead.
    pub fn estimated_memory_bytes(&self) -> usize {
        // Rough estimate: each entry is ~64 bytes (rule_id + position key + entry overhead).
        self.total_entries * 64
    }
}

impl<K: TokenKind, R: RuleId, N: NodeId> Default for MemoTable<K, R, N> {
    fn default() -> Self {
        Self::new()
    }
}

/// Prelude module containing commonly used types.
pub mod prelude {
    pub use crate::{MemoEntry, MemoTable};
}

#[cfg(test)]
mod tests {
    use super::*;
    use sipha_core::traits::TokenKind;
    use sipha_tree::RawNodeId;

    #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
    #[allow(dead_code)]
    enum TestToken {
        A,
    }

    impl TokenKind for TestToken {
        fn is_trivia(&self) -> bool {
            false
        }
    }

    #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
    enum TestRule {
        Expr,
    }

    #[test]
    fn test_memo_basic() {
        let mut memo: MemoTable<TestToken, TestRule, RawNodeId> = MemoTable::new();
        memo.store_success(TestRule::Expr, 0, RawNodeId(0), 5);
        assert_eq!(memo.len(), 1);
        assert!(memo.get(TestRule::Expr, 0).is_some());
    }
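
    // Additional coverage sketches exercising only the API defined above:
    // a disabled table should neither store nor return entries, and
    // `statistics` should reflect what has been cached.

    #[test]
    fn test_memo_disabled_stores_nothing() {
        let mut memo: MemoTable<TestToken, TestRule, RawNodeId> = MemoTable::disabled();
        assert!(!memo.is_enabled());
        memo.store_success(TestRule::Expr, 0, RawNodeId(0), 5);
        assert!(memo.is_empty());
        assert!(memo.get(TestRule::Expr, 0).is_none());
    }

    #[test]
    fn test_memo_disable_clears_cache() {
        let mut memo: MemoTable<TestToken, TestRule, RawNodeId> = MemoTable::new();
        memo.store_success(TestRule::Expr, 0, RawNodeId(0), 5);
        assert_eq!(memo.len(), 1);
        // Disabling is documented above to clear existing entries.
        memo.set_enabled(false);
        assert!(memo.is_empty());
    }

    #[test]
    fn test_memo_statistics_counts_successes() {
        let mut memo: MemoTable<TestToken, TestRule, RawNodeId> = MemoTable::new();
        memo.store_success(TestRule::Expr, 0, RawNodeId(0), 5);
        memo.store_success(TestRule::Expr, 1, RawNodeId(1), 3);
        let stats = memo.statistics();
        assert_eq!(stats.total_entries, 2);
        assert_eq!(stats.success_count, 2);
        assert_eq!(stats.failure_count, 0);
        assert!(stats.enabled);
        assert_eq!(stats.estimated_memory_bytes(), 2 * 64);
    }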
}