Skip to main content

oak_core/parser/
cache.rs

1use crate::{
2    Language,
3    lexer::{LexOutput, LexerCache, Token},
4    memory::arena::SyntaxArena,
5    parser::session::ParseCache,
6    source::Source,
7    tree::GreenNode,
8};
9use std::{collections::HashMap, hash::Hasher, sync::Arc};
10use twox_hash::XxHash64;
11
12/// A content-based cache for parsed results.
13///
14/// This cache stores parsed results based on the hash of the source content,
15/// allowing for efficient reuse of parsing results when processing the same content multiple times.
16pub struct ContentCache<L: Language + Send + Sync + 'static> {
17    /// Cache entries mapping content hash to parsed results.
18    entries: HashMap<u64, CacheEntry<L>>,
19    /// Maximum number of entries to keep in the cache.
20    max_entries: usize,
21    /// Current number of entries in the cache.
22    entry_count: usize,
23}
24
25/// A single entry in the content cache.
26struct CacheEntry<L: Language + Send + Sync + 'static> {
27    /// The parsed green node.
28    root: Arc<GreenNode<'static, L>>,
29    /// The lex output associated with this entry.
30    lex_output: LexOutput<L>,
31}
32
33impl<L: Language + Send + Sync + 'static> ContentCache<L> {
34    /// Creates a new content cache with the specified maximum size.
35    pub fn new(max_entries: usize) -> Self {
36        Self { entries: HashMap::new(), max_entries, entry_count: 0 }
37    }
38
39    /// Computes a hash for the given source content.
40    fn hash_content<S: Source + ?Sized>(source: &S) -> u64 {
41        let mut hasher = XxHash64::default();
42        let text = source.get_text_from(0);
43        hasher.write(text.as_bytes());
44        hasher.finish()
45    }
46
47    /// Gets a cached entry for the given source content if it exists.
48    pub fn get<S: Source + ?Sized>(&self, source: &S) -> Option<(&GreenNode<'_, L>, &LexOutput<L>)> {
49        let hash = Self::hash_content(source);
50        self.entries.get(&hash).map(|entry| {
51            let root: &GreenNode<'_, L> = unsafe { std::mem::transmute(&*entry.root) };
52            (root, &entry.lex_output)
53        })
54    }
55
56    /// Inserts a new entry into the cache.
57    pub fn insert<S: Source + ?Sized>(&mut self, source: &S, root: &GreenNode<'_, L>, lex_output: LexOutput<L>) {
58        let hash = Self::hash_content(source);
59
60        // Remove existing entry if it exists
61        if self.entries.contains_key(&hash) {
62            self.entries.remove(&hash);
63            self.entry_count -= 1;
64        }
65
66        // Evict oldest entries if cache is full
67        while self.entry_count >= self.max_entries && !self.entries.is_empty() {
68            // Remove the first entry (FIFO eviction)
69            if let Some(key) = self.entries.keys().next().cloned() {
70                self.entries.remove(&key);
71                self.entry_count -= 1;
72            }
73        }
74
75        // Insert new entry
76        let root: Arc<GreenNode<'static, L>> = unsafe { Arc::from_raw(Arc::into_raw(Arc::from(root)) as *const GreenNode<'static, L>) };
77
78        self.entries.insert(hash, CacheEntry { root, lex_output });
79        self.entry_count += 1;
80    }
81
82    /// Clears all entries from the cache.
83    pub fn clear(&mut self) {
84        self.entries.clear();
85        self.entry_count = 0;
86    }
87
88    /// Returns the current number of entries in the cache.
89    pub fn len(&self) -> usize {
90        self.entry_count
91    }
92
93    /// Returns true if the cache is empty.
94    pub fn is_empty(&self) -> bool {
95        self.entry_count == 0
96    }
97}
98
99impl<L: Language + Send + Sync + 'static> Default for ContentCache<L> {
100    fn default() -> Self {
101        Self::new(100) // Default to 100 entries
102    }
103}
104
105/// A ParseCache implementation that wraps another ParseCache and adds content-based caching.
106pub struct CachingParseSession<L: Language + Send + Sync + 'static, C: ParseCache<L>> {
107    /// The underlying parse session.
108    inner: C,
109    /// The content cache for storing parsed results.
110    content_cache: ContentCache<L>,
111}
112
113impl<L: Language + Send + Sync + 'static, C: ParseCache<L>> CachingParseSession<L, C> {
114    /// Creates a new caching parse session.
115    pub fn new(inner: C, max_cache_entries: usize) -> Self {
116        Self { inner, content_cache: ContentCache::new(max_cache_entries) }
117    }
118
119    /// Returns a reference to the content cache.
120    pub fn content_cache(&self) -> &ContentCache<L> {
121        &self.content_cache
122    }
123
124    /// Returns a mutable reference to the content cache.
125    pub fn content_cache_mut(&mut self) -> &mut ContentCache<L> {
126        &mut self.content_cache
127    }
128
129    /// Returns a reference to the inner parse session.
130    pub fn inner(&self) -> &C {
131        &self.inner
132    }
133
134    /// Returns a mutable reference to the inner parse session.
135    pub fn inner_mut(&mut self) -> &mut C {
136        &mut self.inner
137    }
138}
139
140impl<L: Language + Send + Sync + 'static, C: ParseCache<L>> ParseCache<L> for CachingParseSession<L, C> {
141    fn arena(&self) -> &SyntaxArena {
142        self.inner.arena()
143    }
144
145    fn old_tree(&self) -> Option<&GreenNode<'_, L>> {
146        self.inner.old_tree()
147    }
148
149    fn lex_output(&self) -> Option<&LexOutput<L>> {
150        self.inner.lex_output()
151    }
152
153    fn prepare_generation(&mut self) {
154        self.inner.prepare_generation()
155    }
156
157    fn commit_generation(&self, root: &GreenNode<L>) {
158        self.inner.commit_generation(root)
159    }
160}
161
162impl<L: Language + Send + Sync + 'static, C: ParseCache<L>> LexerCache<L> for CachingParseSession<L, C> {
163    fn set_lex_output(&mut self, output: LexOutput<L>) {
164        self.inner.set_lex_output(output)
165    }
166
167    fn get_token(&self, index: usize) -> Option<Token<L::TokenType>> {
168        self.inner.get_token(index)
169    }
170
171    fn count_tokens(&self) -> usize {
172        self.inner.count_tokens()
173    }
174
175    fn has_tokens(&self) -> bool {
176        self.inner.has_tokens()
177    }
178
179    fn get_tokens(&self) -> Option<&[Token<L::TokenType>]> {
180        self.inner.get_tokens()
181    }
182}
183
184/// Implementation of BuilderCache for CachingParseSession.
185impl<L: Language + Send + Sync + 'static, C: ParseCache<L> + crate::builder::BuilderCache<L>> crate::builder::BuilderCache<L> for CachingParseSession<L, C> {
186    fn get_typed_node<T: std::any::Any + Clone + Send + Sync>(&self, node: &GreenNode<L>) -> Option<T> {
187        self.inner.get_typed_node(node)
188    }
189
190    fn set_typed_node<T: std::any::Any + Send + Sync>(&mut self, node: &GreenNode<L>, value: T) {
191        self.inner.set_typed_node(node, value)
192    }
193}