Skip to main content

json_eval_rs/jsoneval/
cache.rs

1use super::JSONEval;
2use crate::jsoneval::eval_cache::{CacheKey, CacheStats};
3use crate::jsoneval::eval_data::EvalData;
4use crate::jsoneval::path_utils;
5
6
7use indexmap::IndexSet;
8use serde_json::Value;
9
10impl JSONEval {
11    /// Check if a dependency should be part of the cache key
12    /// Check if a dependency should be cached
13    /// Caches everything except keys starting with $ (except $context)
14    #[inline]
15    pub fn should_cache_dependency(&self, key: &str) -> bool {
16        if key.starts_with("/$") || key.starts_with('$') {
17            // Only cache $context, exclude other $ keys like $params
18            key.starts_with("$context.") || key.starts_with("/$context") || key.starts_with("$context/")
19                || key.starts_with("$params.") || key.starts_with("/$params") || key.starts_with("$params/")
20        } else {
21            true
22        }
23    }
24
25    /// Try to get a result from cache
26    /// Helper: Try to get cached result for an evaluation (thread-safe)
27    /// Try to get a value from cache if it exists and dependencies match
28    pub(crate) fn try_get_cached(
29        &self,
30        eval_key: &str,
31        eval_data_snapshot: &EvalData,
32        missed_keys: &dashmap::DashSet<String>,
33    ) -> Option<Value> {
34        // Skip cache lookup if caching is disabled
35        if !self.cache_enabled {
36            return None;
37        }
38
39        // Get dependencies for this evaluation
40        let deps = self.dependencies.get(eval_key)?;
41
42        // If no dependencies, use simple cache key
43        let cache_key = if deps.is_empty() {
44            CacheKey::simple(eval_key.to_string())
45        } else {
46            // If any dependency missed the cache in this batch, force a cache miss
47            // This prevents false cache hits when large data arrays are intentionally skipped in hashes
48            if deps.iter().any(|dep| missed_keys.contains(dep)) {
49                return None;
50            }
51
52            // Filter dependencies (exclude $ keys except $context)
53            let filtered_deps: IndexSet<String> = deps
54                .iter()
55                .filter(|dep_key| self.should_cache_dependency(dep_key) && dep_key.as_str() != path_utils::normalize_to_json_pointer(eval_key))
56                .cloned()
57                .collect();
58
59            // Collect dependency values
60            let dep_values: Vec<(String, &Value)> = filtered_deps
61                .iter()
62                .filter_map(|dep_key| {
63                    eval_data_snapshot.get(dep_key).and_then(|v| {
64                        // Skip large arrays that contain no actionable schema keys
65                        // to avoid hashing large amounts of pure data
66                        if let Value::Array(arr) = v {
67                            if arr.len() > 10 && !crate::parse_schema::common::has_actionable_keys(v) {
68                                return None;
69                            }
70                        }
71                        Some((dep_key.clone(), v))
72                    })
73                })
74                .collect();
75
76            CacheKey::new(eval_key.to_string(), &filtered_deps, &dep_values)            
77        };
78
79        // Try cache lookup (zero-copy via Arc, thread-safe)
80        self.eval_cache
81            .get(&cache_key)
82            .map(|arc_val| (*arc_val).clone())
83    }
84
85    /// Cache a result
86    /// Helper: Store evaluation result in cache (thread-safe)
87    pub(crate) fn cache_result(
88        &self,
89        eval_key: &str,
90        value: Value,
91        eval_data_snapshot: &EvalData,
92    ) {
93        // Skip cache insertion if caching is disabled
94        if !self.cache_enabled {
95            return;
96        }
97
98        // Get dependencies for this evaluation
99        let deps = match self.dependencies.get(eval_key) {
100            Some(d) => d,
101            None => {
102                // No dependencies - use simple cache key
103                let cache_key = CacheKey::simple(eval_key.to_string());
104                self.eval_cache.insert(cache_key, value);
105                return;
106            }
107        };
108
109        // Filter and collect dependency values (exclude $ keys except $context)
110        let filtered_deps: IndexSet<String> = deps
111            .iter()
112            .filter(|dep_key| self.should_cache_dependency(dep_key) && dep_key.as_str() != path_utils::normalize_to_json_pointer(eval_key))
113            .cloned()
114            .collect();
115
116        let dep_values: Vec<(String, &Value)> = filtered_deps
117            .iter()
118            .filter_map(|dep_key| {
119                eval_data_snapshot.get(dep_key).and_then(|v| {
120                    if let Value::Array(arr) = v {
121                        if arr.len() > 10 && !crate::parse_schema::common::has_actionable_keys(v) {
122                            return None;
123                        }
124                    }
125                    Some((dep_key.clone(), v))
126                })
127            })
128            .collect();
129
130        let cache_key = CacheKey::new(eval_key.to_string(), &filtered_deps, &dep_values);
131        
132        self.eval_cache.insert(cache_key, value);
133
134    }
135
136    /// Purge cache entries affected by changed data paths, comparing old and new values
137    /// Selectively purge cache entries that depend on changed data paths
138    /// Only removes cache entries whose dependencies intersect with changed_paths
139    /// Recursively find all paths where the values differ
140    fn compute_changed_paths(old: &Value, new: &Value, current_path: String, changed: &mut Vec<String>) {
141        if old == new {
142            return;
143        }
144
145        match (old, new) {
146            (Value::Object(o1), Value::Object(o2)) => {
147                for (k, v2) in o2 {
148                    if let Some(v1) = o1.get(k) {
149                        if v1 != v2 {
150                            let path = format!("{}/{}", current_path, k);
151                            Self::compute_changed_paths(v1, v2, path, changed);
152                        }
153                    } else {
154                        let path = format!("{}/{}", current_path, k);
155                        changed.push(path);
156                    }
157                }
158                for k in o1.keys() {
159                    if !o2.contains_key(k) {
160                        let path = format!("{}/{}", current_path, k);
161                        changed.push(path);
162                    }
163                }
164            }
165            (Value::Array(a1), Value::Array(a2)) if a1.len() == a2.len() => {
166                for (i, (v1, v2)) in a1.iter().zip(a2.iter()).enumerate() {
167                    if v1 != v2 {
168                        let path = format!("{}/{}", current_path, i);
169                        Self::compute_changed_paths(v1, v2, path, changed);
170                    }
171                }
172            }
173            _ => {
174                if current_path.is_empty() {
175                    changed.push("/".to_string());
176                } else {
177                    changed.push(current_path);
178                }
179            }
180        }
181    }
182
183    /// Compares old vs new values by deep diffing and purges affected entries
184    pub fn purge_cache_for_changed_data_with_comparison(
185        &self,
186        old_data: &Value,
187        new_data: &Value,
188    ) {
189        let mut actually_changed_paths = Vec::new();
190        Self::compute_changed_paths(old_data, new_data, "".to_string(), &mut actually_changed_paths);
191
192        // If no values actually changed, no need to purge
193        if actually_changed_paths.is_empty() {
194            return;
195        }
196
197        // Find all eval_keys that depend on the actually changed data paths
198        let mut affected_eval_keys = IndexSet::new();
199
200        for (eval_key, deps) in self.dependencies.iter() {
201            // Check if this evaluation depends on any of the changed paths
202            let is_affected = deps.iter().any(|dep| {
203                // Check if the dependency matches any changed path
204                actually_changed_paths.iter().any(|changed_path| {
205                    // Exact match or prefix match (for nested fields)
206                    dep == changed_path
207                        || dep.starts_with(&format!("{}/", changed_path))
208                        || changed_path.starts_with(&format!("{}/", dep))
209                })
210            });
211
212            if is_affected {
213                affected_eval_keys.insert(eval_key.clone());
214            }
215        }
216
217        // Remove all cache entries for affected eval_keys using retain
218        // Keep entries whose eval_key is NOT in the affected set
219        self.eval_cache
220            .retain(|cache_key, _| !affected_eval_keys.contains(&cache_key.eval_key));
221    }
222
223    /// Selectively purge cache entries that depend on changed data paths
224    /// Finds all eval_keys that depend on the changed paths and removes them
225    /// Selectively purge cache entries that depend on changed data paths
226    /// Simpler version without value comparison for cases where we don't have old data
227    pub fn purge_cache_for_changed_data(&self, changed_data_paths: &[String]) {
228        if changed_data_paths.is_empty() {
229            return;
230        }
231
232        // Find all eval_keys that depend on the changed paths
233        let mut affected_eval_keys = IndexSet::new();
234
235        for (eval_key, deps) in self.dependencies.iter() {
236            // Check if this evaluation depends on any of the changed paths
237            let is_affected = deps.iter().any(|dep| {
238                // Check if dependency path matches any changed data path using flexible matching
239                changed_data_paths.iter().any(|changed_for_purge| {
240                    // Check both directions:
241                    // 1. Dependency matches changed data (dependency is child of change)
242                    // 2. Changed data matches dependency (change is child of dependency)
243                    Self::paths_match_flexible(dep, changed_for_purge)
244                        || Self::paths_match_flexible(changed_for_purge, dep)
245                })
246            });
247
248            if is_affected {
249                affected_eval_keys.insert(eval_key.clone());
250            }
251        }
252
253        // Remove all cache entries for affected eval_keys using retain
254        // Keep entries whose eval_key is NOT in the affected set
255        self.eval_cache
256            .retain(|cache_key, _| !affected_eval_keys.contains(&cache_key.eval_key));
257    }
258
259    /// Flexible path matching that handles structural schema keywords (e.g. properties, oneOf)
260    /// Returns true if schema_path structurally matches data_path
261    fn paths_match_flexible(schema_path: &str, data_path: &str) -> bool {
262        let s_segs: Vec<&str> = schema_path
263            .trim_start_matches('#')
264            .trim_start_matches('/')
265            .split('/')
266            .filter(|s| !s.is_empty())
267            .collect();
268        let d_segs: Vec<&str> = data_path
269            .trim_start_matches('/')
270            .split('/')
271            .filter(|s| !s.is_empty())
272            .collect();
273
274        let mut d_idx = 0;
275
276        for s_seg in s_segs {
277            // If we matched all data segments, we are good (schema is deeper/parent)
278            if d_idx >= d_segs.len() {
279                return true;
280            }
281
282            let d_seg = d_segs[d_idx];
283
284            if s_seg == d_seg {
285                // Exact match, advance data pointer
286                d_idx += 1;
287            } else if s_seg == "items"
288                || s_seg == "additionalProperties"
289                || s_seg == "patternProperties"
290            {
291                // Wildcard match for arrays/maps - consume data segment if it looks valid
292                // Note: items matches array index (numeric). additionalProperties matches any key.
293                if s_seg == "items" {
294                    // Only match if data segment is numeric (array index)
295                    if d_seg.chars().all(|c| c.is_ascii_digit()) {
296                        d_idx += 1;
297                    }
298                } else {
299                    // additionalProperties/patternProperties matches any string key
300                    d_idx += 1;
301                }
302            } else if Self::is_structural_keyword(s_seg)
303                || s_seg.chars().all(|c| c.is_ascii_digit())
304            {
305                // Skip structural keywords (properties, oneOf, etc) and numeric indices in schema (e.g. oneOf/0)
306                continue;
307            } else {
308                // Mismatch: schema has a named segment that data doesn't have
309                return false;
310            }
311        }
312
313        // Return true if we consumed all data segments
314        true
315    }
316    
317    /// Purge cache entries affected by context changes
318    /// Purge cache entries that depend on context
319    pub fn purge_cache_for_context_change(&self) {
320        // Find all eval_keys that depend on $context
321        let mut affected_eval_keys = IndexSet::new();
322
323        for (eval_key, deps) in self.dependencies.iter() {
324            let is_affected = deps.iter().any(|dep| {
325                dep == "$context" || dep.starts_with("$context.") || dep.starts_with("/$context")
326            });
327
328            if is_affected {
329                affected_eval_keys.insert(eval_key.clone());
330            }
331        }
332
333        self.eval_cache
334            .retain(|cache_key, _| !affected_eval_keys.contains(&cache_key.eval_key));
335    }
336
    /// Get cache statistics.
    ///
    /// Returns whatever `self.eval_cache.stats()` reports for this
    /// evaluator's own cache.
    pub fn cache_stats(&self) -> CacheStats {
        self.eval_cache.stats()
    }
341
    /// Clear the cache manually.
    ///
    /// Empties this evaluator's own `eval_cache`. The `cache_enabled` flag is
    /// left as it is, and this method does not touch `self.subforms`.
    pub fn clear_cache(&self) {
        self.eval_cache.clear();
    }
346
347    /// Enable caching
348    pub fn enable_cache(&mut self) {
349        self.cache_enabled = true;
350        for subform in self.subforms.values_mut() {
351            subform.enable_cache();
352        }
353    }
354
355    /// Disable caching
356    pub fn disable_cache(&mut self) {
357        self.cache_enabled = false;
358        self.eval_cache.clear();
359        for subform in self.subforms.values_mut() {
360            subform.disable_cache();
361        }
362    }
363    
    /// Check if cache is enabled.
    ///
    /// Returns the current value of this evaluator's `cache_enabled` flag.
    pub fn is_cache_enabled(&self) -> bool {
        self.cache_enabled
    }
368
369    /// Helper to check if a key is a structural JSON Schema keyword
370    /// Helper to check if a key is a structural JSON Schema keyword
371    fn is_structural_keyword(key: &str) -> bool {
372        matches!(
373            key,
374            "properties"
375                | "definitions"
376                | "$defs"
377                | "allOf"
378                | "anyOf"
379                | "oneOf"
380                | "not"
381                | "if"
382                | "then"
383                | "else"
384                | "dependentSchemas"
385                | "$params"
386                | "dependencies"
387        )
388    }
389    
    /// Get cache size.
    ///
    /// Returns `self.eval_cache.len()` for this evaluator's own cache.
    pub fn cache_len(&self) -> usize {
        self.eval_cache.len()
    }
394}