Skip to main content

lintel_validation_cache/
lib.rs

1use std::path::PathBuf;
2
3use serde::{Deserialize, Serialize};
4use serde_json::Value;
5use sha2::{Digest, Sha256};
6
/// Origin of a validation result: read back from the disk cache, or not.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ValidationCacheStatus {
    /// A stored result for the requested key was read from the disk cache.
    Hit,
    /// No stored result was used — either nothing usable was cached, or the
    /// cache was told to skip reads — so the result has to be computed.
    Miss,
}
15
16#[derive(Serialize, Deserialize)]
17struct CachedError {
18    instance_path: String,
19    message: String,
20}
21
22#[derive(Serialize, Deserialize)]
23struct CachedResult {
24    errors: Vec<CachedError>,
25}
26
/// A disk-backed cache for JSON Schema validation results.
///
/// Results are keyed by `SHA-256(file_content + schema_hash + validate_formats_byte)`,
/// where `schema_hash` is the pre-computed hash string callers obtain from
/// [`schema_hash`] (not the raw schema JSON).
/// Cache files are stored as `<cache_dir>/<sha256-hex>.json`.
#[derive(Debug, Clone)]
pub struct ValidationCache {
    /// Directory that holds the `<sha256-hex>.json` cache files.
    cache_dir: PathBuf,
    /// When `true`, lookups report a miss without reading from disk.
    skip_read: bool,
}
36
37impl ValidationCache {
38    pub fn new(cache_dir: PathBuf, skip_read: bool) -> Self {
39        Self {
40            cache_dir,
41            skip_read,
42        }
43    }
44
45    /// Look up a cached validation result.
46    ///
47    /// Returns `(Some(errors), Hit)` on cache hit, where each error is
48    /// `(instance_path, message)`. Returns `(None, Miss)` on cache miss or
49    /// when `skip_read` is set.
50    ///
51    /// `schema_hash` should be obtained from [`schema_hash`] — pass the same
52    /// value for all files in a schema group to avoid redundant serialization.
53    pub async fn lookup(
54        &self,
55        file_content: &str,
56        schema_hash: &str,
57        validate_formats: bool,
58    ) -> (Option<Vec<(String, String)>>, ValidationCacheStatus) {
59        if self.skip_read {
60            return (None, ValidationCacheStatus::Miss);
61        }
62
63        let key = Self::cache_key(file_content, schema_hash, validate_formats);
64        let cache_path = self.cache_dir.join(format!("{key}.json"));
65
66        let Ok(data) = tokio::fs::read_to_string(&cache_path).await else {
67            return (None, ValidationCacheStatus::Miss);
68        };
69
70        let Ok(cached) = serde_json::from_str::<CachedResult>(&data) else {
71            return (None, ValidationCacheStatus::Miss);
72        };
73
74        let errors: Vec<(String, String)> = cached
75            .errors
76            .into_iter()
77            .map(|e| (e.instance_path, e.message))
78            .collect();
79
80        (Some(errors), ValidationCacheStatus::Hit)
81    }
82
83    /// Store a validation result to the disk cache.
84    ///
85    /// Always writes regardless of `skip_read`, so running with
86    /// `--force-validation` repopulates the cache for future runs.
87    ///
88    /// `schema_hash` should be obtained from [`schema_hash`] — pass the same
89    /// value for all files in a schema group to avoid redundant serialization.
90    pub async fn store(
91        &self,
92        file_content: &str,
93        schema_hash: &str,
94        validate_formats: bool,
95        errors: &[(String, String)],
96    ) {
97        let key = Self::cache_key(file_content, schema_hash, validate_formats);
98        let cache_path = self.cache_dir.join(format!("{key}.json"));
99
100        let cached = CachedResult {
101            errors: errors
102                .iter()
103                .map(|(ip, msg)| CachedError {
104                    instance_path: ip.clone(),
105                    message: msg.clone(),
106                })
107                .collect(),
108        };
109
110        let Ok(json) = serde_json::to_string(&cached) else {
111            return;
112        };
113
114        if tokio::fs::create_dir_all(&self.cache_dir).await.is_ok() {
115            let _ = tokio::fs::write(&cache_path, json).await;
116        }
117    }
118
119    /// Compute the SHA-256 cache key from file content, a pre-computed schema hash, and format flag.
120    fn cache_key(file_content: &str, schema_hash: &str, validate_formats: bool) -> String {
121        let mut hasher = Sha256::new();
122        hasher.update(file_content.as_bytes());
123        hasher.update(schema_hash.as_bytes());
124        hasher.update([u8::from(validate_formats)]);
125        format!("{:x}", hasher.finalize())
126    }
127}
128
129/// Compute a SHA-256 hash of a schema `Value`.
130///
131/// Call this once per schema group and pass the result to
132/// [`ValidationCache::lookup`] and [`ValidationCache::store`].
133pub fn schema_hash(schema: &Value) -> String {
134    let mut hasher = Sha256::new();
135    hasher.update(schema.to_string().as_bytes());
136    format!("{:x}", hasher.finalize())
137}
138
139/// Return a usable cache directory for validation results, creating it if necessary.
140///
141/// Tries `<system_cache>/lintel/validations` first, falling back to
142/// `<temp_dir>/lintel/validations` when the preferred path is unwritable.
143pub fn ensure_cache_dir() -> PathBuf {
144    let candidates = [
145        dirs::cache_dir().map(|d| d.join("lintel").join("validations")),
146        Some(std::env::temp_dir().join("lintel").join("validations")),
147    ];
148    for candidate in candidates.into_iter().flatten() {
149        if std::fs::create_dir_all(&candidate).is_ok() {
150            return candidate;
151        }
152    }
153    std::env::temp_dir().join("lintel").join("validations")
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Small object schema shared by the tests below.
    fn sample_schema() -> Value {
        serde_json::json!({"type": "object", "properties": {"name": {"type": "string"}}})
    }

    // --- cache_key ---------------------------------------------------------

    #[test]
    fn cache_key_deterministic() {
        let schema = schema_hash(&sample_schema());
        assert_eq!(
            ValidationCache::cache_key("hello", &schema, true),
            ValidationCache::cache_key("hello", &schema, true),
        );
    }

    #[test]
    fn cache_key_differs_on_content() {
        let schema = schema_hash(&sample_schema());
        assert_ne!(
            ValidationCache::cache_key("hello", &schema, true),
            ValidationCache::cache_key("world", &schema, true),
        );
    }

    #[test]
    fn cache_key_differs_on_schema() {
        let object_schema = schema_hash(&sample_schema());
        let string_schema = schema_hash(&serde_json::json!({"type": "string"}));
        assert_ne!(
            ValidationCache::cache_key("hello", &object_schema, true),
            ValidationCache::cache_key("hello", &string_schema, true),
        );
    }

    #[test]
    fn cache_key_differs_on_formats() {
        let schema = schema_hash(&sample_schema());
        assert_ne!(
            ValidationCache::cache_key("hello", &schema, true),
            ValidationCache::cache_key("hello", &schema, false),
        );
    }

    // --- store / lookup ----------------------------------------------------

    #[tokio::test]
    async fn store_and_lookup() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let cache = ValidationCache::new(dir.path().to_path_buf(), false);
        let schema = schema_hash(&sample_schema());

        let stored = vec![("/name".to_string(), "missing required property".to_string())];
        cache.store("content", &schema, true, &stored).await;

        let (found, status) = cache.lookup("content", &schema, true).await;
        assert_eq!(status, ValidationCacheStatus::Hit);
        // The round trip must return exactly the one error that was stored.
        let found = found.expect("expected cache hit");
        assert_eq!(found, stored);
        Ok(())
    }

    #[tokio::test]
    async fn lookup_miss() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let cache = ValidationCache::new(dir.path().to_path_buf(), false);
        let schema = schema_hash(&sample_schema());

        // Nothing was stored, so the lookup has nothing to find.
        let (found, status) = cache.lookup("content", &schema, true).await;
        assert_eq!(status, ValidationCacheStatus::Miss);
        assert!(found.is_none());
        Ok(())
    }

    #[tokio::test]
    async fn skip_read_forces_miss() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let reading = ValidationCache::new(dir.path().to_path_buf(), false);
        let skipping = ValidationCache::new(dir.path().to_path_buf(), true);
        let schema = schema_hash(&sample_schema());

        // Put an entry on disk through the regular cache.
        reading.store("content", &schema, true, &[]).await;

        // The skip-read cache must not see it.
        let (found, status) = skipping.lookup("content", &schema, true).await;
        assert_eq!(status, ValidationCacheStatus::Miss);
        assert!(found.is_none());

        // Writes through the skip-read cache still land on disk; confirm by
        // reading the entry back through the regular cache.
        let other_errors = [("path".to_string(), "msg".to_string())];
        skipping.store("other", &schema, true, &other_errors).await;
        let (found, status) = reading.lookup("other", &schema, true).await;
        assert_eq!(status, ValidationCacheStatus::Hit);
        assert!(found.is_some());
        Ok(())
    }

    // --- ensure_cache_dir --------------------------------------------------

    #[test]
    fn ensure_cache_dir_ends_with_validations() {
        assert!(ensure_cache_dir().ends_with("lintel/validations"));
    }
}