Skip to main content

lintel_validation_cache/
lib.rs

1use std::fs;
2use std::path::PathBuf;
3
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use sha2::{Digest, Sha256};
7
/// Whether a validation result was served from the disk cache or freshly computed.
///
/// Returned alongside the optional cached errors by [`ValidationCache::lookup`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationCacheStatus {
    /// Validation result was found in the disk cache.
    Hit,
    /// Validation result must be computed: there was no usable cache entry
    /// (missing, unreadable, or unparseable file) or the cache is in
    /// skip-read mode.
    Miss,
}
16
/// On-disk form of a single validation error inside a cache file.
///
/// Mirrors the `(instance_path, message)` pairs exchanged with
/// [`ValidationCache::lookup`] and [`ValidationCache::store`]. The type is
/// (de)serialized through serde, so renaming a field presumably changes the
/// cache file format and would turn existing entries into misses.
#[derive(Serialize, Deserialize)]
struct CachedError {
    /// First element of the `(instance_path, message)` pair.
    instance_path: String,
    /// Second element of the `(instance_path, message)` pair.
    message: String,
}
22
/// Top-level document written to, and read back from, each cache file.
#[derive(Serialize, Deserialize)]
struct CachedResult {
    /// Every validation error recorded for the cached input; an empty list
    /// is what gets stored when the caller passes no errors.
    errors: Vec<CachedError>,
}
27
/// A disk-backed cache for JSON Schema validation results.
///
/// Results are keyed by `SHA-256(file_content + schema_hash + validate_formats_byte)`,
/// where `schema_hash` is the caller-supplied string normally produced by
/// [`schema_hash`] (the hex digest of the schema, not the raw schema JSON).
/// Cache files are stored as `<cache_dir>/<sha256-hex>.json`.
#[derive(Debug, Clone)]
pub struct ValidationCache {
    /// Directory holding the cache files; it is created on demand when an
    /// entry is stored.
    cache_dir: PathBuf,
    /// When `true`, lookups always report a miss without reading the disk;
    /// storing is unaffected.
    skip_read: bool,
}
37
38impl ValidationCache {
39    pub fn new(cache_dir: PathBuf, skip_read: bool) -> Self {
40        Self {
41            cache_dir,
42            skip_read,
43        }
44    }
45
46    /// Look up a cached validation result.
47    ///
48    /// Returns `(Some(errors), Hit)` on cache hit, where each error is
49    /// `(instance_path, message)`. Returns `(None, Miss)` on cache miss or
50    /// when `skip_read` is set.
51    ///
52    /// `schema_hash` should be obtained from [`schema_hash`] — pass the same
53    /// value for all files in a schema group to avoid redundant serialization.
54    pub fn lookup(
55        &self,
56        file_content: &str,
57        schema_hash: &str,
58        validate_formats: bool,
59    ) -> (Option<Vec<(String, String)>>, ValidationCacheStatus) {
60        if self.skip_read {
61            return (None, ValidationCacheStatus::Miss);
62        }
63
64        let key = Self::cache_key(file_content, schema_hash, validate_formats);
65        let cache_path = self.cache_dir.join(format!("{key}.json"));
66
67        let Ok(data) = fs::read_to_string(&cache_path) else {
68            return (None, ValidationCacheStatus::Miss);
69        };
70
71        let Ok(cached) = serde_json::from_str::<CachedResult>(&data) else {
72            return (None, ValidationCacheStatus::Miss);
73        };
74
75        let errors: Vec<(String, String)> = cached
76            .errors
77            .into_iter()
78            .map(|e| (e.instance_path, e.message))
79            .collect();
80
81        (Some(errors), ValidationCacheStatus::Hit)
82    }
83
84    /// Store a validation result to the disk cache.
85    ///
86    /// Always writes regardless of `skip_read`, so running with
87    /// `--force-validation` repopulates the cache for future runs.
88    ///
89    /// `schema_hash` should be obtained from [`schema_hash`] — pass the same
90    /// value for all files in a schema group to avoid redundant serialization.
91    pub fn store(
92        &self,
93        file_content: &str,
94        schema_hash: &str,
95        validate_formats: bool,
96        errors: &[(String, String)],
97    ) {
98        let key = Self::cache_key(file_content, schema_hash, validate_formats);
99        let cache_path = self.cache_dir.join(format!("{key}.json"));
100
101        let cached = CachedResult {
102            errors: errors
103                .iter()
104                .map(|(ip, msg)| CachedError {
105                    instance_path: ip.clone(),
106                    message: msg.clone(),
107                })
108                .collect(),
109        };
110
111        let Ok(json) = serde_json::to_string(&cached) else {
112            return;
113        };
114
115        if fs::create_dir_all(&self.cache_dir).is_ok() {
116            let _ = fs::write(&cache_path, json);
117        }
118    }
119
120    /// Compute the SHA-256 cache key from file content, a pre-computed schema hash, and format flag.
121    fn cache_key(file_content: &str, schema_hash: &str, validate_formats: bool) -> String {
122        let mut hasher = Sha256::new();
123        hasher.update(file_content.as_bytes());
124        hasher.update(schema_hash.as_bytes());
125        hasher.update([u8::from(validate_formats)]);
126        format!("{:x}", hasher.finalize())
127    }
128}
129
130/// Compute a SHA-256 hash of a schema `Value`.
131///
132/// Call this once per schema group and pass the result to
133/// [`ValidationCache::lookup`] and [`ValidationCache::store`].
134pub fn schema_hash(schema: &Value) -> String {
135    let mut hasher = Sha256::new();
136    hasher.update(schema.to_string().as_bytes());
137    format!("{:x}", hasher.finalize())
138}
139
140/// Return the default cache directory for validation results:
141/// `<system_cache>/lintel/validations`.
142pub fn default_cache_dir() -> PathBuf {
143    dirs::cache_dir()
144        .unwrap_or_else(|| PathBuf::from(".cache"))
145        .join("lintel")
146        .join("validations")
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Small object schema shared by most tests.
    fn sample_schema() -> Value {
        serde_json::json!({"type": "object", "properties": {"name": {"type": "string"}}})
    }

    /// Cache key for `content` under the sample schema's hash.
    fn sample_key(content: &str, validate_formats: bool) -> String {
        let hash = schema_hash(&sample_schema());
        ValidationCache::cache_key(content, &hash, validate_formats)
    }

    #[test]
    fn cache_key_deterministic() {
        assert_eq!(sample_key("hello", true), sample_key("hello", true));
    }

    #[test]
    fn cache_key_differs_on_content() {
        assert_ne!(sample_key("hello", true), sample_key("world", true));
    }

    #[test]
    fn cache_key_differs_on_schema() {
        let other_hash = schema_hash(&serde_json::json!({"type": "string"}));
        let with_sample = sample_key("hello", true);
        let with_other = ValidationCache::cache_key("hello", &other_hash, true);
        assert_ne!(with_sample, with_other);
    }

    #[test]
    fn cache_key_differs_on_formats() {
        assert_ne!(sample_key("hello", true), sample_key("hello", false));
    }

    #[test]
    fn store_and_lookup() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let cache = ValidationCache::new(tmp.path().to_path_buf(), false);
        let hash = schema_hash(&sample_schema());

        let stored = vec![(
            String::from("/name"),
            String::from("missing required property"),
        )];
        cache.store("content", &hash, true, &stored);

        // The entry read back must be exactly the single pair that was stored.
        let (found, status) = cache.lookup("content", &hash, true);
        assert_eq!(status, ValidationCacheStatus::Hit);
        let found = found.expect("expected cache hit");
        assert_eq!(found, stored);
        Ok(())
    }

    #[test]
    fn lookup_miss() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let cache = ValidationCache::new(tmp.path().to_path_buf(), false);
        let hash = schema_hash(&sample_schema());

        // Nothing was stored, so the lookup has nothing to return.
        let (found, status) = cache.lookup("content", &hash, true);
        assert_eq!(status, ValidationCacheStatus::Miss);
        assert_eq!(found, None);
        Ok(())
    }

    #[test]
    fn skip_read_forces_miss() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let dir = tmp.path().to_path_buf();
        let reading = ValidationCache::new(dir.clone(), false);
        let skipping = ValidationCache::new(dir, true);
        let hash = schema_hash(&sample_schema());

        // An entry exists on disk...
        reading.store("content", &hash, true, &[]);

        // ...but a skip-read cache refuses to see it.
        let (found, status) = skipping.lookup("content", &hash, true);
        assert_eq!(status, ValidationCacheStatus::Miss);
        assert!(found.is_none());

        // Writes through the skip-read cache still land on disk, as a
        // reading cache over the same directory can confirm.
        let extra = [("path".to_string(), "msg".to_string())];
        skipping.store("other", &hash, true, &extra);
        let (found, status) = reading.lookup("other", &hash, true);
        assert_eq!(status, ValidationCacheStatus::Hit);
        assert!(found.is_some());
        Ok(())
    }

    #[test]
    fn default_cache_dir_ends_with_validations() {
        assert!(default_cache_dir().ends_with("lintel/validations"));
    }
}