Skip to main content

lintel_validation_cache/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::path::PathBuf;
4
5use serde::{Deserialize, Serialize};
6use serde_json::Value;
7use sha2::{Digest, Sha256};
8
/// Outcome of a cache lookup: reports whether a validation result was read
/// back from disk or has to be computed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ValidationCacheStatus {
    /// A stored result was read back from the disk cache.
    Hit,
    /// No stored result was used (nothing usable on disk, or reads were
    /// skipped), so validation has to run.
    Miss,
}
17
18/// A single validation error with its location and schema context.
19#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
20pub struct ValidationError {
21    /// JSON Pointer to the failing instance (e.g. `/jobs/build`).
22    pub instance_path: String,
23    /// Human-readable error message.
24    pub message: String,
25    /// JSON Schema path that triggered the error (e.g. `/properties/jobs/oneOf`).
26    #[serde(default)]
27    pub schema_path: String,
28}
29
/// The cache lookup/store key: file content, schema hash, and format-validation flag.
///
/// Every field is a borrow or a `bool`, so the key is `Copy` and can be built
/// per file without allocating.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CacheKey<'a> {
    /// The raw file content being validated.
    pub file_content: &'a str,
    /// Pre-computed SHA-256 hash of the schema (see [`schema_hash`]).
    pub schema_hash: &'a str,
    /// Whether format validation was enabled.
    pub validate_formats: bool,
}
39
40#[derive(Serialize, Deserialize)]
41struct CachedResult {
42    errors: Vec<ValidationError>,
43}
44
/// A disk-backed cache for JSON Schema validation results.
///
/// Entries are keyed by a SHA-256 digest computed from the crate version, the
/// file content, the pre-computed schema hash, and the format-validation flag
/// (see [`ValidationCache::cache_key`]).
/// Cache files are stored as `<cache_dir>/<sha256-hex>.json`.
#[derive(Debug, Clone)]
pub struct ValidationCache {
    /// Directory that holds the cache entry files.
    cache_dir: PathBuf,
    /// When `true`, lookups always report a miss; stores still write.
    skip_read: bool,
}
54
55impl ValidationCache {
56    pub fn new(cache_dir: PathBuf, skip_read: bool) -> Self {
57        Self {
58            cache_dir,
59            skip_read,
60        }
61    }
62
63    /// Look up a cached validation result.
64    ///
65    /// Returns `(Some(errors), Hit)` on cache hit.
66    /// Returns `(None, Miss)` on cache miss or when `skip_read` is set.
67    ///
68    /// `key.schema_hash` should be obtained from [`schema_hash`] — pass the same
69    /// value for all files in a schema group to avoid redundant serialization.
70    pub async fn lookup(
71        &self,
72        key: &CacheKey<'_>,
73    ) -> (Option<Vec<ValidationError>>, ValidationCacheStatus) {
74        if self.skip_read {
75            return (None, ValidationCacheStatus::Miss);
76        }
77
78        let hash = Self::cache_key(key);
79        let cache_path = self.cache_dir.join(format!("{hash}.json"));
80
81        let Ok(data) = tokio::fs::read_to_string(&cache_path).await else {
82            return (None, ValidationCacheStatus::Miss);
83        };
84
85        let Ok(cached) = serde_json::from_str::<CachedResult>(&data) else {
86            return (None, ValidationCacheStatus::Miss);
87        };
88
89        (Some(cached.errors), ValidationCacheStatus::Hit)
90    }
91
92    /// Store a validation result to the disk cache.
93    ///
94    /// Always writes regardless of `skip_read`, so running with
95    /// `--force-validation` repopulates the cache for future runs.
96    ///
97    /// `key.schema_hash` should be obtained from [`schema_hash`] — pass the same
98    /// value for all files in a schema group to avoid redundant serialization.
99    pub async fn store(&self, key: &CacheKey<'_>, errors: &[ValidationError]) {
100        let hash = Self::cache_key(key);
101        let cache_path = self.cache_dir.join(format!("{hash}.json"));
102
103        let cached = CachedResult {
104            errors: errors.to_vec(),
105        };
106
107        let Ok(json) = serde_json::to_string(&cached) else {
108            return;
109        };
110
111        if tokio::fs::create_dir_all(&self.cache_dir).await.is_ok() {
112            let _ = tokio::fs::write(&cache_path, json).await;
113        }
114    }
115
116    /// Compute the SHA-256 cache key from a [`CacheKey`].
117    ///
118    /// The crate version is included in the hash so that upgrading lintel
119    /// automatically invalidates stale cache entries.
120    pub fn cache_key(key: &CacheKey<'_>) -> String {
121        let mut hasher = Sha256::new();
122        hasher.update(env!("CARGO_PKG_VERSION").as_bytes());
123        hasher.update(key.file_content.as_bytes());
124        hasher.update(key.schema_hash.as_bytes());
125        hasher.update([u8::from(key.validate_formats)]);
126        format!("{:x}", hasher.finalize())
127    }
128}
129
130/// Compute a SHA-256 hash of a schema `Value`.
131///
132/// Call this once per schema group and pass the result to
133/// [`ValidationCache::lookup`] and [`ValidationCache::store`].
134pub fn schema_hash(schema: &Value) -> String {
135    let mut hasher = Sha256::new();
136    hasher.update(schema.to_string().as_bytes());
137    format!("{:x}", hasher.finalize())
138}
139
140/// Return a usable cache directory for validation results, creating it if necessary.
141///
142/// Tries `<system_cache>/lintel/validations` first, falling back to
143/// `<temp_dir>/lintel/validations` when the preferred path is unwritable.
144pub fn ensure_cache_dir() -> PathBuf {
145    let candidates = [
146        dirs::cache_dir().map(|d| d.join("lintel").join("validations")),
147        Some(std::env::temp_dir().join("lintel").join("validations")),
148    ];
149    for candidate in candidates.into_iter().flatten() {
150        if std::fs::create_dir_all(&candidate).is_ok() {
151            return candidate;
152        }
153    }
154    std::env::temp_dir().join("lintel").join("validations")
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Small object schema shared by the tests.
    fn sample_schema() -> Value {
        serde_json::json!({"type": "object", "properties": {"name": {"type": "string"}}})
    }

    /// Build a [`CacheKey`] that borrows the given content and schema hash.
    fn make_key<'a>(content: &'a str, hash: &'a str, formats: bool) -> CacheKey<'a> {
        CacheKey {
            file_content: content,
            schema_hash: hash,
            validate_formats: formats,
        }
    }

    #[test]
    fn cache_key_deterministic() {
        let hash = schema_hash(&sample_schema());
        let key = make_key("hello", &hash, true);
        let first = ValidationCache::cache_key(&key);
        let second = ValidationCache::cache_key(&key);
        assert_eq!(first, second);
    }

    #[test]
    fn cache_key_differs_on_content() {
        let hash = schema_hash(&sample_schema());
        let [hello, world] = ["hello", "world"]
            .map(|content| ValidationCache::cache_key(&make_key(content, &hash, true)));
        assert_ne!(hello, world);
    }

    #[test]
    fn cache_key_differs_on_schema() {
        let object_hash = schema_hash(&sample_schema());
        let string_hash = schema_hash(&serde_json::json!({"type": "string"}));
        let with_object = ValidationCache::cache_key(&make_key("hello", &object_hash, true));
        let with_string = ValidationCache::cache_key(&make_key("hello", &string_hash, true));
        assert_ne!(with_object, with_string);
    }

    #[test]
    fn cache_key_differs_on_formats() {
        let hash = schema_hash(&sample_schema());
        let [enabled, disabled] = [true, false]
            .map(|formats| ValidationCache::cache_key(&make_key("hello", &hash, formats)));
        assert_ne!(enabled, disabled);
    }

    #[tokio::test]
    async fn store_and_lookup() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let cache = ValidationCache::new(tmp.path().to_path_buf(), false);
        let hash = schema_hash(&sample_schema());
        let key = make_key("content", &hash, true);

        let stored = ValidationError {
            instance_path: "/name".to_string(),
            message: "missing required property".to_string(),
            schema_path: "/required".to_string(),
        };
        cache.store(&key, std::slice::from_ref(&stored)).await;

        let (result, status) = cache.lookup(&key).await;
        assert_eq!(status, ValidationCacheStatus::Hit);
        let result = result.expect("expected cache hit");
        // Whole-value comparison covers the length and every field at once.
        assert_eq!(result, vec![stored]);
        Ok(())
    }

    #[tokio::test]
    async fn lookup_miss() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let cache = ValidationCache::new(tmp.path().to_path_buf(), false);
        let hash = schema_hash(&sample_schema());

        // Nothing has been stored, so the lookup must miss.
        let (result, status) = cache.lookup(&make_key("content", &hash, true)).await;
        assert_eq!(status, ValidationCacheStatus::Miss);
        assert!(result.is_none());
        Ok(())
    }

    #[tokio::test]
    async fn skip_read_forces_miss() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let dir = tmp.path().to_path_buf();
        let reading = ValidationCache::new(dir.clone(), false);
        let skipping = ValidationCache::new(dir, true);
        let hash = schema_hash(&sample_schema());

        // Populate an entry through the normal cache.
        let key = make_key("content", &hash, true);
        reading.store(&key, &[]).await;

        // The skip-read cache must not see it.
        let (result, status) = skipping.lookup(&key).await;
        assert_eq!(status, ValidationCacheStatus::Miss);
        assert!(result.is_none());

        // Writes through the skip-read cache still land on disk; confirm by
        // reading them back through the normal cache.
        let key_other = make_key("other", &hash, true);
        let error = ValidationError {
            instance_path: "path".to_string(),
            message: "msg".to_string(),
            schema_path: String::new(),
        };
        skipping.store(&key_other, &[error]).await;

        let (result, status) = reading.lookup(&key_other).await;
        assert_eq!(status, ValidationCacheStatus::Hit);
        assert!(result.is_some());
        Ok(())
    }

    #[test]
    fn ensure_cache_dir_ends_with_validations() {
        let dir = ensure_cache_dir();
        assert!(dir.ends_with("lintel/validations"));
    }
}
319}