Skip to main content

lintel_validation_cache/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::path::PathBuf;
4
5use serde::{Deserialize, Serialize};
6use serde_json::Value;
7use sha2::{Digest, Sha256};
8
/// Outcome of consulting the disk cache for a validation result.
///
/// Tells the caller whether the result it received was read back from disk
/// or whether validation has to be (or was) performed from scratch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationCacheStatus {
    /// A stored result for this input was read from the disk cache.
    Hit,
    /// No stored result was used: either nothing usable was cached or the
    /// cache is running in skip-read mode.
    Miss,
}
17
18/// A single validation error with its location and schema context.
19#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
20pub struct ValidationError {
21    /// JSON Pointer to the failing instance (e.g. `/jobs/build`).
22    pub instance_path: String,
23    /// Human-readable error message.
24    pub message: String,
25    /// JSON Schema path that triggered the error (e.g. `/properties/jobs/oneOf`).
26    #[serde(default)]
27    pub schema_path: String,
28}
29
30#[derive(Serialize, Deserialize)]
31struct CachedResult {
32    errors: Vec<ValidationError>,
33}
34
/// A disk-backed cache for JSON Schema validation results.
///
/// Results are keyed by `SHA-256(file_content + schema_hash + validate_formats_byte)`,
/// where `schema_hash` is the caller-supplied, pre-computed hash of the schema.
/// Cache files are stored as `<cache_dir>/<sha256-hex>.json`.
#[derive(Debug, Clone)]
pub struct ValidationCache {
    /// Directory that holds one JSON file per cache entry.
    cache_dir: PathBuf,
    /// When `true`, lookups always report a miss; stores are still written.
    skip_read: bool,
}
44
45impl ValidationCache {
46    pub fn new(cache_dir: PathBuf, skip_read: bool) -> Self {
47        Self {
48            cache_dir,
49            skip_read,
50        }
51    }
52
53    /// Look up a cached validation result.
54    ///
55    /// Returns `(Some(errors), Hit)` on cache hit.
56    /// Returns `(None, Miss)` on cache miss or when `skip_read` is set.
57    ///
58    /// `schema_hash` should be obtained from [`schema_hash`] — pass the same
59    /// value for all files in a schema group to avoid redundant serialization.
60    pub async fn lookup(
61        &self,
62        file_content: &str,
63        schema_hash: &str,
64        validate_formats: bool,
65    ) -> (Option<Vec<ValidationError>>, ValidationCacheStatus) {
66        if self.skip_read {
67            return (None, ValidationCacheStatus::Miss);
68        }
69
70        let key = Self::cache_key(file_content, schema_hash, validate_formats);
71        let cache_path = self.cache_dir.join(format!("{key}.json"));
72
73        let Ok(data) = tokio::fs::read_to_string(&cache_path).await else {
74            return (None, ValidationCacheStatus::Miss);
75        };
76
77        let Ok(cached) = serde_json::from_str::<CachedResult>(&data) else {
78            return (None, ValidationCacheStatus::Miss);
79        };
80
81        (Some(cached.errors), ValidationCacheStatus::Hit)
82    }
83
84    /// Store a validation result to the disk cache.
85    ///
86    /// Always writes regardless of `skip_read`, so running with
87    /// `--force-validation` repopulates the cache for future runs.
88    ///
89    /// `schema_hash` should be obtained from [`schema_hash`] — pass the same
90    /// value for all files in a schema group to avoid redundant serialization.
91    pub async fn store(
92        &self,
93        file_content: &str,
94        schema_hash: &str,
95        validate_formats: bool,
96        errors: &[ValidationError],
97    ) {
98        let key = Self::cache_key(file_content, schema_hash, validate_formats);
99        let cache_path = self.cache_dir.join(format!("{key}.json"));
100
101        let cached = CachedResult {
102            errors: errors.to_vec(),
103        };
104
105        let Ok(json) = serde_json::to_string(&cached) else {
106            return;
107        };
108
109        if tokio::fs::create_dir_all(&self.cache_dir).await.is_ok() {
110            let _ = tokio::fs::write(&cache_path, json).await;
111        }
112    }
113
114    /// Compute the SHA-256 cache key from file content, a pre-computed schema hash, and format flag.
115    pub fn cache_key(file_content: &str, schema_hash: &str, validate_formats: bool) -> String {
116        let mut hasher = Sha256::new();
117        hasher.update(file_content.as_bytes());
118        hasher.update(schema_hash.as_bytes());
119        hasher.update([u8::from(validate_formats)]);
120        format!("{:x}", hasher.finalize())
121    }
122}
123
124/// Compute a SHA-256 hash of a schema `Value`.
125///
126/// Call this once per schema group and pass the result to
127/// [`ValidationCache::lookup`] and [`ValidationCache::store`].
128pub fn schema_hash(schema: &Value) -> String {
129    let mut hasher = Sha256::new();
130    hasher.update(schema.to_string().as_bytes());
131    format!("{:x}", hasher.finalize())
132}
133
134/// Return a usable cache directory for validation results, creating it if necessary.
135///
136/// Tries `<system_cache>/lintel/validations` first, falling back to
137/// `<temp_dir>/lintel/validations` when the preferred path is unwritable.
138pub fn ensure_cache_dir() -> PathBuf {
139    let candidates = [
140        dirs::cache_dir().map(|d| d.join("lintel").join("validations")),
141        Some(std::env::temp_dir().join("lintel").join("validations")),
142    ];
143    for candidate in candidates.into_iter().flatten() {
144        if std::fs::create_dir_all(&candidate).is_ok() {
145            return candidate;
146        }
147    }
148    std::env::temp_dir().join("lintel").join("validations")
149}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// Small object schema shared by the tests.
    fn sample_schema() -> Value {
        serde_json::json!({"type": "object", "properties": {"name": {"type": "string"}}})
    }

    /// Hash of [`sample_schema`], as passed to the cache API.
    fn sample_hash() -> String {
        schema_hash(&sample_schema())
    }

    /// Shorthand for building a [`ValidationError`] from string slices.
    fn error(instance_path: &str, message: &str, schema_path: &str) -> ValidationError {
        ValidationError {
            instance_path: instance_path.to_string(),
            message: message.to_string(),
            schema_path: schema_path.to_string(),
        }
    }

    #[test]
    fn cache_key_deterministic() {
        let schema = sample_hash();
        assert_eq!(
            ValidationCache::cache_key("hello", &schema, true),
            ValidationCache::cache_key("hello", &schema, true),
        );
    }

    #[test]
    fn cache_key_differs_on_content() {
        let schema = sample_hash();
        assert_ne!(
            ValidationCache::cache_key("hello", &schema, true),
            ValidationCache::cache_key("world", &schema, true),
        );
    }

    #[test]
    fn cache_key_differs_on_schema() {
        let object_schema = sample_hash();
        let string_schema = schema_hash(&serde_json::json!({"type": "string"}));
        assert_ne!(
            ValidationCache::cache_key("hello", &object_schema, true),
            ValidationCache::cache_key("hello", &string_schema, true),
        );
    }

    #[test]
    fn cache_key_differs_on_formats() {
        let schema = sample_hash();
        assert_ne!(
            ValidationCache::cache_key("hello", &schema, true),
            ValidationCache::cache_key("hello", &schema, false),
        );
    }

    #[tokio::test]
    async fn store_and_lookup() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let cache = ValidationCache::new(dir.path().to_path_buf(), false);
        let schema = sample_hash();

        let stored = vec![error("/name", "missing required property", "/required")];
        cache.store("content", &schema, true, &stored).await;

        let (found, status) = cache.lookup("content", &schema, true).await;
        assert_eq!(status, ValidationCacheStatus::Hit);
        // The entry must round-trip unchanged: same length, same fields.
        let found = found.expect("expected cache hit");
        assert_eq!(found, stored);
        Ok(())
    }

    #[tokio::test]
    async fn lookup_miss() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let cache = ValidationCache::new(dir.path().to_path_buf(), false);
        let schema = sample_hash();

        let (found, status) = cache.lookup("content", &schema, true).await;
        assert_eq!(status, ValidationCacheStatus::Miss);
        assert!(found.is_none());
        Ok(())
    }

    #[tokio::test]
    async fn skip_read_forces_miss() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let reading = ValidationCache::new(dir.path().to_path_buf(), false);
        let skipping = ValidationCache::new(dir.path().to_path_buf(), true);
        let schema = sample_hash();

        // Populate an entry through the regular cache.
        reading.store("content", &schema, true, &[]).await;

        // The skip-read cache must not serve it.
        let (found, status) = skipping.lookup("content", &schema, true).await;
        assert_eq!(status, ValidationCacheStatus::Miss);
        assert!(found.is_none());

        // Writes still go through in skip-read mode; confirm by reading the
        // entry back with the regular cache.
        let written = [error("path", "msg", "")];
        skipping.store("other", &schema, true, &written).await;

        let (found, status) = reading.lookup("other", &schema, true).await;
        assert_eq!(status, ValidationCacheStatus::Hit);
        assert!(found.is_some());
        Ok(())
    }

    #[test]
    fn ensure_cache_dir_ends_with_validations() {
        assert!(ensure_cache_dir().ends_with("lintel/validations"));
    }
}
266}