Skip to main content

lintel_schema_cache/
lib.rs

1use std::collections::hash_map::DefaultHasher;
2use std::error::Error;
3use std::fs;
4use std::hash::{Hash, Hasher};
5use std::path::PathBuf;
6
7use serde_json::Value;
8
/// Outcome of a schema lookup with respect to the on-disk cache.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CacheStatus {
    /// The schema was read from the disk cache.
    Hit,
    /// The schema was not served from the cache; it was fetched from the
    /// network instead.
    Miss,
    /// Disk caching is turned off (`cache_dir` is `None`).
    Disabled,
}
19
/// Abstraction over "perform an HTTP GET and hand back the body as text".
///
/// Implementors must be cheaply cloneable and shareable across threads, as
/// required by the supertrait bounds.
pub trait HttpClient: Send + Sync + Clone + 'static {
    /// Retrieve the resource at `uri` and return its body as a `String`.
    ///
    /// # Errors
    ///
    /// Implementations return an error when the resource cannot be obtained.
    fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>>;
}
24
/// Default HTTP client using ureq.
///
/// This is a stateless unit type; it implements the common `Debug`, `Clone`
/// and `Default` traits so it can be logged and constructed generically.
#[derive(Debug, Clone, Default)]
pub struct UreqClient;
28
29impl HttpClient for UreqClient {
30    fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
31        let mut response = ureq::get(uri).call()?;
32        Ok(response.body_mut().read_to_string()?)
33    }
34}
35
36/// A disk-backed cache for JSON Schema files.
37///
38/// Schemas are fetched via HTTP and stored as `<cache_dir>/<hash>.json`
39/// where `<hash>` is a hex-encoded hash of the URI. When a schema is
40/// requested, the cache is checked first; on a miss the schema is fetched
41/// and written to disk for future use.
42#[derive(Clone)]
43pub struct SchemaCache<C: HttpClient = UreqClient> {
44    cache_dir: Option<PathBuf>,
45    client: C,
46}
47
48impl<C: HttpClient> SchemaCache<C> {
49    pub fn new(cache_dir: Option<PathBuf>, client: C) -> Self {
50        Self { cache_dir, client }
51    }
52
53    /// Fetch a schema by URI, using the disk cache when available.
54    ///
55    /// Returns the parsed schema and a [`CacheStatus`] indicating whether the
56    /// result came from the disk cache, the network, or caching was disabled.
57    pub fn fetch(&self, uri: &str) -> Result<(Value, CacheStatus), Box<dyn Error + Send + Sync>> {
58        // Check cache first
59        if let Some(ref cache_dir) = self.cache_dir {
60            let hash = Self::hash_uri(uri);
61            let cache_path = cache_dir.join(format!("{hash}.json"));
62            if cache_path.exists() {
63                let content = fs::read_to_string(&cache_path)?;
64                return Ok((serde_json::from_str(&content)?, CacheStatus::Hit));
65            }
66        }
67
68        // Fetch from network
69        let body = self.client.get(uri)?;
70        let value: Value = serde_json::from_str(&body)?;
71
72        let status = if let Some(ref cache_dir) = self.cache_dir {
73            // Write to cache
74            fs::create_dir_all(cache_dir)?;
75            let hash = Self::hash_uri(uri);
76            let cache_path = cache_dir.join(format!("{hash}.json"));
77            fs::write(&cache_path, &body)?;
78            CacheStatus::Miss
79        } else {
80            CacheStatus::Disabled
81        };
82
83        Ok((value, status))
84    }
85
86    fn hash_uri(uri: &str) -> String {
87        let mut hasher = DefaultHasher::new();
88        uri.hash(&mut hasher);
89        format!("{:016x}", hasher.finish())
90    }
91}
92
93/// Return the default cache directory for schemas: `<system_cache>/lintel/schemas`.
94pub fn default_cache_dir() -> PathBuf {
95    dirs::cache_dir()
96        .unwrap_or_else(|| PathBuf::from(".cache"))
97        .join("lintel")
98        .join("schemas")
99}
100
101// -- jsonschema trait impls --------------------------------------------------
102
103impl<C: HttpClient> jsonschema::Retrieve for SchemaCache<C> {
104    fn retrieve(
105        &self,
106        uri: &jsonschema::Uri<String>,
107    ) -> Result<Value, Box<dyn Error + Send + Sync>> {
108        let (value, _status) = self.fetch(uri.as_str())?;
109        Ok(value)
110    }
111}
112
113#[async_trait::async_trait]
114impl<C: HttpClient> jsonschema::AsyncRetrieve for SchemaCache<C> {
115    async fn retrieve(
116        &self,
117        uri: &jsonschema::Uri<String>,
118    ) -> Result<Value, Box<dyn Error + Send + Sync>> {
119        let cache = self.clone();
120        let uri_str = uri.as_str().to_string();
121        let (value, _status) = tokio::task::spawn_blocking(move || cache.fetch(&uri_str)).await??;
122        Ok(value)
123    }
124}
125
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// In-memory HTTP client: maps a URI to a canned response body and
    /// returns an error for any URI it has no entry for.
    #[derive(Clone)]
    struct MockClient(HashMap<String, String>);

    impl HttpClient for MockClient {
        fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
            self.0
                .get(uri)
                .cloned()
                .ok_or_else(|| format!("mock: no response for {uri}").into())
        }
    }

    /// Build a [`MockClient`] from `(uri, body)` pairs.
    fn mock(entries: &[(&str, &str)]) -> MockClient {
        MockClient(
            entries
                .iter()
                .map(|&(uri, body)| (uri.to_owned(), body.to_owned()))
                .collect(),
        )
    }

    #[test]
    fn hash_uri_deterministic() {
        let a = SchemaCache::<MockClient>::hash_uri("https://example.com/schema.json");
        let b = SchemaCache::<MockClient>::hash_uri("https://example.com/schema.json");
        assert_eq!(a, b);
    }

    #[test]
    fn hash_uri_different_inputs() {
        let a = SchemaCache::<MockClient>::hash_uri("https://example.com/a.json");
        let b = SchemaCache::<MockClient>::hash_uri("https://example.com/b.json");
        assert_ne!(a, b);
    }

    #[test]
    fn fetch_no_cache_dir() {
        let client = mock(&[("https://example.com/s.json", r#"{"type":"object"}"#)]);
        let cache = SchemaCache::new(None, client);
        let (val, status) = cache.fetch("https://example.com/s.json").unwrap();
        assert_eq!(val, serde_json::json!({"type": "object"}));
        assert_eq!(status, CacheStatus::Disabled);
    }

    #[test]
    fn fetch_cold_cache() {
        let tmp = tempfile::tempdir().unwrap();
        let client = mock(&[("https://example.com/s.json", r#"{"type":"string"}"#)]);
        let cache = SchemaCache::new(Some(tmp.path().to_path_buf()), client);
        let (val, status) = cache.fetch("https://example.com/s.json").unwrap();
        assert_eq!(val, serde_json::json!({"type": "string"}));
        assert_eq!(status, CacheStatus::Miss);

        // Verify file was written to disk
        let hash = SchemaCache::<MockClient>::hash_uri("https://example.com/s.json");
        let cache_path = tmp.path().join(format!("{hash}.json"));
        assert!(cache_path.exists());
    }

    #[test]
    fn fetch_warm_cache() {
        let tmp = tempfile::tempdir().unwrap();
        let hash = SchemaCache::<MockClient>::hash_uri("https://example.com/s.json");
        let cache_path = tmp.path().join(format!("{hash}.json"));
        fs::write(&cache_path, r#"{"type":"number"}"#).unwrap();

        // Client has no entries — if it were called, it would error
        let client = mock(&[]);
        let cache = SchemaCache::new(Some(tmp.path().to_path_buf()), client);
        let (val, status) = cache.fetch("https://example.com/s.json").unwrap();
        assert_eq!(val, serde_json::json!({"type": "number"}));
        assert_eq!(status, CacheStatus::Hit);
    }

    #[test]
    fn fetch_miss_then_hit_round_trip() {
        let tmp = tempfile::tempdir().unwrap();
        let uri = "https://example.com/s.json";
        let dir = Some(tmp.path().to_path_buf());

        let online = SchemaCache::new(dir.clone(), mock(&[(uri, r#"{"type":"boolean"}"#)]));
        let (_, first) = online.fetch(uri).unwrap();
        assert_eq!(first, CacheStatus::Miss);

        // An empty client proves the second lookup is served from the file
        // written by the first one rather than from the network.
        let offline = SchemaCache::new(dir, mock(&[]));
        let (val, second) = offline.fetch(uri).unwrap();
        assert_eq!(val, serde_json::json!({"type": "boolean"}));
        assert_eq!(second, CacheStatus::Hit);
    }

    #[test]
    fn fetch_creates_missing_cache_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let nested = tmp.path().join("nested").join("schemas");
        assert!(!nested.exists());

        let uri = "https://example.com/s.json";
        let cache = SchemaCache::new(Some(nested.clone()), mock(&[(uri, r#"{"type":"null"}"#)]));
        let (_, status) = cache.fetch(uri).unwrap();
        assert_eq!(status, CacheStatus::Miss);

        let hash = SchemaCache::<MockClient>::hash_uri(uri);
        assert!(nested.join(format!("{hash}.json")).exists());
    }

    #[test]
    fn fetch_client_error() {
        let client = mock(&[]);
        let cache = SchemaCache::new(None, client);
        assert!(cache.fetch("https://example.com/missing.json").is_err());
    }

    #[test]
    fn fetch_invalid_json() {
        let client = mock(&[("https://example.com/bad.json", "not json")]);
        let cache = SchemaCache::new(None, client);
        assert!(cache.fetch("https://example.com/bad.json").is_err());
    }

    #[test]
    fn retrieve_trait_delegates() {
        let client = mock(&[("https://example.com/s.json", r#"{"type":"object"}"#)]);
        let cache = SchemaCache::new(None, client);
        let uri: jsonschema::Uri<String> = "https://example.com/s.json".parse().unwrap();
        let val = jsonschema::Retrieve::retrieve(&cache, &uri).unwrap();
        assert_eq!(val, serde_json::json!({"type": "object"}));
    }

    #[tokio::test]
    async fn async_retrieve_trait_delegates() {
        let client = mock(&[("https://example.com/s.json", r#"{"type":"object"}"#)]);
        let cache = SchemaCache::new(None, client);
        let uri: jsonschema::Uri<String> = "https://example.com/s.json".parse().unwrap();
        let val = jsonschema::AsyncRetrieve::retrieve(&cache, &uri)
            .await
            .unwrap();
        assert_eq!(val, serde_json::json!({"type": "object"}));
    }

    #[test]
    fn default_cache_dir_ends_with_schemas() {
        let dir = default_cache_dir();
        assert!(dir.ends_with("lintel/schemas"));
    }
}