Skip to main content

lintel_schema_cache/
lib.rs

1use std::collections::hash_map::DefaultHasher;
2use std::error::Error;
3use std::fs;
4use std::hash::{Hash, Hasher};
5use std::path::PathBuf;
6
7use serde_json::Value;
8
/// Outcome of a schema lookup with respect to the disk cache.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CacheStatus {
    /// The schema was read from the disk cache.
    Hit,
    /// The schema was not served from the cache; it was fetched from the
    /// network (and possibly written to cache).
    Miss,
    /// No cache directory is configured (`cache_dir` is `None`), so caching
    /// is turned off.
    Disabled,
}
19
/// Abstraction over a synchronous HTTP GET, so the transport used to download
/// schemas can be swapped out.
pub trait HttpClient
where
    Self: Clone + Send + Sync + 'static,
{
    /// Request `uri` and return the response body as text.
    ///
    /// # Errors
    ///
    /// Returns an error if the HTTP request fails or the response cannot be read.
    fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>>;
}
27
/// Default HTTP client using ureq.
///
/// The type carries no state, so it is cheap to clone and can be created
/// with `UreqClient` or `UreqClient::default()`.
#[derive(Debug, Clone, Default)]
pub struct UreqClient;
31
32impl HttpClient for UreqClient {
33    fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
34        let mut response = ureq::get(uri).call()?;
35        Ok(response.body_mut().read_to_string()?)
36    }
37}
38
39/// A disk-backed cache for JSON Schema files.
40///
41/// Schemas are fetched via HTTP and stored as `<cache_dir>/<hash>.json`
42/// where `<hash>` is a hex-encoded hash of the URI. When a schema is
43/// requested, the cache is checked first; on a miss the schema is fetched
44/// and written to disk for future use.
45#[derive(Clone)]
46pub struct SchemaCache<C: HttpClient = UreqClient> {
47    cache_dir: Option<PathBuf>,
48    client: C,
49}
50
51impl<C: HttpClient> SchemaCache<C> {
52    pub fn new(cache_dir: Option<PathBuf>, client: C) -> Self {
53        Self { cache_dir, client }
54    }
55
56    /// Fetch a schema by URI, using the disk cache when available.
57    ///
58    /// Returns the parsed schema and a [`CacheStatus`] indicating whether the
59    /// result came from the disk cache, the network, or caching was disabled.
60    ///
61    /// # Errors
62    ///
63    /// Returns an error if the schema cannot be fetched from the network,
64    /// read from disk cache, or parsed as JSON.
65    pub fn fetch(&self, uri: &str) -> Result<(Value, CacheStatus), Box<dyn Error + Send + Sync>> {
66        // Check cache first
67        if let Some(ref cache_dir) = self.cache_dir {
68            let hash = Self::hash_uri(uri);
69            let cache_path = cache_dir.join(format!("{hash}.json"));
70            if cache_path.exists() {
71                let content = fs::read_to_string(&cache_path)?;
72                return Ok((serde_json::from_str(&content)?, CacheStatus::Hit));
73            }
74        }
75
76        // Fetch from network
77        let body = self.client.get(uri)?;
78        let value: Value = serde_json::from_str(&body)?;
79
80        let status = if let Some(ref cache_dir) = self.cache_dir {
81            // Write to cache
82            fs::create_dir_all(cache_dir)?;
83            let hash = Self::hash_uri(uri);
84            let cache_path = cache_dir.join(format!("{hash}.json"));
85            fs::write(&cache_path, &body)?;
86            CacheStatus::Miss
87        } else {
88            CacheStatus::Disabled
89        };
90
91        Ok((value, status))
92    }
93
94    fn hash_uri(uri: &str) -> String {
95        let mut hasher = DefaultHasher::new();
96        uri.hash(&mut hasher);
97        format!("{:016x}", hasher.finish())
98    }
99}
100
101/// Return the default cache directory for schemas: `<system_cache>/lintel/schemas`.
102pub fn default_cache_dir() -> PathBuf {
103    dirs::cache_dir()
104        .unwrap_or_else(|| PathBuf::from(".cache"))
105        .join("lintel")
106        .join("schemas")
107}
108
109// -- jsonschema trait impls --------------------------------------------------
110
111impl<C: HttpClient> jsonschema::Retrieve for SchemaCache<C> {
112    fn retrieve(
113        &self,
114        uri: &jsonschema::Uri<String>,
115    ) -> Result<Value, Box<dyn Error + Send + Sync>> {
116        let (value, _status) = self.fetch(uri.as_str())?;
117        Ok(value)
118    }
119}
120
121#[async_trait::async_trait]
122impl<C: HttpClient> jsonschema::AsyncRetrieve for SchemaCache<C> {
123    async fn retrieve(
124        &self,
125        uri: &jsonschema::Uri<String>,
126    ) -> Result<Value, Box<dyn Error + Send + Sync>> {
127        let cache = self.clone();
128        let uri_str = uri.as_str().to_string();
129        let (value, _status) = tokio::task::spawn_blocking(move || cache.fetch(&uri_str)).await??;
130        Ok(value)
131    }
132}
133
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Cache type exercised by every test.
    type Cache = SchemaCache<MockClient>;

    /// URI used by the tests that need a single schema.
    const SCHEMA_URI: &str = "https://example.com/s.json";

    /// In-memory client: maps a URI to the body it should return.
    #[derive(Clone)]
    struct MockClient(HashMap<String, String>);

    impl HttpClient for MockClient {
        fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
            match self.0.get(uri) {
                Some(body) => Ok(body.clone()),
                None => Err(format!("mock: no response for {uri}").into()),
            }
        }
    }

    /// Build a [`MockClient`] from `(uri, body)` pairs.
    fn mock(entries: &[(&str, &str)]) -> MockClient {
        let mut responses = HashMap::new();
        for (uri, body) in entries {
            responses.insert((*uri).to_string(), (*body).to_string());
        }
        MockClient(responses)
    }

    /// Path of the cache file that `uri` maps to inside `dir`.
    fn entry_path(dir: &std::path::Path, uri: &str) -> PathBuf {
        let hash = Cache::hash_uri(uri);
        dir.join(format!("{hash}.json"))
    }

    #[test]
    fn hash_uri_deterministic() {
        let uri = "https://example.com/schema.json";
        let first = Cache::hash_uri(uri);
        let second = Cache::hash_uri(uri);
        assert_eq!(first, second);
    }

    #[test]
    fn hash_uri_different_inputs() {
        let a = Cache::hash_uri("https://example.com/a.json");
        let b = Cache::hash_uri("https://example.com/b.json");
        assert_ne!(a, b);
    }

    /// Convert a `Box<dyn Error + Send + Sync>` to `anyhow::Error`.
    #[allow(clippy::needless_pass_by_value)]
    fn boxerr(e: Box<dyn Error + Send + Sync>) -> anyhow::Error {
        anyhow::anyhow!("{e}")
    }

    #[test]
    fn fetch_no_cache_dir() -> anyhow::Result<()> {
        let cache = SchemaCache::new(None, mock(&[(SCHEMA_URI, r#"{"type":"object"}"#)]));
        let (val, status) = cache.fetch(SCHEMA_URI).map_err(boxerr)?;
        assert_eq!(val, serde_json::json!({"type": "object"}));
        assert_eq!(status, CacheStatus::Disabled);
        Ok(())
    }

    #[test]
    fn fetch_cold_cache() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let cache = SchemaCache::new(
            Some(tmp.path().to_path_buf()),
            mock(&[(SCHEMA_URI, r#"{"type":"string"}"#)]),
        );
        let (val, status) = cache.fetch(SCHEMA_URI).map_err(boxerr)?;
        assert_eq!(val, serde_json::json!({"type": "string"}));
        assert_eq!(status, CacheStatus::Miss);

        // The miss must have left a cache file behind.
        assert!(entry_path(tmp.path(), SCHEMA_URI).exists());
        Ok(())
    }

    #[test]
    fn fetch_warm_cache() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        fs::write(entry_path(tmp.path(), SCHEMA_URI), r#"{"type":"number"}"#)?;

        // The client knows no URIs, so any network access would be an error.
        let cache = SchemaCache::new(Some(tmp.path().to_path_buf()), mock(&[]));
        let (val, status) = cache.fetch(SCHEMA_URI).map_err(boxerr)?;
        assert_eq!(val, serde_json::json!({"type": "number"}));
        assert_eq!(status, CacheStatus::Hit);
        Ok(())
    }

    #[test]
    fn fetch_client_error() {
        let cache = SchemaCache::new(None, mock(&[]));
        let outcome = cache.fetch("https://example.com/missing.json");
        assert!(outcome.is_err());
    }

    #[test]
    fn fetch_invalid_json() {
        let bad_uri = "https://example.com/bad.json";
        let cache = SchemaCache::new(None, mock(&[(bad_uri, "not json")]));
        assert!(cache.fetch(bad_uri).is_err());
    }

    #[test]
    fn retrieve_trait_delegates() -> anyhow::Result<()> {
        let cache = SchemaCache::new(None, mock(&[(SCHEMA_URI, r#"{"type":"object"}"#)]));
        let uri = SCHEMA_URI.parse::<jsonschema::Uri<String>>()?;
        let val = jsonschema::Retrieve::retrieve(&cache, &uri).map_err(boxerr)?;
        assert_eq!(val, serde_json::json!({"type": "object"}));
        Ok(())
    }

    #[tokio::test]
    async fn async_retrieve_trait_delegates() -> anyhow::Result<()> {
        let cache = SchemaCache::new(None, mock(&[(SCHEMA_URI, r#"{"type":"object"}"#)]));
        let uri = SCHEMA_URI.parse::<jsonschema::Uri<String>>()?;
        let val = jsonschema::AsyncRetrieve::retrieve(&cache, &uri)
            .await
            .map_err(boxerr)?;
        assert_eq!(val, serde_json::json!({"type": "object"}));
        Ok(())
    }

    #[test]
    fn default_cache_dir_ends_with_schemas() {
        assert!(default_cache_dir().ends_with("lintel/schemas"));
    }
}
263}