use std::path::{Path, PathBuf};
use dashmap::DashMap;
use xxhash_rust::xxh64;
use crate::error::Result;
use super::result::FetchResult;
use super::traits::SchemaFetcher;
/// Derives the cache file name used for `url`.
///
/// The name is the 64-bit xxHash of the URL bytes (seed `0`) written as 16
/// zero-padded lowercase hexadecimal digits, followed by the `.xsd` extension.
fn cache_filename(url: &str) -> String {
    let digest = xxh64::xxh64(url.as_bytes(), 0);
    let mut name = format!("{:016x}", digest);
    name.push_str(".xsd");
    name
}
/// A [`SchemaFetcher`] wrapper that stores fetched content as files in a
/// cache directory and serves later requests for the same URL from disk.
pub struct FileCachingFetcher<F: SchemaFetcher> {
/// The wrapped fetcher, consulted when a URL has no entry in `index`.
inner: F,
/// Directory in which the cache files are written.
cache_dir: PathBuf,
/// Handle to the temporary directory when the constructor created one
/// (`None` when the caller supplied the directory). It is never read; it is
/// stored only so the directory lives as long as this value.
_temp_dir: Option<tempfile::TempDir>,
/// Maps a URL to the path of the file that holds its cached content.
index: DashMap<String, PathBuf>,
}
impl<F: SchemaFetcher> FileCachingFetcher<F> {
    /// Creates a fetcher whose cache lives in a freshly created temporary
    /// directory owned by the returned value.
    ///
    /// # Errors
    ///
    /// Returns an error if the temporary directory cannot be created.
    pub fn new(inner: F) -> Result<Self> {
        let temp_dir = tempfile::TempDir::new()?;
        Ok(Self::from_temp_dir(inner, temp_dir))
    }

    /// Creates a fetcher that writes its cache files into `dir`.
    ///
    /// The directory is used as given: this constructor does not create it,
    /// and the fetcher does not remove it when dropped.
    pub fn with_dir(inner: F, dir: impl AsRef<Path>) -> Self {
        Self {
            inner,
            cache_dir: dir.as_ref().to_path_buf(),
            _temp_dir: None,
            index: DashMap::new(),
        }
    }

    /// Creates a fetcher whose cache lives in a new temporary directory
    /// created inside `dir` and owned by the returned value.
    ///
    /// # Errors
    ///
    /// Returns an error if the temporary directory cannot be created.
    pub fn with_temp_dir(inner: F, dir: impl AsRef<Path>) -> Result<Self> {
        let temp_dir = tempfile::TempDir::new_in(dir)?;
        Ok(Self::from_temp_dir(inner, temp_dir))
    }

    /// Pre-populates the cache so that a later fetch of `url` is answered
    /// from disk without consulting the inner fetcher.
    ///
    /// # Errors
    ///
    /// Returns an error if the cache file cannot be written.
    pub fn seed(&self, url: &str, content: Vec<u8>) -> Result<()> {
        // Same write-then-index sequence as a cache miss; share the helper
        // instead of duplicating it.
        self.write_cache(url, &content)?;
        Ok(())
    }

    /// Returns the number of URLs currently present in the cache index.
    ///
    /// A redirected fetch registers both the requested and the final URL, so
    /// this can exceed the number of files on disk.
    pub fn len(&self) -> usize {
        self.index.len()
    }

    /// Returns `true` when no URL has been cached or seeded yet.
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }

    /// Returns a reference to the wrapped fetcher.
    pub fn inner(&self) -> &F {
        &self.inner
    }

    /// Returns the directory in which cache files are stored.
    pub fn cache_dir(&self) -> &Path {
        &self.cache_dir
    }

    /// Builds a fetcher backed by `temp_dir`, keeping the handle so the
    /// directory stays alive for as long as the fetcher does.
    fn from_temp_dir(inner: F, temp_dir: tempfile::TempDir) -> Self {
        let cache_dir = temp_dir.path().to_path_buf();
        Self {
            inner,
            cache_dir,
            _temp_dir: Some(temp_dir),
            index: DashMap::new(),
        }
    }

    /// Writes `content` to the cache file derived from `url`, records the
    /// file under `url` in the index, and returns the file's path.
    fn write_cache(&self, url: &str, content: &[u8]) -> Result<PathBuf> {
        let path = self.cache_dir.join(cache_filename(url));
        // Write the file before indexing it so an indexed URL always refers
        // to a file that was written successfully.
        std::fs::write(&path, content)?;
        self.index.insert(url.to_string(), path.clone());
        Ok(path)
    }
}
impl<F: SchemaFetcher> SchemaFetcher for FileCachingFetcher<F> {
    /// Returns the content for `url`, reading it from the cache file when the
    /// URL is indexed and delegating to the inner fetcher otherwise.
    ///
    /// On a cache hit the result reports `final_url` equal to the requested
    /// `url` and `redirected == false`. On a miss the inner fetcher's result
    /// is written to disk and returned unchanged; if it reports a different
    /// `final_url`, that URL is indexed to the same cache file as well.
    ///
    /// # Errors
    ///
    /// Returns an error if the cache file cannot be read, if the inner
    /// fetcher fails, or if the fetched content cannot be written to disk.
    fn fetch(&self, url: &str) -> Result<FetchResult> {
        // Clone the path out of the map so the shard guard returned by `get`
        // is released before the file read; holding it during disk I/O would
        // stall every writer that hashes to the same shard.
        let cached_path = self.index.get(url).map(|entry| entry.value().clone());
        if let Some(path) = cached_path {
            let content = std::fs::read(&path)?;
            return Ok(FetchResult {
                content,
                final_url: url.to_string(),
                redirected: false,
            });
        }

        let result = self.inner.fetch(url)?;
        let path = self.write_cache(url, &result.content)?;
        if result.final_url != url {
            // Let later requests for the redirect target hit the same file.
            self.index.insert(result.final_url.clone(), path);
        }
        Ok(result)
    }
}
/// Asynchronous counterpart of the file-backed cache: wraps an
/// `AsyncSchemaFetcher`, stores fetched content as files in a cache directory
/// and serves later requests for the same URL from disk.
///
/// Only compiled when the `tokio` feature is enabled.
#[cfg(feature = "tokio")]
pub struct AsyncFileCachingFetcher<F: super::traits::AsyncSchemaFetcher> {
/// The wrapped fetcher, consulted when a URL has no entry in `index`.
inner: F,
/// Directory in which the cache files are written.
cache_dir: PathBuf,
/// Handle to the temporary directory when the constructor created one
/// (`None` when the caller supplied the directory). It is never read; it is
/// stored only so the directory lives as long as this value.
_temp_dir: Option<tempfile::TempDir>,
/// Maps a URL to the path of the file that holds its cached content.
index: DashMap<String, PathBuf>,
}
#[cfg(feature = "tokio")]
impl<F: super::traits::AsyncSchemaFetcher> AsyncFileCachingFetcher<F> {
    /// Builds a fetcher that caches into a brand-new temporary directory,
    /// which the returned value owns.
    ///
    /// # Errors
    ///
    /// Fails when the temporary directory cannot be created.
    pub fn new(inner: F) -> Result<Self> {
        let scratch = tempfile::TempDir::new()?;
        Ok(Self::backed_by(inner, scratch))
    }

    /// Builds a fetcher that caches into the caller-supplied directory `dir`.
    ///
    /// No temporary directory handle is kept in this mode.
    pub fn with_dir(inner: F, dir: impl AsRef<Path>) -> Self {
        let cache_dir = dir.as_ref().to_path_buf();
        Self {
            inner,
            cache_dir,
            _temp_dir: None,
            index: DashMap::new(),
        }
    }

    /// Builds a fetcher that caches into a new temporary directory created
    /// underneath `dir`; the returned value owns that directory.
    ///
    /// # Errors
    ///
    /// Fails when the temporary directory cannot be created.
    pub fn with_temp_dir(inner: F, dir: impl AsRef<Path>) -> Result<Self> {
        let scratch = tempfile::TempDir::new_in(dir)?;
        Ok(Self::backed_by(inner, scratch))
    }

    /// Stores `content` as the cached body for `url` so that a later fetch is
    /// served from disk.
    ///
    /// # Errors
    ///
    /// Fails when the cache file cannot be written.
    pub async fn seed(&self, url: &str, content: Vec<u8>) -> Result<()> {
        let target = self.cache_dir.join(cache_filename(url));
        tokio::fs::write(&target, &content).await?;
        self.index.insert(url.to_string(), target);
        Ok(())
    }

    /// Number of URLs currently recorded in the cache index.
    pub fn len(&self) -> usize {
        self.index.len()
    }

    /// Whether the cache index has no entries.
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }

    /// Borrows the wrapped fetcher.
    pub fn inner(&self) -> &F {
        &self.inner
    }

    /// Borrows the path of the directory holding the cache files.
    pub fn cache_dir(&self) -> &Path {
        self.cache_dir.as_path()
    }

    /// Assembles a fetcher around an already-created temporary directory,
    /// retaining the handle alongside a copy of its path.
    fn backed_by(inner: F, scratch: tempfile::TempDir) -> Self {
        Self {
            inner,
            cache_dir: scratch.path().to_path_buf(),
            _temp_dir: Some(scratch),
            index: DashMap::new(),
        }
    }
}
#[cfg(feature = "tokio")]
#[async_trait::async_trait]
impl<F: super::traits::AsyncSchemaFetcher> super::traits::AsyncSchemaFetcher
    for AsyncFileCachingFetcher<F>
{
    /// Returns the content for `url`, reading it from the cache file when the
    /// URL is indexed and awaiting the inner fetcher otherwise.
    ///
    /// On a cache hit the result reports `final_url` equal to the requested
    /// `url` and `redirected == false`. On a miss the inner fetcher's result
    /// is written to disk and returned unchanged; if it reports a different
    /// `final_url`, that URL is indexed to the same cache file as well.
    ///
    /// # Errors
    ///
    /// Returns an error if the cache file cannot be read, if the inner
    /// fetcher fails, or if the fetched content cannot be written to disk.
    async fn fetch(&self, url: &str) -> Result<FetchResult> {
        // Clone the path and drop the DashMap guard *before* awaiting. A guard
        // kept alive across an `.await` keeps its shard locked while this task
        // is suspended; any task that then calls `insert` on that shard blocks
        // its worker thread synchronously, which can deadlock the runtime.
        let cached_path = self.index.get(url).map(|entry| entry.value().clone());
        if let Some(path) = cached_path {
            let content = tokio::fs::read(&path).await?;
            return Ok(FetchResult {
                content,
                final_url: url.to_string(),
                redirected: false,
            });
        }

        let result = self.inner.fetch(url).await?;
        let path = self.cache_dir.join(cache_filename(url));
        // Write the file before indexing it so an indexed URL always refers
        // to a file that was written successfully.
        tokio::fs::write(&path, &result.content).await?;
        self.index.insert(url.to_string(), path.clone());
        if result.final_url != url {
            // Let later requests for the redirect target hit the same file.
            self.index.insert(result.final_url.clone(), path);
        }
        Ok(result)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::fetcher::NoopFetcher;
    use std::collections::HashMap;
    use std::sync::{Arc, Mutex};

    /// Test double that serves canned responses and records every URL it is
    /// asked for.
    struct TrackingFetcher {
        responses: HashMap<String, Vec<u8>>,
        calls: Arc<Mutex<Vec<String>>>,
    }

    impl TrackingFetcher {
        fn new(responses: HashMap<String, Vec<u8>>) -> Self {
            let calls = Arc::new(Mutex::new(Vec::new()));
            Self { responses, calls }
        }

        /// Number of times `fetch` has been invoked so far.
        fn call_count(&self) -> usize {
            let recorded = self.calls.lock().unwrap();
            recorded.len()
        }
    }

    impl SchemaFetcher for TrackingFetcher {
        fn fetch(&self, url: &str) -> Result<FetchResult> {
            // Record the request first so failed lookups are counted too.
            self.calls.lock().unwrap().push(url.to_string());

            if let Some(body) = self.responses.get(url) {
                return Ok(FetchResult {
                    content: body.clone(),
                    final_url: url.to_string(),
                    redirected: false,
                });
            }

            Err(crate::schema::fetcher::error::FetchError::RequestFailed {
                url: url.to_string(),
                message: "Not found".to_string(),
            }
            .into())
        }
    }

    /// Test double that answers every request with the same body and reports
    /// a fixed redirect target.
    struct RedirectFetcher {
        content: Vec<u8>,
        final_url: String,
    }

    impl SchemaFetcher for RedirectFetcher {
        fn fetch(&self, _url: &str) -> Result<FetchResult> {
            let content = self.content.clone();
            let final_url = self.final_url.clone();
            Ok(FetchResult {
                content,
                final_url,
                redirected: true,
            })
        }
    }

    /// A second fetch of the same URL must not reach the inner fetcher.
    #[test]
    fn test_file_caching_fetcher_caches_result() {
        const URL: &str = "http://example.com/a.xsd";
        let responses: HashMap<String, Vec<u8>> =
            std::iter::once((URL.to_string(), b"<schema/>".to_vec())).collect();
        let fetcher = FileCachingFetcher::new(TrackingFetcher::new(responses)).unwrap();

        let first = fetcher.fetch(URL).unwrap();
        assert_eq!(first.content, b"<schema/>");
        assert_eq!(fetcher.inner().call_count(), 1);

        let second = fetcher.fetch(URL).unwrap();
        assert_eq!(second.content, b"<schema/>");
        assert_eq!(fetcher.inner().call_count(), 1);
    }

    /// Seeded content is returned by `fetch` and counted by `len`.
    #[test]
    fn test_file_caching_fetcher_seed() {
        let url = "http://example.com/test.xsd";
        let fetcher = FileCachingFetcher::new(NoopFetcher).unwrap();
        fetcher.seed(url, b"<seeded/>".to_vec()).unwrap();

        let fetched = fetcher.fetch(url).unwrap();
        assert_eq!(fetched.content, b"<seeded/>");
        assert_eq!(fetcher.len(), 1);
    }

    /// `len` and `is_empty` track the index before and after seeding.
    #[test]
    fn test_file_caching_fetcher_len_is_empty() {
        let fetcher = FileCachingFetcher::new(NoopFetcher).unwrap();
        assert!(fetcher.is_empty());
        assert_eq!(fetcher.len(), 0);

        let seeded = fetcher.seed("http://example.com/a.xsd", b"a".to_vec());
        seeded.unwrap();
        assert!(!fetcher.is_empty());
        assert_eq!(fetcher.len(), 1);
    }

    /// `with_dir` uses exactly the directory it was given.
    #[test]
    fn test_file_caching_fetcher_with_dir() {
        let dir = tempfile::TempDir::new().unwrap();
        let expected = dir.path();
        let fetcher = FileCachingFetcher::with_dir(NoopFetcher, expected);
        assert_eq!(fetcher.cache_dir(), expected);
    }

    /// `with_temp_dir` places its cache directory below the given parent.
    #[test]
    fn test_file_caching_fetcher_with_temp_dir() {
        let parent = tempfile::TempDir::new().unwrap();
        let root = parent.path();
        let fetcher = FileCachingFetcher::with_temp_dir(NoopFetcher, root).unwrap();
        assert!(fetcher.cache_dir().starts_with(root));
    }

    /// After a redirected fetch both the requested and the final URL are
    /// served from the cache.
    #[test]
    fn test_file_caching_fetcher_redirect_caches_both_urls() {
        let redirecting = RedirectFetcher {
            content: b"<redirected/>".to_vec(),
            final_url: "http://example.com/final.xsd".to_string(),
        };
        let fetcher = FileCachingFetcher::new(redirecting).unwrap();

        let via_original = fetcher.fetch("http://example.com/original.xsd").unwrap();
        assert_eq!(via_original.content, b"<redirected/>");
        assert_eq!(fetcher.len(), 2);

        let via_final = fetcher.fetch("http://example.com/final.xsd").unwrap();
        assert_eq!(via_final.content, b"<redirected/>");
    }

    /// Dropping a fetcher created with `new` removes its cache directory.
    #[test]
    fn test_file_caching_fetcher_temp_dir_cleanup() {
        let fetcher = FileCachingFetcher::new(NoopFetcher).unwrap();
        fetcher
            .seed("http://example.com/a.xsd", b"data".to_vec())
            .unwrap();
        let cache_dir = fetcher.cache_dir().to_path_buf();
        assert!(cache_dir.exists());

        drop(fetcher);
        assert!(!cache_dir.exists());
    }

    /// Dropping a fetcher created with `with_dir` leaves the directory alone.
    #[test]
    fn test_file_caching_fetcher_persistent_dir_not_cleaned() {
        let dir = tempfile::TempDir::new().unwrap();
        let dir_path = dir.path().to_path_buf();

        let fetcher = FileCachingFetcher::with_dir(NoopFetcher, &dir_path);
        fetcher
            .seed("http://example.com/a.xsd", b"data".to_vec())
            .unwrap();
        drop(fetcher);

        assert!(dir_path.exists());
    }

    /// The same URL always maps to the same `.xsd` file name.
    #[test]
    fn test_cache_filename_deterministic() {
        let url = "http://example.com/schema.xsd";
        let a = cache_filename(url);
        let b = cache_filename(url);
        assert_eq!(a, b);
        assert!(a.ends_with(".xsd"));
    }

    /// Distinct URLs map to distinct file names.
    #[test]
    fn test_cache_filename_different_urls() {
        let a = cache_filename("http://example.com/a.xsd");
        let b = cache_filename("http://example.com/b.xsd");
        assert_ne!(a, b);
    }
}