use anyhow::{bail, Context, Result};
use chrono::DateTime;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::Duration;
/// Metadata record describing one cached download.
///
/// `FileCache::download_and_cache` serializes this to JSON and stores it in the
/// `.meta` file that sits next to the cached data file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheEntry {
/// URL the cached content was downloaded from.
pub url: String,
/// Time the entry was written, as an RFC 3339 string; readers parse it back
/// with `DateTime::parse_from_rfc3339` to compute the entry's age.
pub cached_at: String,
/// Length in bytes of the downloaded response body.
pub size: u64,
}
/// Snapshot of cache usage, as produced by `FileCache::stats`.
#[derive(Debug, Clone)]
pub struct CacheStats {
/// Value of the cache's hit counter at the time of the snapshot.
pub hit_count: usize,
/// Value of the cache's miss counter at the time of the snapshot.
pub miss_count: usize,
/// Sum of the on-disk sizes, in bytes, of the data files that were counted.
pub total_size: u64,
/// Number of data files (regular files without a `.meta` extension) found in
/// the sub-directories of the cache root.
pub entry_count: usize,
}
/// File-system backed cache for downloaded URLs.
///
/// Each URL is stored under `root/<first two hex chars of its SHA-256>/<full hash>`,
/// with a sibling `<full hash>.meta` file holding a JSON `CacheEntry`.
pub struct FileCache {
// Directory that holds all cache sub-directories.
root: PathBuf,
// Maximum age an entry may reach before it is treated as expired.
ttl: Duration,
// Number of `get` calls that returned a cached path.
hit_count: usize,
// Number of `get` calls counted as misses.
miss_count: usize,
}
impl FileCache {
pub fn new(root: PathBuf, ttl: Duration) -> Result<Self> {
fs::create_dir_all(&root).context("Failed to create cache directory")?;
Ok(FileCache {
root,
ttl,
hit_count: 0,
miss_count: 0,
})
}
/// Builds a cache from environment variables.
///
/// * `RSS_CACHE_DIR` - cache root. When unset (or not valid Unicode) the root is
///   `$HOME/.rss_cache`, falling back to `/tmp/.rss_cache` if `HOME` is unavailable.
/// * `RSS_CACHE_TTL` - time-to-live in whole seconds. When unset or not parseable
///   as `u64`, seven days (604800 s) is used.
///
/// # Errors
///
/// Propagates the error from [`FileCache::new`] if the directory cannot be created.
pub fn from_env() -> Result<Self> {
    // Seven days, expressed in seconds.
    const DEFAULT_TTL_SECS: u64 = 604_800;

    let root = match std::env::var("RSS_CACHE_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => {
            let home = match std::env::var("HOME") {
                Ok(home) => PathBuf::from(home),
                Err(_) => PathBuf::from("/tmp"),
            };
            home.join(".rss_cache")
        }
    };

    let ttl_secs = match std::env::var("RSS_CACHE_TTL") {
        Ok(raw) => raw.parse::<u64>().unwrap_or(DEFAULT_TTL_SECS),
        Err(_) => DEFAULT_TTL_SECS,
    };

    Self::new(root, Duration::from_secs(ttl_secs))
}
/// Returns the lowercase hexadecimal SHA-256 digest of `url`.
///
/// The digest is computed over the URL's raw bytes and is used as the
/// file name of the cached entry.
pub fn url_hash(url: &str) -> String {
    let digest = Sha256::digest(url.as_bytes());
    hex::encode(digest)
}
/// Returns the time-to-live this cache was configured with.
pub fn ttl(&self) -> Duration {
self.ttl
}
/// Returns the location where the data for `url` is (or would be) stored.
///
/// The layout is `<root>/<first two hex chars of the hash>/<full hash>`.
/// The path is computed only; nothing is created on disk.
pub fn cache_path(&self, url: &str) -> PathBuf {
    let digest = Self::url_hash(url);
    // The first two hex characters select the sub-directory.
    let mut location = self.root.join(&digest[..2]);
    location.push(&digest);
    location
}
/// Returns the location of the metadata sidecar for `url`.
///
/// This is the data path from [`FileCache::cache_path`] with its extension
/// set to `meta`.
pub fn meta_path(&self, url: &str) -> PathBuf {
    self.cache_path(url).with_extension("meta")
}
/// Looks up `url` and returns the path of its cached data file if a fresh copy exists.
///
/// Every call is counted exactly once: a returned path increments the hit
/// counter and every `None` increments the miss counter. This includes the
/// cases where the metadata file is unreadable or malformed, which previously
/// returned `None` without being counted.
///
/// `None` is returned when the data or metadata file is missing, the metadata
/// cannot be read or parsed, the recorded timestamp lies in the future, or the
/// entry is older than the configured TTL.
pub fn get(&mut self, url: &str) -> Option<PathBuf> {
    let fresh = self.lookup_fresh(url);
    if fresh.is_some() {
        self.hit_count += 1;
    } else {
        self.miss_count += 1;
    }
    fresh
}

/// Returns the data path for `url` when a non-expired entry exists, without touching the counters.
fn lookup_fresh(&self, url: &str) -> Option<PathBuf> {
    let cache_path = self.cache_path(url);
    let meta_path = self.meta_path(url);
    if !cache_path.exists() || !meta_path.exists() {
        return None;
    }

    let meta = fs::read_to_string(&meta_path).ok()?;
    let entry: CacheEntry = serde_json::from_str(&meta).ok()?;
    let cached_at = DateTime::parse_from_rfc3339(&entry.cached_at).ok()?;

    // `to_std` fails for a negative duration, i.e. a timestamp in the future;
    // such an entry is treated as not cached.
    let age = chrono::Utc::now()
        .signed_duration_since(cached_at)
        .to_std()
        .ok()?;

    if age > self.ttl {
        None
    } else {
        Some(cache_path)
    }
}
/// Returns the cached file for `url`, downloading it with `client` first if needed.
///
/// A fresh cache entry is returned as-is (and counted by [`FileCache::get`]).
/// Otherwise the URL is fetched, the body is written to the cache path and a
/// JSON [`CacheEntry`] is written to the metadata path.
///
/// # Errors
///
/// Fails if the request cannot be sent, the response status is not a success,
/// the body cannot be read, or any of the cache files/directories cannot be written.
pub fn download_and_cache(
    &mut self,
    url: &str,
    client: &reqwest::blocking::Client,
) -> Result<PathBuf> {
    if let Some(hit) = self.get(url) {
        return Ok(hit);
    }

    let response = client.get(url).send().context("Failed to send request")?;
    let status = response.status();
    if !status.is_success() {
        bail!("Failed to download {}: status {}", url, status);
    }
    let body = response.bytes().context("Failed to read response body")?;

    // Store the payload first, then the metadata that marks it as valid.
    let target = self.cache_path(url);
    if let Some(dir) = target.parent() {
        fs::create_dir_all(dir).context("Failed to create cache subdirectory")?;
    }
    fs::write(&target, &body).context("Failed to write cached file")?;

    let record = CacheEntry {
        url: url.to_owned(),
        cached_at: chrono::Utc::now().to_rfc3339(),
        size: body.len() as u64,
    };
    let record_json =
        serde_json::to_string(&record).context("Failed to serialize cache entry")?;
    fs::write(self.meta_path(url), record_json).context("Failed to write cache metadata")?;

    Ok(target)
}
/// Creates a symbolic link at `link_path` that points to the cached file for `url`.
///
/// Missing parent directories of `link_path` are created. Anything already
/// present at `link_path` is removed first, including a dangling symlink.
/// On Unix `std::os::unix::fs::symlink` is used, on Windows
/// `std::os::windows::fs::symlink_file`.
///
/// # Errors
///
/// Fails if `url` has no fresh cache entry, or if creating the parent
/// directory, removing the existing file, or creating the link fails.
pub fn symlink(&mut self, url: &str, link_path: &Path) -> Result<()> {
    let cache_path = self
        .get(url)
        .ok_or_else(|| anyhow::anyhow!("URL not cached: {}", url))?;

    if let Some(parent) = link_path.parent() {
        fs::create_dir_all(parent).context("Failed to create parent directory for symlink")?;
    }

    // `Path::exists` follows symlinks and reports `false` for a dangling link,
    // which would then make link creation fail with "already exists".
    // `symlink_metadata` inspects the link itself instead.
    if link_path.symlink_metadata().is_ok() {
        fs::remove_file(link_path).context("Failed to remove existing file")?;
    }

    #[cfg(unix)]
    std::os::unix::fs::symlink(&cache_path, link_path)
        .context("Failed to create symlink")?;
    #[cfg(windows)]
    std::os::windows::fs::symlink_file(&cache_path, link_path)
        .context("Failed to create symlink")?;

    Ok(())
}
/// Deletes the cached data file and metadata file for `url`.
///
/// Both deletions are attempted independently. Returns `true` if at least
/// one existing file was removed, and `false` if neither file existed or
/// every removal failed.
pub fn invalidate(&self, url: &str) -> bool {
    let data_file = self.cache_path(url);
    let meta_file = self.meta_path(url);

    // Evaluate both removals before combining so neither is short-circuited.
    let data_removed = data_file.exists() && fs::remove_file(&data_file).is_ok();
    let meta_removed = meta_file.exists() && fs::remove_file(&meta_file).is_ok();

    data_removed || meta_removed
}
pub fn cleanup(&self) -> usize {
let mut removed = 0;
if !self.root.exists() {
return 0;
}
let prefix_entries = match fs::read_dir(&self.root) {
Ok(r) => r,
Err(_) => return 0,
};
for prefix_entry in prefix_entries.flatten() {
let prefix_dir = prefix_entry.path();
if !prefix_dir.is_dir() {
continue;
}
let entries = match fs::read_dir(&prefix_dir) {
Ok(r) => r,
Err(_) => continue,
};
for entry in entries.flatten() {
let path = entry.path();
if !path.is_file() {
continue;
}
if path.extension().map_or(false, |ext| ext == "meta") {
continue;
}
let meta_path = {
let mut m = path.clone();
m.set_extension("meta");
m
};
if !meta_path.exists() {
continue;
}
let meta = match fs::read_to_string(&meta_path) {
Ok(m) => m,
Err(_) => continue,
};
let entry: CacheEntry = match serde_json::from_str(&meta) {
Ok(e) => e,
Err(_) => continue,
};
let cached_at = match DateTime::parse_from_rfc3339(&entry.cached_at) {
Ok(t) => t,
Err(_) => continue,
};
let now = chrono::Utc::now();
if let Ok(elapsed) = now.signed_duration_since(cached_at).to_std() {
if elapsed > self.ttl {
let _ = fs::remove_file(&path);
let _ = fs::remove_file(&meta_path);
removed += 1;
}
}
}
}
removed
}
/// Collects the hit/miss counters together with the current on-disk usage.
///
/// Data files are the regular files without a `.meta` extension found in the
/// direct sub-directories of the cache root. Each one adds one to
/// `entry_count` and its file size to `total_size`. Directories or files that
/// cannot be read are skipped; if the root itself cannot be listed, both
/// values are zero.
pub fn stats(&self) -> CacheStats {
    let mut summary = CacheStats {
        hit_count: self.hit_count,
        miss_count: self.miss_count,
        total_size: 0,
        entry_count: 0,
    };

    if let Ok(prefix_dirs) = fs::read_dir(&self.root) {
        let data_files = prefix_dirs
            .flatten()
            .map(|dir| dir.path())
            .filter(|dir| dir.is_dir())
            .filter_map(|dir| fs::read_dir(dir).ok())
            // Flatten each directory listing, dropping unreadable entries.
            .flat_map(|listing| listing.flatten())
            .map(|file| file.path())
            .filter(|file| {
                file.is_file() && !file.extension().map_or(false, |ext| ext == "meta")
            });

        for file in data_files {
            summary.entry_count += 1;
            if let Ok(info) = fs::metadata(&file) {
                summary.total_size += info.len();
            }
        }
    }

    summary
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Serializes the tests that mutate process-wide environment variables.
    ///
    /// The test harness runs tests on several threads, so without this lock one
    /// test could observe `RSS_CACHE_*` values set by another.
    static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Creates a cache inside a fresh temporary directory.
    ///
    /// The `TempDir` is returned so the directory outlives the cache in the test.
    fn make_cache(ttl: Duration) -> (FileCache, TempDir) {
        let dir = TempDir::new().expect("Failed to create temp dir");
        let cache = FileCache::new(dir.path().to_path_buf(), ttl)
            .expect("Failed to create cache");
        (cache, dir)
    }

    /// Creates a temporary cache with a one hour TTL.
    fn make_test_cache() -> (FileCache, TempDir) {
        make_cache(Duration::from_secs(3600))
    }

    /// Writes `data` and a matching metadata file for `url` straight into the cache,
    /// stamped as cached `age_secs` seconds ago.
    ///
    /// Returns the data path and the metadata path.
    fn seed_entry(
        cache: &FileCache,
        url: &str,
        data: &[u8],
        age_secs: i64,
    ) -> (PathBuf, PathBuf) {
        let cache_path = cache.cache_path(url);
        if let Some(parent) = cache_path.parent() {
            fs::create_dir_all(parent).expect("Failed to create parent");
        }
        fs::write(&cache_path, data).expect("Failed to write test file");

        let cached_at = chrono::Utc::now() - chrono::TimeDelta::seconds(age_secs);
        let entry = CacheEntry {
            url: url.to_string(),
            cached_at: cached_at.to_rfc3339(),
            size: data.len() as u64,
        };
        let meta_path = cache.meta_path(url);
        let meta_json = serde_json::to_string(&entry).expect("Failed to serialize");
        fs::write(&meta_path, meta_json).expect("Failed to write meta");

        (cache_path, meta_path)
    }

    #[test]
    fn test_cache_new_with_params() {
        let dir = TempDir::new().expect("Failed to create temp dir");
        let cache = FileCache::new(dir.path().to_path_buf(), Duration::from_secs(3600))
            .expect("Failed to create cache");
        assert!(cache.root.exists());
    }

    #[test]
    fn test_cache_get_miss() {
        let (mut cache, _dir) = make_test_cache();
        let result = cache.get("https://example.com/nonexistent.tif");
        assert!(result.is_none());
        let stats = cache.stats();
        assert_eq!(stats.miss_count, 1);
    }

    #[test]
    fn test_cache_download_and_cache_hit() {
        let (mut cache, _dir) = make_test_cache();
        let url = "https://example.com/test.tif";
        let (cache_path, _meta_path) = seed_entry(&cache, url, b"test data", 0);

        let cached = cache.get(url).expect("seeded entry should be a hit");
        assert_eq!(cached, cache_path);
        let stats = cache.stats();
        assert_eq!(stats.hit_count, 1);
    }

    #[test]
    fn test_cache_creates_symlink() {
        let (mut cache, dir) = make_test_cache();
        let url = "https://example.com/symlink_test.tif";
        seed_entry(&cache, url, b"symlink test data", 0);

        let link_path = dir.path().join("output").join("test.tif");
        cache.symlink(url, &link_path).expect("Failed to create symlink");
        assert!(link_path.exists());
        #[cfg(unix)]
        assert!(link_path.is_symlink());
        #[cfg(windows)]
        assert!(link_path.is_file());
    }

    #[test]
    fn test_cache_ttl_expiry() {
        let (mut cache, _dir) = make_cache(Duration::from_secs(1));
        let url = "https://example.com/expiring.tif";
        // Ten seconds old against a one second TTL.
        seed_entry(&cache, url, b"expiring data", 10);

        let result = cache.get(url);
        assert!(result.is_none());
    }

    #[test]
    fn test_cache_invalidate() {
        let (mut cache, _dir) = make_test_cache();
        let url = "https://example.com/invalidate.tif";
        let (cache_path, meta_path) = seed_entry(&cache, url, b"invalidate data", 0);

        assert!(cache.get(url).is_some());
        let removed = cache.invalidate(url);
        assert!(removed);
        assert!(cache.get(url).is_none());
        assert!(!cache_path.exists());
        assert!(!meta_path.exists());
    }

    #[test]
    fn test_cache_cleanup_removes_expired() {
        let (cache, _dir) = make_cache(Duration::from_secs(1));
        let (expired_path, _) =
            seed_entry(&cache, "https://example.com/expired.tif", b"expired", 10);
        let (fresh_path, _) = seed_entry(&cache, "https://example.com/fresh.tif", b"fresh", 0);

        let removed = cache.cleanup();
        assert_eq!(removed, 1);
        assert!(!expired_path.exists());
        assert!(fresh_path.exists());
    }

    #[test]
    fn test_cache_stats_tracking() {
        let (mut cache, _dir) = make_test_cache();
        let stats = cache.stats();
        assert_eq!(stats.hit_count, 0);
        assert_eq!(stats.miss_count, 0);
        assert_eq!(stats.entry_count, 0);

        cache.get("https://example.com/miss.tif");
        let stats = cache.stats();
        assert_eq!(stats.miss_count, 1);

        let url = "https://example.com/stat_test.tif";
        seed_entry(&cache, url, b"stats test", 0);
        cache.get(url);
        let stats = cache.stats();
        assert_eq!(stats.hit_count, 1);
        assert_eq!(stats.entry_count, 1);
        assert_eq!(stats.total_size, 10);
    }

    #[test]
    fn test_cache_hash_different_urls() {
        let hash1 = FileCache::url_hash("https://example.com/file1.tif");
        let hash2 = FileCache::url_hash("https://example.com/file2.tif");
        assert_ne!(hash1, hash2);
        let hash3 = FileCache::url_hash("https://example.com/file1.tif");
        assert_eq!(hash1, hash3);
    }

    #[test]
    fn test_from_env_defaults() {
        // A poisoned lock only means another env test failed; the guard is still usable.
        let _env = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        std::env::remove_var("RSS_CACHE_DIR");
        std::env::remove_var("RSS_CACHE_TTL");

        // NOTE: with neither variable set this creates `.rss_cache` under `$HOME`
        // (or `/tmp` when `HOME` is unavailable).
        let cache = FileCache::from_env().expect("Failed to create cache from env");
        assert_eq!(cache.ttl, Duration::from_secs(604800));
    }

    #[test]
    fn test_from_env_custom() {
        let _env = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        let dir = TempDir::new().expect("Failed to create temp dir");
        std::env::set_var(
            "RSS_CACHE_DIR",
            dir.path().to_str().expect("temp dir path should be valid UTF-8"),
        );
        std::env::set_var("RSS_CACHE_TTL", "1800");

        let cache = FileCache::from_env();

        // Restore the environment before asserting so a failure cannot leak the
        // overrides into other tests.
        std::env::remove_var("RSS_CACHE_DIR");
        std::env::remove_var("RSS_CACHE_TTL");

        let cache = cache.expect("Failed to create cache from env");
        assert_eq!(cache.root, dir.path());
        assert_eq!(cache.ttl, Duration::from_secs(1800));
    }
}