mod sqlite;
mod types;
#[cfg(feature = "redis-cache")]
mod redis_impl;
pub use sqlite::SqliteCache;
pub use types::{CacheBackend, CacheEntry, CacheFilters, CacheKey};
#[cfg(feature = "redis-cache")]
pub use redis_impl::RedisCache;
use anyhow::Result;
use std::collections::HashSet;
/// High-level cache facade that delegates storage to a pluggable backend
/// (SQLite always available; Redis behind the `redis-cache` feature).
pub struct CacheManager {
    // Trait object so the concrete backend can be selected at runtime.
    backend: Box<dyn CacheBackend>,
}
impl CacheManager {
    /// Creates a cache manager backed by a SQLite database at `db_path`.
    ///
    /// # Errors
    /// Returns an error if the SQLite backend fails to initialize.
    pub async fn new_sqlite<P: AsRef<std::path::Path>>(db_path: P) -> Result<Self> {
        let backend = Box::new(SqliteCache::new(db_path).await?);
        Ok(Self { backend })
    }

    /// Creates a cache manager backed by a Redis instance at `redis_url`.
    ///
    /// # Errors
    /// Returns an error if the Redis connection cannot be established.
    #[cfg(feature = "redis-cache")]
    pub async fn new_redis(redis_url: &str) -> Result<Self> {
        let backend = Box::new(RedisCache::new(redis_url).await?);
        Ok(Self { backend })
    }

    /// Returns the cached entry for `key`, or `None` when nothing is cached.
    pub async fn get_cached_urls(&self, key: &CacheKey) -> Result<Option<CacheEntry>> {
        self.backend.get(key).await
    }

    /// Stores `entry` under `key`, replacing any previous entry.
    pub async fn store_urls(&self, key: &CacheKey, entry: &CacheEntry) -> Result<()> {
        self.backend.set(key, entry).await
    }

    /// Checks whether a non-expired entry exists for `key`.
    ///
    /// An expired entry is deleted eagerly; the delete is best-effort and a
    /// failure is deliberately ignored, since the entry is reported invalid
    /// either way.
    pub async fn is_valid(&self, key: &CacheKey, ttl_seconds: u64) -> Result<bool> {
        match self.backend.get(key).await? {
            Some(entry) if entry.is_expired(ttl_seconds) => {
                // Best-effort cleanup; the caller only cares about validity.
                let _ = self.backend.delete(key).await;
                Ok(false)
            }
            Some(_) => Ok(true),
            None => Ok(false),
        }
    }

    /// Returns the subset of `new_urls` not already cached under `key`.
    ///
    /// When nothing is cached, every URL in `new_urls` is considered new.
    pub async fn get_new_urls(
        &self,
        key: &CacheKey,
        new_urls: &HashSet<String>,
    ) -> Result<HashSet<String>> {
        // A single `get` suffices: the previous `exists`-then-`get` pair made
        // two backend round-trips and was racy (the entry could disappear
        // between the calls). The `None` arm already covers a missing entry.
        match self.backend.get(key).await? {
            Some(cached_entry) => {
                let cached_urls: HashSet<String> = cached_entry.urls.into_iter().collect();
                Ok(new_urls.difference(&cached_urls).cloned().collect())
            }
            None => Ok(new_urls.clone()),
        }
    }

    /// Removes every entry older than `ttl_seconds` from the backend.
    pub async fn cleanup_expired(&self, ttl_seconds: u64) -> Result<()> {
        self.backend.cleanup_expired(ttl_seconds).await
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use tempfile::tempdir;

    #[tokio::test]
    async fn test_cache_manager_sqlite() -> Result<()> {
        // Spin up a SQLite-backed manager inside a throwaway directory.
        let dir = tempdir()?;
        let manager = CacheManager::new_sqlite(dir.path().join("test.db")).await?;

        let key = CacheKey {
            domain: "example.com".to_string(),
            providers: vec!["wayback".to_string()],
            filters_hash: "test_hash".to_string(),
        };
        let stored = CacheEntry {
            urls: vec![
                "https://example.com/page1".to_string(),
                "https://example.com/page2".to_string(),
            ],
            timestamp: chrono::Utc::now(),
        };

        // Round-trip: whatever we store must come back intact.
        manager.store_urls(&key, &stored).await?;
        let fetched = manager
            .get_cached_urls(&key)
            .await?
            .expect("entry should exist after store");
        assert_eq!(fetched.urls.len(), 2);
        for url in ["https://example.com/page1", "https://example.com/page2"] {
            assert!(fetched.urls.contains(&url.to_string()));
        }
        Ok(())
    }

    #[tokio::test]
    async fn test_incremental_scanning() -> Result<()> {
        let dir = tempdir()?;
        let manager = CacheManager::new_sqlite(dir.path().join("test.db")).await?;

        let key = CacheKey {
            domain: "example.com".to_string(),
            providers: vec!["wayback".to_string()],
            filters_hash: "test_hash".to_string(),
        };

        // Seed the cache with two already-known pages.
        manager
            .store_urls(
                &key,
                &CacheEntry {
                    urls: vec![
                        "https://example.com/page1".to_string(),
                        "https://example.com/page2".to_string(),
                    ],
                    timestamp: chrono::Utc::now(),
                },
            )
            .await?;

        // A later scan overlaps on page2 and introduces page3 and page4.
        let scan: HashSet<String> = [
            "https://example.com/page2",
            "https://example.com/page3",
            "https://example.com/page4",
        ]
        .iter()
        .map(|s| s.to_string())
        .collect();

        let fresh = manager.get_new_urls(&key, &scan).await?;

        // Only the previously-unseen pages should remain.
        assert_eq!(fresh.len(), 2);
        assert!(fresh.contains("https://example.com/page3"));
        assert!(fresh.contains("https://example.com/page4"));
        assert!(!fresh.contains("https://example.com/page2"));
        Ok(())
    }
}