use anyhow::Result;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
/// Identifies one cached result set by domain, provider list and filter digest.
///
/// The `Display` implementation in this file renders the key as a hex
/// SHA-256 digest of its fields.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CacheKey {
/// Domain string handed to [`CacheKey::new`].
pub domain: String,
/// Provider names; [`CacheKey::new`] stores them sorted, so the order in
/// which callers list providers does not affect the key.
pub providers: Vec<String>,
/// Digest returned by [`CacheFilters::compute_hash`] for the filters that
/// were passed to [`CacheKey::new`].
pub filters_hash: String,
}
impl CacheKey {
    /// Builds a key for `domain`, the given `providers` and `filters`.
    ///
    /// The provider names are copied and sorted so that the same set of
    /// providers yields the same key regardless of argument order. The
    /// filters are reduced to their digest via `CacheFilters::compute_hash`.
    pub fn new(domain: &str, providers: &[String], filters: &CacheFilters) -> Self {
        let mut sorted_providers = providers.to_vec();
        // Equal strings are indistinguishable, so an unstable sort produces
        // exactly the same ordering as a stable one here.
        sorted_providers.sort_unstable();

        Self {
            domain: domain.to_owned(),
            providers: sorted_providers,
            filters_hash: filters.compute_hash(),
        }
    }
}
impl std::fmt::Display for CacheKey {
    /// Writes the key as a lowercase hex SHA-256 digest of its fields.
    ///
    /// Every component is fed to the hasher with a fixed-width length
    /// prefix, and the provider count is hashed first. Plain concatenation
    /// would let different keys collide (for example domain `"ab"` with no
    /// providers versus domain `"a"` with provider `"b"`, or the provider
    /// lists `["a,b"]` and `["a", "b"]`).
    ///
    /// Note: digests differ from those produced by the earlier unframed
    /// scheme, so entries stored under old digests will no longer be found.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut hasher = Sha256::new();

        // The provider count pins down where the provider list ends.
        // `usize -> u64` is a widening cast; a fixed width keeps the digest
        // identical across 32- and 64-bit targets.
        hasher.update((self.providers.len() as u64).to_le_bytes());

        let components = std::iter::once(&self.domain)
            .chain(&self.providers)
            .chain(std::iter::once(&self.filters_hash));
        for component in components {
            // Length prefix makes the boundary between components unambiguous.
            hasher.update((component.len() as u64).to_le_bytes());
            hasher.update(component.as_bytes());
        }

        write!(f, "{:x}", hasher.finalize())
    }
}
/// Filter settings that take part in a cache key.
///
/// Every field is fed into [`CacheFilters::compute_hash`], so changing any of
/// them yields a different `filters_hash` in [`CacheKey`].
///
/// NOTE(review): the code that applies these filters is not visible in this
/// part of the file; the per-field descriptions below are inferred from the
/// field names and should be confirmed against the filtering code.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CacheFilters {
/// Presumably whether subdomains are included — TODO confirm.
pub subs: bool,
/// Presumably file extensions to keep.
pub extensions: Vec<String>,
/// Presumably file extensions to drop.
pub exclude_extensions: Vec<String>,
/// Presumably patterns a URL must match.
pub patterns: Vec<String>,
/// Presumably patterns that exclude a URL.
pub exclude_patterns: Vec<String>,
/// Presumably names of predefined filter presets.
pub presets: Vec<String>,
/// Optional lower length bound (unit/target not shown here — TODO confirm).
pub min_length: Option<usize>,
/// Optional upper length bound (unit/target not shown here — TODO confirm).
pub max_length: Option<usize>,
/// Strict-mode flag; semantics are defined by the consumer of these filters.
pub strict: bool,
/// Presumably whether URLs are normalized before use.
pub normalize_url: bool,
/// Presumably whether equivalent endpoints are merged.
pub merge_endpoint: bool,
}
impl CacheFilters {
pub fn compute_hash(&self) -> String {
let mut hasher = Sha256::new();
hasher.update(if self.subs { "1" } else { "0" });
hasher.update(self.extensions.join(","));
hasher.update(self.exclude_extensions.join(","));
hasher.update(self.patterns.join(","));
hasher.update(self.exclude_patterns.join(","));
hasher.update(self.presets.join(","));
hasher.update(self.min_length.map(|l| l.to_string()).unwrap_or_default());
hasher.update(self.max_length.map(|l| l.to_string()).unwrap_or_default());
hasher.update(if self.strict { "1" } else { "0" });
hasher.update(if self.normalize_url { "1" } else { "0" });
hasher.update(if self.merge_endpoint { "1" } else { "0" });
format!("{:x}", hasher.finalize())
}
}
/// A cached list of URLs together with the time it was recorded.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheEntry {
/// The cached URL strings.
pub urls: Vec<String>,
/// UTC time of the entry; [`CacheEntry::new`] sets it to the current time and
/// [`CacheEntry::is_expired`] measures age from it.
pub timestamp: DateTime<Utc>,
}
impl CacheEntry {
    /// Wraps `urls` in an entry stamped with the current UTC time.
    pub fn new(urls: Vec<String>) -> Self {
        Self {
            urls,
            timestamp: Utc::now(),
        }
    }

    /// Reports whether at least `ttl_seconds` whole seconds have elapsed
    /// since `timestamp`.
    ///
    /// A `ttl_seconds` of `0` means the entry is always considered expired.
    ///
    /// A timestamp that lies ahead of the local clock (for example because
    /// the entry was written by a host whose clock runs fast) counts as zero
    /// elapsed seconds. Casting the negative duration straight to `u64`
    /// would wrap to a huge value and report a fresh entry as expired.
    pub fn is_expired(&self, ttl_seconds: u64) -> bool {
        let elapsed_secs = Utc::now()
            .signed_duration_since(self.timestamp)
            .num_seconds();

        // Clamp first so the cast below is lossless (value is never negative).
        let elapsed_secs = elapsed_secs.max(0) as u64;
        elapsed_secs >= ttl_seconds
    }
}
/// Asynchronous storage interface for [`CacheEntry`] values addressed by
/// [`CacheKey`].
///
/// Implementors must be `Send + Sync`. Every method is fallible and returns
/// an `anyhow::Result`.
///
/// NOTE(review): no implementation is visible in this part of the file; the
/// per-method notes below follow the signatures and names and should be
/// confirmed against the concrete backends.
#[async_trait]
pub trait CacheBackend: Send + Sync {
/// Looks up `key`; presumably `Ok(None)` signals that no entry is stored.
async fn get(&self, key: &CacheKey) -> Result<Option<CacheEntry>>;
/// Stores `entry` under `key` (overwrite behaviour is backend-defined — confirm).
async fn set(&self, key: &CacheKey, entry: &CacheEntry) -> Result<()>;
/// Removes whatever is stored under `key`.
async fn delete(&self, key: &CacheKey) -> Result<()>;
/// Presumably removes entries older than `ttl_seconds` — confirm against implementations.
async fn cleanup_expired(&self, ttl_seconds: u64) -> Result<()>;
/// Reports whether something is stored under `key`.
async fn exists(&self, key: &CacheKey) -> Result<bool>;
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into owned `String`s.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|&item| item.to_owned()).collect()
    }

    /// Filters with `strict` enabled and every other setting empty or off,
    /// apart from the supplied `subs` flag and `extensions` list.
    fn strict_filters(subs: bool, extensions: &[&str]) -> CacheFilters {
        CacheFilters {
            subs,
            extensions: owned(extensions),
            exclude_extensions: Vec::new(),
            patterns: Vec::new(),
            exclude_patterns: Vec::new(),
            presets: Vec::new(),
            min_length: None,
            max_length: None,
            strict: true,
            normalize_url: false,
            merge_endpoint: false,
        }
    }

    /// The key keeps the domain, sorts providers and carries a filter digest.
    #[test]
    fn test_cache_key_creation() {
        let filters = CacheFilters {
            exclude_extensions: owned(&["jpg"]),
            patterns: owned(&["admin"]),
            exclude_patterns: owned(&["logout"]),
            presets: owned(&["no-images"]),
            min_length: Some(10),
            max_length: Some(100),
            normalize_url: true,
            ..strict_filters(true, &["js", "php"])
        };
        let providers = owned(&["wayback", "cc"]);

        let key = CacheKey::new("example.com", &providers, &filters);

        assert_eq!(key.domain, "example.com");
        assert_eq!(key.providers, vec!["cc", "wayback"]);
        assert!(!key.filters_hash.is_empty());
    }

    /// Two separately built but equal filter sets hash identically.
    #[test]
    fn test_cache_filters_hash_consistency() {
        let first = strict_filters(true, &["js", "php"]);
        let second = strict_filters(true, &["js", "php"]);

        assert_eq!(first.compute_hash(), second.compute_hash());
    }

    /// Flipping `subs` alone changes the digest.
    #[test]
    fn test_cache_filters_hash_different() {
        let with_subs = strict_filters(true, &["js"]);
        let without_subs = strict_filters(false, &["js"]);

        assert_ne!(with_subs.compute_hash(), without_subs.compute_hash());
    }

    /// A fresh entry is within a one-hour TTL; a two-hour-old one is not.
    #[test]
    fn test_cache_entry_expiry() {
        let mut entry = CacheEntry::new(owned(&["https://example.com"]));
        assert!(!entry.is_expired(3600));

        entry.timestamp = Utc::now() - chrono::Duration::hours(2);
        assert!(entry.is_expired(3600));
    }

    /// Equal keys render identically; a different domain renders differently.
    #[test]
    fn test_cache_key_string_representation() {
        let filters = strict_filters(false, &[]);
        let providers = owned(&["wayback"]);

        let same_a = CacheKey::new("example.com", &providers, &filters);
        let same_b = CacheKey::new("example.com", &providers, &filters);
        let other = CacheKey::new("different.com", &providers, &filters);

        assert_eq!(same_a.to_string(), same_b.to_string());
        assert_ne!(same_a.to_string(), other.to_string());
    }
}