Skip to main content

synapse_pingora/crawler/
cache.rs

1//! Caching for crawler verification results.
2
3use moka::sync::Cache;
4use std::net::IpAddr;
5use std::time::Duration;
6
7use super::config::CrawlerConfig;
8use super::detector::CrawlerVerificationResult;
9
10/// Cache for crawler verification results and DNS lookups.
11#[derive(Debug)]
12pub struct VerificationCache {
13    /// Cache for full verification results
14    verification_cache: Cache<String, CrawlerVerificationResult>,
15    /// Cache for DNS lookups (IP -> hostname)
16    dns_cache: Cache<IpAddr, Option<String>>,
17}
18
19impl VerificationCache {
20    /// Create a new verification cache.
21    pub fn new(config: &CrawlerConfig) -> Self {
22        let verification_cache = Cache::builder()
23            .max_capacity(config.max_cache_entries)
24            .time_to_live(Duration::from_secs(config.verification_cache_ttl_secs))
25            .build();
26
27        let dns_cache = Cache::builder()
28            .max_capacity(config.max_cache_entries)
29            .time_to_live(Duration::from_secs(config.dns_cache_ttl_secs))
30            .build();
31
32        Self {
33            verification_cache,
34            dns_cache,
35        }
36    }
37
38    /// Generate cache key for verification result.
39    pub fn cache_key(user_agent: &str, ip: IpAddr) -> String {
40        // Use a simple hash of UA + IP as the key
41        use std::hash::{Hash, Hasher};
42        let mut hasher = std::collections::hash_map::DefaultHasher::new();
43        user_agent.hash(&mut hasher);
44        ip.hash(&mut hasher);
45        format!("{:x}", hasher.finish())
46    }
47
48    /// Get cached verification result.
49    pub fn get_verification(&self, key: &str) -> Option<CrawlerVerificationResult> {
50        self.verification_cache.get(key)
51    }
52
53    /// Cache a verification result.
54    pub fn put_verification(&self, key: String, result: CrawlerVerificationResult) {
55        self.verification_cache.insert(key, result);
56    }
57
58    /// Get cached DNS result.
59    pub fn get_dns(&self, ip: IpAddr) -> Option<Option<String>> {
60        self.dns_cache.get(&ip)
61    }
62
63    /// Cache a DNS result.
64    pub fn put_dns(&self, ip: IpAddr, hostname: Option<String>) {
65        self.dns_cache.insert(ip, hostname);
66    }
67
68    /// Get cache statistics.
69    pub fn stats(&self) -> CacheStats {
70        CacheStats {
71            verification_entries: self.verification_cache.entry_count() as usize,
72            dns_entries: self.dns_cache.entry_count() as usize,
73        }
74    }
75}
76
/// Entry counts for the two caches held by the verification cache.
#[derive(Clone, Debug)]
pub struct CacheStats {
    /// Number of entries reported by the verification-result cache.
    pub verification_entries: usize,
    /// Number of entries reported by the DNS cache.
    pub dns_entries: usize,
}