use serde::{Deserialize, Serialize};

/// Policy applied when DNS-based crawler verification fails or cannot be
/// completed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum DnsFailurePolicy {
    /// Let the request through unchanged.
    Allow,
    /// Let the request through, but add `dns_failure_risk_penalty` to its
    /// risk score.
    #[default]
    ApplyRiskPenalty,
    /// Reject the request outright.
    Block,
}
/// Configuration for DNS-based crawler verification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrawlerConfig {
    /// Master switch for the crawler-verification subsystem.
    pub enabled: bool,
    /// How long DNS lookup results are cached, in seconds.
    pub dns_cache_ttl_secs: u64,
    /// How long completed verification results are cached, in seconds.
    pub verification_cache_ttl_secs: u64,
    /// Upper bound on the number of cached entries.
    pub max_cache_entries: u64,
    /// Timeout for a single DNS lookup, in milliseconds.
    pub dns_timeout_ms: u64,
    /// Maximum number of DNS lookups allowed in flight at once.
    pub max_concurrent_dns_lookups: usize,
    /// Whether to verify self-identified legitimate crawlers via DNS.
    pub verify_legitimate_crawlers: bool,
    /// Whether to block clients identified as bad bots.
    pub block_bad_bots: bool,
    /// What to do when DNS verification fails; falls back to
    /// `DnsFailurePolicy::ApplyRiskPenalty` when omitted from the config.
    #[serde(default)]
    pub dns_failure_policy: DnsFailurePolicy,
    /// Risk score added under `DnsFailurePolicy::ApplyRiskPenalty` (0-100).
    pub dns_failure_risk_penalty: u32,
    /// Upper bound on the number of statistics entries retained.
    pub max_stats_entries: usize,
}
impl Default for CrawlerConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            dns_cache_ttl_secs: 300,
            verification_cache_ttl_secs: 3600,
            max_cache_entries: 50_000,
            dns_timeout_ms: 2_000,
            max_concurrent_dns_lookups: 100,
            verify_legitimate_crawlers: true,
            block_bad_bots: true,
            dns_failure_policy: DnsFailurePolicy::ApplyRiskPenalty,
            dns_failure_risk_penalty: 50,
            max_stats_entries: 1000,
        }
    }
}
impl CrawlerConfig {
    /// Checks that the configured values are internally consistent, returning
    /// a human-readable message for the first violation found.
    pub fn validate(&self) -> Result<(), String> {
        if self.dns_timeout_ms == 0 {
            return Err("dns_timeout_ms must be greater than 0".to_string());
        }
        if self.dns_timeout_ms > 30_000 {
            return Err("dns_timeout_ms should not exceed 30 seconds".to_string());
        }
        if self.max_concurrent_dns_lookups == 0 {
            return Err("max_concurrent_dns_lookups must be greater than 0".to_string());
        }
        if self.dns_failure_risk_penalty > 100 {
            return Err("dns_failure_risk_penalty should not exceed 100".to_string());
        }
        Ok(())
    }
}
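
// A minimal sketch of how `validate` and the serde default could be
// exercised; the test names are illustrative, and the last test assumes
// `serde_json` is available as a dev-dependency.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_config_passes_validation() {
        assert!(CrawlerConfig::default().validate().is_ok());
    }

    #[test]
    fn zero_dns_timeout_is_rejected() {
        let cfg = CrawlerConfig {
            dns_timeout_ms: 0,
            ..Default::default()
        };
        assert!(cfg.validate().is_err());
    }

    #[test]
    fn oversized_risk_penalty_is_rejected() {
        let cfg = CrawlerConfig {
            dns_failure_risk_penalty: 101,
            ..Default::default()
        };
        assert!(cfg.validate().is_err());
    }

    // `#[serde(default)]` on `dns_failure_policy` means the field may be
    // omitted entirely; it then falls back to the `#[default]` variant,
    // `ApplyRiskPenalty`.
    #[test]
    fn missing_policy_falls_back_to_risk_penalty() {
        let json = r#"{
            "enabled": true,
            "dns_cache_ttl_secs": 300,
            "verification_cache_ttl_secs": 3600,
            "max_cache_entries": 50000,
            "dns_timeout_ms": 2000,
            "max_concurrent_dns_lookups": 100,
            "verify_legitimate_crawlers": true,
            "block_bad_bots": true,
            "dns_failure_risk_penalty": 50,
            "max_stats_entries": 1000
        }"#;
        let cfg: CrawlerConfig = serde_json::from_str(json).unwrap();
        assert_eq!(cfg.dns_failure_policy, DnsFailurePolicy::ApplyRiskPenalty);
    }
}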