use serde::{Deserialize, Serialize};
use std::sync::{Arc, Mutex};
use std::collections::HashMap;
use tracing::{debug, warn};
use crate::error::Result;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrawlerConfig {
#[serde(default = "default_max_queue_size")]
pub max_queue_size: usize,
#[serde(default = "default_max_memory_mb")]
pub max_memory_mb: usize,
#[serde(default = "default_max_concurrent_requests")]
pub max_concurrent_requests: usize,
#[serde(default = "default_circuit_breaker_threshold")]
pub circuit_breaker_threshold: usize,
#[serde(default = "default_backpressure_threshold")]
pub backpressure_threshold: u8,
#[serde(default = "default_true")]
pub enable_memory_monitoring: bool,
#[serde(default = "default_true")]
pub enable_circuit_breaker: bool,
}
impl Default for CrawlerConfig {
fn default() -> Self {
Self {
max_queue_size: default_max_queue_size(),
max_memory_mb: default_max_memory_mb(),
max_concurrent_requests: default_max_concurrent_requests(),
circuit_breaker_threshold: default_circuit_breaker_threshold(),
backpressure_threshold: default_backpressure_threshold(),
enable_memory_monitoring: true,
enable_circuit_breaker: true,
}
}
}
fn default_max_queue_size() -> usize {
1000
}
fn default_max_memory_mb() -> usize {
500
}
fn default_max_concurrent_requests() -> usize {
10
}
fn default_circuit_breaker_threshold() -> usize {
3
}
fn default_backpressure_threshold() -> u8 {
80
}
fn default_true() -> bool {
true
}
#[derive(Debug, Clone)]
pub struct CircuitBreaker {
failures: Arc<Mutex<HashMap<String, usize>>>,
threshold: usize,
}
impl CircuitBreaker {
pub fn new(threshold: usize) -> Self {
Self {
failures: Arc::new(Mutex::new(HashMap::new())),
threshold,
}
}
pub fn record_failure(&self, domain: &str) {
let mut failures = self.failures.lock().unwrap();
let count = failures.entry(domain.to_string()).or_insert(0);
*count += 1;
if *count >= self.threshold {
warn!("Circuit breaker triggered for domain: {} ({} failures)", domain, count);
}
}
pub fn record_success(&self, domain: &str) {
let mut failures = self.failures.lock().unwrap();
failures.remove(domain);
debug!("Circuit breaker reset for domain: {}", domain);
}
pub fn should_skip(&self, domain: &str) -> bool {
let failures = self.failures.lock().unwrap();
failures.get(domain).unwrap_or(&0) >= &self.threshold
}
pub fn get_failure_count(&self, domain: &str) -> usize {
let failures = self.failures.lock().unwrap();
*failures.get(domain).unwrap_or(&0)
}
pub fn get_total_failures(&self) -> usize {
let failures = self.failures.lock().unwrap();
failures.values().filter(|&&count| count >= self.threshold).count()
}
}
pub struct MemoryMonitor {
max_memory_mb: usize,
enabled: bool,
}
impl MemoryMonitor {
pub fn new(max_memory_mb: usize, enabled: bool) -> Self {
Self {
max_memory_mb,
enabled,
}
}
pub fn check_memory_limit(&self) -> Result<()> {
if !self.enabled {
return Ok(());
}
debug!("Memory check: limit is {}MB (lightweight mode)", self.max_memory_mb);
Ok(())
}
pub fn get_current_memory_mb(&self) -> u64 {
0
}
pub fn get_memory_percentage(&self) -> f64 {
0.0
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_crawler_config_defaults() {
let config = CrawlerConfig::default();
assert_eq!(config.max_queue_size, 1000);
assert_eq!(config.max_memory_mb, 500);
assert_eq!(config.max_concurrent_requests, 10);
assert_eq!(config.circuit_breaker_threshold, 3);
assert_eq!(config.backpressure_threshold, 80);
assert!(config.enable_memory_monitoring);
assert!(config.enable_circuit_breaker);
}
#[test]
fn test_circuit_breaker() {
let breaker = CircuitBreaker::new(3);
assert!(!breaker.should_skip("example.com"));
breaker.record_failure("example.com");
assert_eq!(breaker.get_failure_count("example.com"), 1);
assert!(!breaker.should_skip("example.com"));
breaker.record_failure("example.com");
assert_eq!(breaker.get_failure_count("example.com"), 2);
assert!(!breaker.should_skip("example.com"));
breaker.record_failure("example.com");
assert_eq!(breaker.get_failure_count("example.com"), 3);
assert!(breaker.should_skip("example.com"));
breaker.record_success("example.com");
assert!(!breaker.should_skip("example.com"));
assert_eq!(breaker.get_failure_count("example.com"), 0);
}
#[test]
fn test_circuit_breaker_multiple_domains() {
let breaker = CircuitBreaker::new(2);
breaker.record_failure("domain1.com");
breaker.record_failure("domain1.com");
breaker.record_failure("domain2.com");
breaker.record_failure("domain2.com");
assert!(breaker.should_skip("domain1.com"));
assert!(breaker.should_skip("domain2.com"));
assert_eq!(breaker.get_total_failures(), 2);
breaker.record_success("domain1.com");
assert!(!breaker.should_skip("domain1.com"));
assert!(breaker.should_skip("domain2.com"));
assert_eq!(breaker.get_total_failures(), 1);
}
#[test]
fn test_memory_monitor_disabled() {
let monitor = MemoryMonitor::new(100, false);
assert!(monitor.check_memory_limit().is_ok());
assert_eq!(monitor.get_current_memory_mb(), 0);
assert_eq!(monitor.get_memory_percentage(), 0.0);
}
#[test]
fn test_memory_monitor_enabled() {
let monitor = MemoryMonitor::new(100, true);
assert!(monitor.check_memory_limit().is_ok());
}
}