use serde::{Deserialize, Serialize};
use std::time::Duration;
/// Top-level settings bundle for the scraper.
///
/// Implements serde's `Serialize` and `Deserialize` through derive, and is
/// `Debug` + `Clone`. Baseline values come from this type's `Default` impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScraperConfig {
    /// Request concurrency setting.
    ///
    /// NOTE(review): presumably an upper bound on in-flight requests —
    /// confirm where this value is consumed.
    pub concurrent_requests: usize,

    /// Timeout duration.
    ///
    /// NOTE(review): whether this applies per request or to a whole scrape
    /// is not visible from this file — confirm against the caller.
    pub timeout: Duration,

    /// Retry count setting.
    ///
    /// NOTE(review): presumably the maximum number of retries for a failed
    /// request — confirm against the caller.
    pub max_retries: u32,

    /// User-agent string.
    ///
    /// NOTE(review): presumably sent as the HTTP `User-Agent` header — verify.
    pub user_agent: String,

    /// Rate limiting parameters; see [`RateLimit`].
    pub rate_limit: RateLimit,

    /// LLM-related settings; see [`LLMConfig`].
    pub llm_config: LLMConfig,
}
/// Rate limiting parameters embedded in the scraper configuration.
///
/// Implements serde's `Serialize` and `Deserialize` through derive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateLimit {
    /// Request rate expressed in requests per second (fractional values are
    /// representable because the field is an `f32`).
    ///
    /// NOTE(review): no validation is performed here; presumably the consumer
    /// expects a positive, finite value — confirm.
    pub requests_per_second: f32,

    /// Burst size setting.
    ///
    /// NOTE(review): presumably the number of requests allowed back-to-back
    /// before the rate limit applies — confirm against the limiter.
    pub burst_size: usize,
}
/// LLM-related settings embedded in the scraper configuration.
///
/// Implements serde's `Serialize` and `Deserialize` through derive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMConfig {
    /// Endpoint of the LLM service, stored as a plain string.
    ///
    /// NOTE(review): the value is not parsed or validated as a URL in this
    /// file — confirm whether the consumer does so.
    pub endpoint: String,

    /// Temperature setting.
    ///
    /// NOTE(review): presumably the sampling temperature forwarded to the
    /// model; the accepted range is not visible here — confirm.
    pub temperature: f32,

    /// Token limit setting.
    ///
    /// NOTE(review): presumably caps the number of generated tokens per
    /// request — confirm against the client code.
    pub max_tokens: u32,
}
impl Default for ScraperConfig {
fn default() -> Self {
Self {
concurrent_requests: crate::DEFAULT_CONCURRENT_REQUESTS,
timeout: crate::DEFAULT_TIMEOUT,
max_retries: crate::DEFAULT_MAX_RETRIES,
user_agent: String::from("Mozilla/5.0 (compatible; RustBot/1.0)"),
rate_limit: RateLimit {
requests_per_second: 2.0,
burst_size: 5,
},
llm_config: LLMConfig {
endpoint: String::from("http://localhost:11434/api/generate"),
temperature: 0.1,
max_tokens: 2048,
},
}
}
}