use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Duration;
/// Record holding the data gathered for a single URL.
///
/// `ScrapedData::new` fills in `url` and `timestamp`; every other field starts
/// empty or zero and, being public, is left for the caller to populate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScrapedData {
/// URL this record was created for.
pub url: String,
/// Optional title; `None` on construction.
pub title: Option<String>,
/// Content string; empty on construction.
// NOTE(review): presumably the fetched page body — confirm against the code that fills it in.
pub content: String,
/// Arbitrary JSON values keyed by name (see `add_metadata`).
pub metadata: HashMap<String, serde_json::Value>,
/// Lists of extracted strings keyed by name (see `add_extracted_data`).
pub extracted_data: HashMap<String, Vec<String>>,
/// UTC time captured when the record was constructed via `new`.
pub timestamp: DateTime<Utc>,
/// Status code; `0` on construction.
// NOTE(review): presumably the HTTP response status — confirm against the caller.
pub status_code: u16,
/// Header name/value pairs; empty on construction.
// NOTE(review): presumably the response headers — confirm against the caller.
pub headers: HashMap<String, String>,
/// Duration figure in milliseconds (per the field name); `0` on construction.
pub scrape_time_ms: u64,
}
impl ScrapedData {
    /// Creates a record for `url` with every other field in its initial state.
    ///
    /// `title` is `None`, `content` and all maps are empty, `status_code` and
    /// `scrape_time_ms` are `0`, and `timestamp` is taken from `Utc::now()`.
    pub fn new(url: String) -> Self {
        let timestamp = Utc::now();
        Self {
            url,
            title: None,
            content: String::new(),
            metadata: HashMap::new(),
            extracted_data: HashMap::new(),
            timestamp,
            status_code: 0,
            headers: HashMap::new(),
            scrape_time_ms: 0,
        }
    }

    /// Stores `values` under `key` in `extracted_data`.
    ///
    /// Any list previously stored under the same key is replaced, not extended.
    pub fn add_extracted_data(&mut self, key: &str, values: Vec<String>) {
        let owned_key = key.to_owned();
        self.extracted_data.insert(owned_key, values);
    }

    /// Stores `value` under `key` in `metadata`, replacing any previous entry.
    pub fn add_metadata(&mut self, key: &str, value: serde_json::Value) {
        let owned_key = String::from(key);
        self.metadata.insert(owned_key, value);
    }

    /// Returns the list stored under `key`, or `None` if the key is absent.
    pub fn get_extracted_values(&self, key: &str) -> Option<&Vec<String>> {
        let values = self.extracted_data.get(key)?;
        Some(values)
    }

    /// Returns the first string stored under `key`.
    ///
    /// Yields `None` when the key is absent or its list is empty.
    pub fn get_first_value(&self, key: &str) -> Option<&String> {
        self.extracted_data.get(key)?.first()
    }
}
/// Configuration values describing how failed operations should be retried.
///
/// NOTE(review): this type only carries the settings; how they are combined
/// into an actual delay schedule is decided by the consuming code, which is
/// not visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetryPolicy {
/// Attempt limit (default `3`).
// NOTE(review): whether this counts the initial try is not visible here — confirm.
pub max_attempts: u32,
/// Base delay (default 1000 ms).
pub base_delay: Duration,
/// Upper delay value (default 10000 ms); presumably a cap on the computed delay.
pub max_delay: Duration,
/// Flag selecting exponential backoff (default `true`).
pub exponential_backoff: bool,
/// Backoff multiplier (default `2.0`); presumably applied per attempt when
/// `exponential_backoff` is set — confirm against the retry loop.
pub backoff_multiplier: f64,
}
impl Default for RetryPolicy {
    /// Builds the default policy: 3 attempts, a 1 s base delay, a 10 s maximum
    /// delay, exponential backoff enabled, and a multiplier of `2.0`.
    fn default() -> Self {
        // Same durations as 1000 ms / 10000 ms, expressed in whole seconds.
        let base_delay = Duration::from_secs(1);
        let max_delay = Duration::from_secs(10);

        Self {
            max_attempts: 3,
            base_delay,
            max_delay,
            exponential_backoff: true,
            backoff_multiplier: 2.0,
        }
    }
}
/// Description of one extraction rule: a named selector plus what to pull out.
///
/// NOTE(review): the code that evaluates rules is not visible here, so the
/// per-field notes below are assumptions to confirm against it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionRule {
/// Name of the rule; presumably the key under which results are stored.
pub name: String,
/// Selector string; presumably a CSS selector — confirm against the extractor.
pub selector: String,
/// Which kind of value to extract from a match (see `ExtractionType`).
pub extraction_type: ExtractionType,
/// Presumably whether all matches are collected rather than only the first.
pub multiple: bool,
/// Optional attribute name; presumably required for `ExtractionType::Attribute`.
pub attribute: Option<String>,
}
/// Kind of value an `ExtractionRule` extracts.
///
/// NOTE(review): variant meanings below are inferred from their names; the
/// code that interprets them is not visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExtractionType {
/// Presumably the text content of the matched element.
Text,
/// Presumably the inner HTML of the matched element.
Html,
/// Presumably the value of the attribute named by `ExtractionRule::attribute`.
Attribute,
/// Presumably the matched element's HTML including its own tag.
OuterHtml,
}
/// HTTP request method.
///
/// `Default::default()` yields [`HttpMethod::Get`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub enum HttpMethod {
    /// `GET`; the default variant.
    #[default]
    Get,
    /// `POST`.
    Post,
    /// `PUT`.
    Put,
    /// `DELETE`.
    Delete,
    /// `HEAD`.
    Head,
    /// `OPTIONS`.
    Options,
    /// `PATCH`.
    Patch,
}
/// Aggregate counters describing a batch of requests.
///
/// All fields start at zero (see `RequestStats::new`). Nothing in this type
/// updates the counters itself; callers maintain them directly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RequestStats {
/// Number of requests counted; the denominator of `success_rate`.
pub total_requests: u64,
/// Number of requests counted as successful; the numerator of `success_rate`.
pub successful_requests: u64,
/// Number of requests counted as failed.
pub failed_requests: u64,
/// Byte total.
// NOTE(review): presumably bytes received — confirm against the code that updates it.
pub total_bytes: u64,
/// Average response time in milliseconds (per the field name).
// NOTE(review): not derived from `total_time_ms` anywhere in view — confirm who maintains it.
pub avg_response_time_ms: f64,
/// Time total in milliseconds (per the field name).
pub total_time_ms: u64,
}
impl Default for RequestStats {
fn default() -> Self {
Self::new()
}
}
impl RequestStats {
    /// Creates statistics with every counter set to zero.
    pub fn new() -> Self {
        // Spelled out field by field: `Default` delegates here, so this must
        // not delegate back to `Default`.
        Self {
            total_requests: 0,
            successful_requests: 0,
            failed_requests: 0,
            total_bytes: 0,
            avg_response_time_ms: 0.0,
            total_time_ms: 0,
        }
    }

    /// Returns `successful_requests / total_requests` as a fraction.
    ///
    /// Yields `0.0` when no requests have been counted, avoiding a division
    /// by zero.
    pub fn success_rate(&self) -> f64 {
        match self.total_requests {
            0 => 0.0,
            total => self.successful_requests as f64 / total as f64,
        }
    }
}
/// Configuration values describing a request rate limit.
///
/// NOTE(review): this type only carries the settings; the code enforcing the
/// limit is not visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateLimit {
/// Request count associated with one `period` (default `10`).
pub requests_per_period: u32,
/// Length of the accounting period (default 60 s).
pub period: Duration,
/// Delay between consecutive requests (default 1000 ms).
// NOTE(review): how this interacts with `requests_per_period` is up to the enforcing code — confirm.
pub delay_between_requests: Duration,
}
impl Default for RateLimit {
    /// Builds the default limit: 10 requests per 60 s period, with a 1 s delay
    /// between requests.
    fn default() -> Self {
        let period = Duration::from_secs(60);
        // Same duration as 1000 ms, expressed in whole seconds.
        let delay_between_requests = Duration::from_secs(1);

        Self {
            requests_per_period: 10,
            period,
            delay_between_requests,
        }
    }
}