use crate::domain::idempotency::IdempotencyKey;
use crate::domain::selector::Selector;
use crate::domain::transformation::Transformation;
use crate::reliability::ReliabilityScore;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
/// A named extraction region: a selector together with the JSON schema and
/// the transformations associated with it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Region {
/// Region name; [`Region::validate`] rejects an empty name.
pub name: String,
/// Selector for this region; checked via its own `validate` method during
/// [`Region::validate`].
pub selector: Selector,
/// Schema for the region; [`Region::validate`] requires it to be a JSON object.
pub schema: Value,
/// Transformations attached to the region, in the order they were added.
pub transformations: Vec<Transformation>,
}
impl Region {
    /// Creates a region with the given name, selector and schema and no
    /// transformations.
    ///
    /// No validation happens here; call [`Region::validate`] to check the result.
    pub fn new(name: impl Into<String>, selector: Selector, schema: Value) -> Self {
        Self {
            name: name.into(),
            selector,
            schema,
            transformations: Vec::new(),
        }
    }

    /// Appends `transformation` to the region's transformation list and
    /// returns the updated region (builder style).
    #[must_use]
    pub fn with_transformation(mut self, transformation: Transformation) -> Self {
        self.transformations.push(transformation);
        self
    }

    /// Checks that the region is well formed.
    ///
    /// The checks run in this order: the name must be non-empty, the schema
    /// must be a JSON object, and the selector must pass its own validation.
    ///
    /// # Errors
    ///
    /// Returns `PluginError::TemplateValidationError` when the name is empty
    /// or the schema is not a JSON object, and propagates whatever error the
    /// selector's `validate` reports.
    pub fn validate(&self) -> crate::Result<()> {
        if self.name.is_empty() {
            return Err(crate::error::PluginError::TemplateValidationError(
                "region name cannot be empty".to_string(),
            ));
        }

        // Name the kind of value that was actually supplied. Looking up a
        // "type" key on a non-object value always yields nothing, which made
        // the message report "null" no matter what the schema really was.
        let unexpected_kind = match &self.schema {
            Value::Object(_) => None,
            Value::Null => Some("null"),
            Value::Bool(_) => Some("boolean"),
            Value::Number(_) => Some("number"),
            Value::String(_) => Some("string"),
            Value::Array(_) => Some("array"),
        };
        if let Some(kind) = unexpected_kind {
            return Err(crate::error::PluginError::TemplateValidationError(format!(
                "region schema must be a JSON object, got {kind}"
            )));
        }

        self.selector.validate()?;
        Ok(())
    }
}
/// A named collection of [`Region`]s together with bookkeeping metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionTemplate {
/// Template identifier; `ExtractionTemplate::new` assigns a random v4 UUID.
pub id: uuid::Uuid,
/// Template name; `ExtractionTemplate::validate` rejects an empty name.
pub name: String,
/// Optional free-text description.
pub description: Option<String>,
/// Regions belonging to this template, in the order they were added.
pub regions: Vec<Region>,
/// Timestamps, usage counter, version and tags for this template.
pub metadata: TemplateMetadata,
}
/// Bookkeeping data attached to an [`ExtractionTemplate`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemplateMetadata {
/// Creation time (UTC); set to the current time by `Default`.
pub created_at: DateTime<Utc>,
/// Last modification time (UTC); also refreshed by `ExtractionTemplate::mark_used`.
pub updated_at: DateTime<Utc>,
/// Time of the most recent `ExtractionTemplate::mark_used` call, or `None`
/// if that has never happened.
pub last_used_at: Option<DateTime<Utc>>,
/// Counter incremented by `ExtractionTemplate::mark_used`.
pub usage_count: u64,
/// Version number; `Default` starts it at 1. Nothing in this file bumps it.
pub version: u32,
/// Free-form tags; replaced wholesale by `ExtractionTemplate::with_tags`.
pub tags: Vec<String>,
}
impl Default for TemplateMetadata {
    /// Builds metadata for a brand-new template: version 1, never used, no
    /// tags, and a single shared "now" for both timestamps.
    fn default() -> Self {
        // Take the clock reading once so both timestamps are exactly equal.
        let timestamp = Utc::now();
        Self {
            created_at: timestamp,
            updated_at: timestamp,
            last_used_at: None,
            usage_count: 0,
            version: 1,
            tags: Vec::new(),
        }
    }
}
impl ExtractionTemplate {
pub fn new(name: impl Into<String>) -> Self {
Self {
id: uuid::Uuid::new_v4(),
name: name.into(),
description: None,
regions: vec![],
metadata: TemplateMetadata::default(),
}
}
#[must_use]
pub fn with_region(mut self, region: Region) -> Self {
self.regions.push(region);
self
}
#[must_use]
pub fn with_description(mut self, desc: impl Into<String>) -> Self {
self.description = Some(desc.into());
self
}
#[must_use]
pub fn with_tags(mut self, tags: Vec<String>) -> Self {
self.metadata.tags = tags;
self
}
pub fn validate(&self) -> crate::Result<()> {
if self.name.is_empty() {
return Err(crate::error::PluginError::TemplateValidationError(
"template name cannot be empty".to_string(),
));
}
for region in &self.regions {
region.validate()?;
}
Ok(())
}
pub fn mark_used(&mut self) {
self.metadata.usage_count += 1;
self.metadata.last_used_at = Some(Utc::now());
self.metadata.updated_at = Utc::now();
}
}
/// Everything needed to run one extraction: the template, the input document
/// and per-request settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionRequest {
/// Template to apply; validated as part of `ExtractionRequest::validate`.
pub template: ExtractionTemplate,
/// URL associated with the request; must be non-empty to pass validation.
/// NOTE(review): presumably the address the HTML came from — confirm.
pub url: String,
/// HTML input; must be non-empty to pass validation.
pub html: String,
/// Idempotency key for this request; `ExtractionRequest::new` obtains one
/// from `IdempotencyKey::new()`.
pub idempotency_key: IdempotencyKey,
/// Timeout in milliseconds; `ExtractionRequest::new` defaults it to 30_000.
pub timeout_ms: u64,
/// Optional JSON context; not interpreted anywhere in this file.
pub context: Option<Value>,
}
impl ExtractionRequest {
pub fn new(
template: ExtractionTemplate,
url: impl Into<String>,
html: impl Into<String>,
) -> Self {
Self {
template,
url: url.into(),
html: html.into(),
idempotency_key: IdempotencyKey::new(),
timeout_ms: 30_000,
context: None,
}
}
#[must_use]
pub const fn with_idempotency_key(mut self, key: IdempotencyKey) -> Self {
self.idempotency_key = key;
self
}
#[must_use]
pub const fn with_timeout(mut self, ms: u64) -> Self {
self.timeout_ms = ms;
self
}
#[must_use]
pub fn with_context(mut self, context: Value) -> Self {
self.context = Some(context);
self
}
pub fn validate(&self) -> crate::Result<()> {
self.template.validate()?;
if self.url.is_empty() {
return Err(crate::error::PluginError::ExtractionError(
"URL cannot be empty".to_string(),
));
}
if self.html.is_empty() {
return Err(crate::error::PluginError::ExtractionError(
"HTML cannot be empty".to_string(),
));
}
Ok(())
}
}
/// Outcome of an extraction: the extracted values plus metadata about the run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionResult {
/// Extracted JSON values, keyed by region name (see
/// `ExtractionResult::with_region_data`).
pub data: HashMap<String, Value>,
/// Timing, per-region status and error information for the run.
pub metadata: ExtractionMetadata,
}
/// Metadata recorded alongside an [`ExtractionResult`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionMetadata {
/// Idempotency key passed to `ExtractionResult::new`.
pub idempotency_key: IdempotencyKey,
/// Completion time (UTC); `ExtractionResult::new` initialises it to the
/// moment the result is constructed.
pub completed_at: DateTime<Utc>,
/// Elapsed time in milliseconds; starts at 0 and is set through
/// `ExtractionResult::set_elapsed_ms`.
pub elapsed_ms: u64,
/// Percentage (0.0–100.0) of entries in `region_status` marked successful;
/// starts at 0.0 and is recomputed by `ExtractionResult::calculate_success_rate`.
pub selector_success_rate: f32,
/// Per-region outcome. NOTE(review): presumably keyed by region name — confirm
/// against the code that populates it.
pub region_status: HashMap<String, RegionStatus>,
/// Error messages collected for the run (see `ExtractionResult::with_error`).
pub errors: Vec<String>,
/// Optional reliability score; omitted from serialized output when `None`
/// and defaulted to `None` when absent on deserialization.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reliability: Option<ReliabilityScore>,
}
/// Outcome recorded for a single region.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegionStatus {
/// Whether the region counts as successful; this is the flag
/// `ExtractionResult::calculate_success_rate` tallies.
pub success: bool,
/// Number of matches. NOTE(review): presumably elements matched by the
/// region's selector — confirm against the producer.
pub matched_count: usize,
/// Optional error message for this region.
pub error: Option<String>,
}
impl ExtractionResult {
    /// Creates an empty result for `idempotency_key`: no data, no region
    /// statuses, no errors, zero elapsed time, a 0.0 success rate and
    /// `completed_at` set to the current UTC time.
    #[must_use]
    pub fn new(idempotency_key: IdempotencyKey) -> Self {
        let metadata = ExtractionMetadata {
            idempotency_key,
            completed_at: Utc::now(),
            elapsed_ms: 0,
            selector_success_rate: 0.0,
            region_status: HashMap::new(),
            errors: Vec::new(),
            reliability: None,
        };
        Self {
            data: HashMap::new(),
            metadata,
        }
    }

    /// Stores `data` under `region_name`, replacing any value already stored
    /// under that name, and returns the updated result.
    #[must_use]
    pub fn with_region_data(mut self, region_name: impl Into<String>, data: Value) -> Self {
        let key: String = region_name.into();
        self.data.insert(key, data);
        self
    }

    /// Appends an error message to the metadata and returns the updated result.
    #[must_use]
    pub fn with_error(mut self, error: impl Into<String>) -> Self {
        let message: String = error.into();
        self.metadata.errors.push(message);
        self
    }

    /// Sets the elapsed time in milliseconds and returns the updated result.
    #[must_use]
    pub const fn set_elapsed_ms(mut self, ms: u64) -> Self {
        self.metadata.elapsed_ms = ms;
        self
    }

    /// Recomputes `selector_success_rate` as the percentage of entries in
    /// `region_status` whose `success` flag is set.
    ///
    /// With no region statuses recorded the rate is set to 100.0.
    #[expect(
        clippy::cast_precision_loss,
        reason = "region counts are small enough to be safe as f32"
    )]
    pub fn calculate_success_rate(&mut self) {
        let statuses = &self.metadata.region_status;
        let rate = if statuses.is_empty() {
            // Nothing was attempted, so nothing failed.
            100.0
        } else {
            let total = statuses.len();
            let passed = statuses.values().filter(|entry| entry.success).count();
            (passed as f32 / total as f32) * 100.0
        };
        self.metadata.selector_success_rate = rate;
    }

    /// Returns `true` when no errors were recorded and the stored success
    /// rate is at least 100.0.
    ///
    /// This reads the stored rate; it does not recompute it.
    #[must_use]
    pub fn is_fully_successful(&self) -> bool {
        let meta = &self.metadata;
        meta.errors.is_empty() && meta.selector_success_rate >= 100.0
    }
}