1use crate::domain::idempotency::IdempotencyKey;
4use crate::domain::selector::Selector;
5use crate::domain::transformation::Transformation;
6use crate::reliability::ReliabilityScore;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use serde_json::Value;
10use std::collections::HashMap;
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct Region {
32 pub name: String,
34
35 pub selector: Selector,
37
38 pub schema: Value,
40
41 pub transformations: Vec<Transformation>,
43}
44
45impl Region {
46 pub fn new(name: impl Into<String>, selector: Selector, schema: Value) -> Self {
48 Self {
49 name: name.into(),
50 selector,
51 schema,
52 transformations: vec![],
53 }
54 }
55
56 #[must_use]
58 pub fn with_transformation(mut self, transformation: Transformation) -> Self {
59 self.transformations.push(transformation);
60 self
61 }
62
63 pub fn validate(&self) -> crate::Result<()> {
72 if self.name.is_empty() {
73 return Err(crate::error::PluginError::TemplateValidationError(
74 "region name cannot be empty".to_string(),
75 ));
76 }
77 if !self.schema.is_object() {
78 return Err(crate::error::PluginError::TemplateValidationError(format!(
79 "region schema must be a JSON object, got {}",
80 self.schema.get("type").unwrap_or(&Value::Null)
81 )));
82 }
83 self.selector.validate()?;
85 Ok(())
86 }
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
109pub struct ExtractionTemplate {
110 pub id: uuid::Uuid,
112
113 pub name: String,
115
116 pub description: Option<String>,
118
119 pub regions: Vec<Region>,
121
122 pub metadata: TemplateMetadata,
124}
125
126#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct TemplateMetadata {
129 pub created_at: DateTime<Utc>,
131
132 pub updated_at: DateTime<Utc>,
134
135 pub last_used_at: Option<DateTime<Utc>>,
137
138 pub usage_count: u64,
140
141 pub version: u32,
143
144 pub tags: Vec<String>,
146}
147
148impl Default for TemplateMetadata {
149 fn default() -> Self {
150 let now = Utc::now();
151 Self {
152 created_at: now,
153 updated_at: now,
154 last_used_at: None,
155 usage_count: 0,
156 version: 1,
157 tags: vec![],
158 }
159 }
160}
161
162impl ExtractionTemplate {
163 pub fn new(name: impl Into<String>) -> Self {
165 Self {
166 id: uuid::Uuid::new_v4(),
167 name: name.into(),
168 description: None,
169 regions: vec![],
170 metadata: TemplateMetadata::default(),
171 }
172 }
173
174 #[must_use]
176 pub fn with_region(mut self, region: Region) -> Self {
177 self.regions.push(region);
178 self
179 }
180
181 #[must_use]
183 pub fn with_description(mut self, desc: impl Into<String>) -> Self {
184 self.description = Some(desc.into());
185 self
186 }
187
188 #[must_use]
190 pub fn with_tags(mut self, tags: Vec<String>) -> Self {
191 self.metadata.tags = tags;
192 self
193 }
194
195 pub fn validate(&self) -> crate::Result<()> {
204 if self.name.is_empty() {
205 return Err(crate::error::PluginError::TemplateValidationError(
206 "template name cannot be empty".to_string(),
207 ));
208 }
209 for region in &self.regions {
210 region.validate()?;
211 }
212 Ok(())
213 }
214
215 pub fn mark_used(&mut self) {
217 self.metadata.usage_count += 1;
218 self.metadata.last_used_at = Some(Utc::now());
219 self.metadata.updated_at = Utc::now();
220 }
221}
222
223#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct ExtractionRequest {
226 pub template: ExtractionTemplate,
228
229 pub url: String,
231
232 pub html: String,
234
235 pub idempotency_key: IdempotencyKey,
237
238 pub timeout_ms: u64,
240
241 pub context: Option<Value>,
243}
244
245impl ExtractionRequest {
246 pub fn new(
248 template: ExtractionTemplate,
249 url: impl Into<String>,
250 html: impl Into<String>,
251 ) -> Self {
252 Self {
253 template,
254 url: url.into(),
255 html: html.into(),
256 idempotency_key: IdempotencyKey::new(),
257 timeout_ms: 30_000,
258 context: None,
259 }
260 }
261
262 #[must_use]
264 pub const fn with_idempotency_key(mut self, key: IdempotencyKey) -> Self {
265 self.idempotency_key = key;
266 self
267 }
268
269 #[must_use]
271 pub const fn with_timeout(mut self, ms: u64) -> Self {
272 self.timeout_ms = ms;
273 self
274 }
275
276 #[must_use]
278 pub fn with_context(mut self, context: Value) -> Self {
279 self.context = Some(context);
280 self
281 }
282
283 pub fn validate(&self) -> crate::Result<()> {
292 self.template.validate()?;
293 if self.url.is_empty() {
294 return Err(crate::error::PluginError::ExtractionError(
295 "URL cannot be empty".to_string(),
296 ));
297 }
298 if self.html.is_empty() {
299 return Err(crate::error::PluginError::ExtractionError(
300 "HTML cannot be empty".to_string(),
301 ));
302 }
303 Ok(())
304 }
305}
306
307#[derive(Debug, Clone, Serialize, Deserialize)]
309pub struct ExtractionResult {
310 pub data: HashMap<String, Value>,
312
313 pub metadata: ExtractionMetadata,
315}
316
317#[derive(Debug, Clone, Serialize, Deserialize)]
319pub struct ExtractionMetadata {
320 pub idempotency_key: IdempotencyKey,
322
323 pub completed_at: DateTime<Utc>,
325
326 pub elapsed_ms: u64,
328
329 pub selector_success_rate: f32,
331
332 pub region_status: HashMap<String, RegionStatus>,
334
335 pub errors: Vec<String>,
337
338 #[serde(default, skip_serializing_if = "Option::is_none")]
345 pub reliability: Option<ReliabilityScore>,
346}
347
348#[derive(Debug, Clone, Serialize, Deserialize)]
350pub struct RegionStatus {
351 pub success: bool,
353
354 pub matched_count: usize,
356
357 pub error: Option<String>,
359}
360
361impl ExtractionResult {
362 #[must_use]
364 pub fn new(idempotency_key: IdempotencyKey) -> Self {
365 Self {
366 data: HashMap::new(),
367 metadata: ExtractionMetadata {
368 idempotency_key,
369 completed_at: Utc::now(),
370 elapsed_ms: 0,
371 selector_success_rate: 0.0,
372 region_status: HashMap::new(),
373 errors: vec![],
374 reliability: None,
375 },
376 }
377 }
378
379 #[must_use]
381 pub fn with_region_data(mut self, region_name: impl Into<String>, data: Value) -> Self {
382 self.data.insert(region_name.into(), data);
383 self
384 }
385
386 #[must_use]
388 pub fn with_error(mut self, error: impl Into<String>) -> Self {
389 self.metadata.errors.push(error.into());
390 self
391 }
392
393 #[must_use]
395 pub const fn set_elapsed_ms(mut self, ms: u64) -> Self {
396 self.metadata.elapsed_ms = ms;
397 self
398 }
399
400 #[expect(
402 clippy::cast_precision_loss,
403 reason = "region counts are small enough to be safe as f32"
404 )]
405 pub fn calculate_success_rate(&mut self) {
406 if self.metadata.region_status.is_empty() {
407 self.metadata.selector_success_rate = 100.0;
408 return;
409 }
410 let successful = self
411 .metadata
412 .region_status
413 .values()
414 .filter(|status| status.success)
415 .count();
416 self.metadata.selector_success_rate =
417 (successful as f32 / self.metadata.region_status.len() as f32) * 100.0;
418 }
419
420 #[must_use]
422 pub fn is_fully_successful(&self) -> bool {
423 self.metadata.selector_success_rate >= 100.0 && self.metadata.errors.is_empty()
424 }
425}