1use crate::domain::idempotency::IdempotencyKey;
4use crate::domain::selector::Selector;
5use crate::domain::transformation::Transformation;
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9use std::collections::HashMap;
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct Region {
31 pub name: String,
33
34 pub selector: Selector,
36
37 pub schema: Value,
39
40 pub transformations: Vec<Transformation>,
42}
43
44impl Region {
45 pub fn new(name: impl Into<String>, selector: Selector, schema: Value) -> Self {
47 Self {
48 name: name.into(),
49 selector,
50 schema,
51 transformations: vec![],
52 }
53 }
54
55 #[must_use]
57 pub fn with_transformation(mut self, transformation: Transformation) -> Self {
58 self.transformations.push(transformation);
59 self
60 }
61
62 pub fn validate(&self) -> crate::Result<()> {
64 if self.name.is_empty() {
65 return Err(crate::error::PluginError::TemplateValidationError(
66 "region name cannot be empty".to_string(),
67 ));
68 }
69 if !self.schema.is_object() {
70 return Err(crate::error::PluginError::TemplateValidationError(format!(
71 "region schema must be a JSON object, got {}",
72 self.schema.get("type").unwrap_or(&Value::Null)
73 )));
74 }
75 self.selector.validate()?;
77 Ok(())
78 }
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct ExtractionTemplate {
102 pub id: uuid::Uuid,
104
105 pub name: String,
107
108 pub description: Option<String>,
110
111 pub regions: Vec<Region>,
113
114 pub metadata: TemplateMetadata,
116}
117
118#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct TemplateMetadata {
121 pub created_at: DateTime<Utc>,
123
124 pub updated_at: DateTime<Utc>,
126
127 pub last_used_at: Option<DateTime<Utc>>,
129
130 pub usage_count: u64,
132
133 pub version: u32,
135
136 pub tags: Vec<String>,
138}
139
140impl Default for TemplateMetadata {
141 fn default() -> Self {
142 let now = Utc::now();
143 Self {
144 created_at: now,
145 updated_at: now,
146 last_used_at: None,
147 usage_count: 0,
148 version: 1,
149 tags: vec![],
150 }
151 }
152}
153
154impl ExtractionTemplate {
155 pub fn new(name: impl Into<String>) -> Self {
157 Self {
158 id: uuid::Uuid::new_v4(),
159 name: name.into(),
160 description: None,
161 regions: vec![],
162 metadata: TemplateMetadata::default(),
163 }
164 }
165
166 #[must_use]
168 pub fn with_region(mut self, region: Region) -> Self {
169 self.regions.push(region);
170 self
171 }
172
173 #[must_use]
175 pub fn with_description(mut self, desc: impl Into<String>) -> Self {
176 self.description = Some(desc.into());
177 self
178 }
179
180 #[must_use]
182 pub fn with_tags(mut self, tags: Vec<String>) -> Self {
183 self.metadata.tags = tags;
184 self
185 }
186
187 pub fn validate(&self) -> crate::Result<()> {
189 if self.name.is_empty() {
190 return Err(crate::error::PluginError::TemplateValidationError(
191 "template name cannot be empty".to_string(),
192 ));
193 }
194 for region in &self.regions {
195 region.validate()?;
196 }
197 Ok(())
198 }
199
200 pub fn mark_used(&mut self) {
202 self.metadata.usage_count += 1;
203 self.metadata.last_used_at = Some(Utc::now());
204 self.metadata.updated_at = Utc::now();
205 }
206}
207
208#[derive(Debug, Clone, Serialize, Deserialize)]
210pub struct ExtractionRequest {
211 pub template: ExtractionTemplate,
213
214 pub url: String,
216
217 pub html: String,
219
220 pub idempotency_key: IdempotencyKey,
222
223 pub timeout_ms: u64,
225
226 pub context: Option<Value>,
228}
229
230impl ExtractionRequest {
231 pub fn new(
233 template: ExtractionTemplate,
234 url: impl Into<String>,
235 html: impl Into<String>,
236 ) -> Self {
237 Self {
238 template,
239 url: url.into(),
240 html: html.into(),
241 idempotency_key: IdempotencyKey::new(),
242 timeout_ms: 30_000,
243 context: None,
244 }
245 }
246
247 #[must_use]
249 pub const fn with_idempotency_key(mut self, key: IdempotencyKey) -> Self {
250 self.idempotency_key = key;
251 self
252 }
253
254 #[must_use]
256 pub const fn with_timeout(mut self, ms: u64) -> Self {
257 self.timeout_ms = ms;
258 self
259 }
260
261 #[must_use]
263 pub fn with_context(mut self, context: Value) -> Self {
264 self.context = Some(context);
265 self
266 }
267
268 pub fn validate(&self) -> crate::Result<()> {
270 self.template.validate()?;
271 if self.url.is_empty() {
272 return Err(crate::error::PluginError::ExtractionError(
273 "URL cannot be empty".to_string(),
274 ));
275 }
276 if self.html.is_empty() {
277 return Err(crate::error::PluginError::ExtractionError(
278 "HTML cannot be empty".to_string(),
279 ));
280 }
281 Ok(())
282 }
283}
284
285#[derive(Debug, Clone, Serialize, Deserialize)]
287pub struct ExtractionResult {
288 pub data: HashMap<String, Value>,
290
291 pub metadata: ExtractionMetadata,
293}
294
295#[derive(Debug, Clone, Serialize, Deserialize)]
297pub struct ExtractionMetadata {
298 pub idempotency_key: IdempotencyKey,
300
301 pub completed_at: DateTime<Utc>,
303
304 pub elapsed_ms: u64,
306
307 pub selector_success_rate: f32,
309
310 pub region_status: HashMap<String, RegionStatus>,
312
313 pub errors: Vec<String>,
315}
316
317#[derive(Debug, Clone, Serialize, Deserialize)]
319pub struct RegionStatus {
320 pub success: bool,
322
323 pub matched_count: usize,
325
326 pub error: Option<String>,
328}
329
330impl ExtractionResult {
331 pub fn new(idempotency_key: IdempotencyKey) -> Self {
333 Self {
334 data: HashMap::new(),
335 metadata: ExtractionMetadata {
336 idempotency_key,
337 completed_at: Utc::now(),
338 elapsed_ms: 0,
339 selector_success_rate: 0.0,
340 region_status: HashMap::new(),
341 errors: vec![],
342 },
343 }
344 }
345
346 #[must_use]
348 pub fn with_region_data(mut self, region_name: impl Into<String>, data: Value) -> Self {
349 self.data.insert(region_name.into(), data);
350 self
351 }
352
353 #[must_use]
355 pub fn with_error(mut self, error: impl Into<String>) -> Self {
356 self.metadata.errors.push(error.into());
357 self
358 }
359
360 #[must_use]
362 pub const fn set_elapsed_ms(mut self, ms: u64) -> Self {
363 self.metadata.elapsed_ms = ms;
364 self
365 }
366
367 #[expect(
369 clippy::cast_precision_loss,
370 reason = "region counts are small enough to be safe as f32"
371 )]
372 pub fn calculate_success_rate(&mut self) {
373 if self.metadata.region_status.is_empty() {
374 self.metadata.selector_success_rate = 100.0;
375 return;
376 }
377 let successful = self
378 .metadata
379 .region_status
380 .values()
381 .filter(|status| status.success)
382 .count();
383 self.metadata.selector_success_rate =
384 (successful as f32 / self.metadata.region_status.len() as f32) * 100.0;
385 }
386
387 pub fn is_fully_successful(&self) -> bool {
389 self.metadata.selector_success_rate >= 100.0 && self.metadata.errors.is_empty()
390 }
391}