Skip to main content

stygian_plugin/reliability/
scorer.rs

1//! Reliability scorer: turns an [`ExtractionResult`] into a [`ReliabilityScore`].
2
3use crate::domain::{ExtractionMetadata, ExtractionResult};
4use serde::{Deserialize, Serialize};
5
6use super::score::{ReliabilityBand, ReliabilityScore, clamp_unit};
7
8/// Weight applied to each sub-score when computing the [`ReliabilityScore`].
9///
10/// Defaults are tuned for production scraping where missing data is worse
11/// than occasional retries: schema completeness dominates, transformations
12/// matter but are less impactful than missing fields, and retries only
13/// subtract.
14///
15/// # Example
16///
17/// ```
18/// use stygian_plugin::reliability::ScoringWeights;
19///
20/// let weights = ScoringWeights {
21///     schema: 0.5,
22///     transformation: 0.3,
23///     retry: 0.2,
24/// };
25/// assert!(weights.validate().is_ok());
26///
27/// // Out-of-range weights are rejected.
28/// let bad = ScoringWeights { schema: 1.5, transformation: 0.0, retry: 0.0 };
29/// assert!(bad.validate().is_err());
30/// ```
31#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
32pub struct ScoringWeights {
33    /// Weight for `schema_completeness` (must be in `[0.0, 1.0]`).
34    pub schema: f32,
35
36    /// Weight for `transformation_success` (must be in `[0.0, 1.0]`).
37    pub transformation: f32,
38
39    /// Weight for `retry_penalty` (subtracted; must be in `[0.0, 1.0]`).
40    pub retry: f32,
41}
42
43impl ScoringWeights {
44    /// Validate that every weight lies in `[0.0, 1.0]`.
45    ///
46    /// # Errors
47    ///
48    /// Returns [`crate::error::PluginError::TemplateValidationError`] when
49    /// any weight is outside `[0.0, 1.0]` or when any weight is NaN.
50    pub fn validate(&self) -> crate::Result<()> {
51        for (name, value) in [
52            ("schema", self.schema),
53            ("transformation", self.transformation),
54            ("retry", self.retry),
55        ] {
56            if value.is_nan() || !(0.0..=1.0).contains(&value) {
57                return Err(crate::error::PluginError::TemplateValidationError(format!(
58                    "scoring weight '{name}' must be in [0.0, 1.0], got {value}"
59                )));
60            }
61        }
62        Ok(())
63    }
64}
65
66impl Default for ScoringWeights {
67    fn default() -> Self {
68        Self {
69            schema: 0.70,
70            transformation: 0.30,
71            retry: 0.10,
72        }
73    }
74}
75
76/// Computes a [`ReliabilityScore`] for an [`ExtractionResult`].
77///
78/// The scorer is pure and deterministic — it derives every sub-score from
79/// the result's metadata plus an externally-supplied retry count. There is
80/// no I/O and no clock dependency.
81///
82/// # Example
83///
84/// ```
85/// use stygian_plugin::domain::{ExtractionResult, IdempotencyKey, RegionStatus};
86/// use stygian_plugin::reliability::ReliabilityScorer;
87/// use std::collections::HashMap;
88///
89/// let mut result = ExtractionResult::new(IdempotencyKey::new());
90/// result.metadata.region_status.insert(
91///     "title".to_string(),
92///     RegionStatus { success: true, matched_count: 1, error: None },
93/// );
94///
95/// let score = ReliabilityScorer::new().score_extraction(&result, 0);
96/// assert!((score.overall - 1.0).abs() < f32::EPSILON);
97/// ```
98#[derive(Debug, Clone, Copy)]
99pub struct ReliabilityScorer {
100    weights: ScoringWeights,
101}
102
103impl Default for ReliabilityScorer {
104    fn default() -> Self {
105        Self::new()
106    }
107}
108
109impl ReliabilityScorer {
110    /// Maximum retry count the `retry_penalty` sub-score saturates at.
111    ///
112    /// Beyond this many retries the `retry_penalty` sub-score is `1.0`
113    /// regardless of the actual count, so a single retry-bloated call can
114    /// never zero out the rest of the score on its own.
115    pub const MAX_RETRIES_FOR_PENALTY: u32 = 5;
116
117    /// Build a scorer with the default [`ScoringWeights`].
118    ///
119    /// # Example
120    ///
121    /// ```
122    /// use stygian_plugin::reliability::ReliabilityScorer;
123    /// let _scorer = ReliabilityScorer::new();
124    /// ```
125    #[must_use]
126    pub fn new() -> Self {
127        Self {
128            weights: ScoringWeights::default(),
129        }
130    }
131
132    /// Build a scorer with custom weights.
133    ///
134    /// # Errors
135    ///
136    /// Returns [`crate::error::PluginError::TemplateValidationError`] when
137    /// any weight is outside `[0.0, 1.0]` (see [`ScoringWeights::validate`]).
138    pub fn with_weights(weights: ScoringWeights) -> crate::Result<Self> {
139        weights.validate()?;
140        Ok(Self { weights })
141    }
142
143    /// Score an extraction result. `retry_count` is the number of retries
144    /// the caller had to take to produce this result.
145    ///
146    /// # Example
147    ///
148    /// ```
149    /// use stygian_plugin::domain::{ExtractionResult, IdempotencyKey, RegionStatus};
150    /// use stygian_plugin::reliability::{ReliabilityScorer, ReliabilityBand};
151    /// use std::collections::HashMap;
152    ///
153    /// let mut result = ExtractionResult::new(IdempotencyKey::new());
154    /// result.metadata.region_status.insert(
155    ///     "title".to_string(),
156    ///     RegionStatus { success: false, matched_count: 0, error: Some("missing".into()) },
157    /// );
158    ///
159    /// let score = ReliabilityScorer::new().score_extraction(&result, 0);
160    /// assert_eq!(score.band, ReliabilityBand::Low);
161    /// ```
162    #[must_use]
163    pub fn score_extraction(
164        &self,
165        result: &ExtractionResult,
166        retry_count: u32,
167    ) -> ReliabilityScore {
168        self.score_metadata(&result.metadata, retry_count)
169    }
170
171    /// Score from raw [`ExtractionMetadata`] without needing the full result.
172    ///
173    /// Useful when the metadata has been serialized over the wire (e.g.
174    /// into a MCP `debug` payload) and the caller only has the metadata.
175    #[must_use]
176    #[allow(
177        clippy::cast_precision_loss,
178        reason = "region counts are small enough to be safe as f32"
179    )]
180    pub fn score_metadata(
181        &self,
182        metadata: &ExtractionMetadata,
183        retry_count: u32,
184    ) -> ReliabilityScore {
185        let total = metadata.region_status.len();
186        let successful = metadata
187            .region_status
188            .values()
189            .filter(|s| s.success)
190            .count();
191
192        let schema_completeness = if total == 0 {
193            1.0
194        } else {
195            successful as f32 / total as f32
196        };
197
198        let transformation_success = if total == 0 {
199            1.0
200        } else {
201            let transformation_failures = metadata
202                .errors
203                .iter()
204                .filter(|msg| is_transformation_error(msg))
205                .count();
206            let bounded_failures = transformation_failures.min(total);
207            1.0 - (bounded_failures as f32 / total as f32)
208        };
209
210        let retry_penalty = if retry_count == 0 {
211            0.0
212        } else {
213            let capped = retry_count.min(Self::MAX_RETRIES_FOR_PENALTY) as f32;
214            capped / Self::MAX_RETRIES_FOR_PENALTY as f32
215        };
216
217        let weighted = schema_completeness * self.weights.schema
218            + transformation_success * self.weights.transformation
219            - retry_penalty * self.weights.retry;
220        let overall = clamp_unit(weighted);
221
222        let reasons = build_reasons(
223            schema_completeness,
224            transformation_success,
225            retry_penalty,
226            total,
227            successful,
228        );
229
230        ReliabilityScore {
231            overall,
232            schema_completeness,
233            transformation_success,
234            retry_penalty,
235            band: ReliabilityBand::from_overall(overall),
236            reasons,
237        }
238    }
239}
240
/// Heuristic classifier for error messages.
///
/// Returns `true` when `message`, compared case-insensitively, contains any
/// of the keywords `transformation`, `regex`, `coerce`, or `filter`. The
/// keyword may appear anywhere in the message.
fn is_transformation_error(message: &str) -> bool {
    const KEYWORDS: [&str; 4] = ["transformation", "regex", "coerce", "filter"];

    let haystack = message.to_lowercase();
    KEYWORDS.iter().any(|&keyword| haystack.contains(keyword))
}
250
/// Assemble the human-readable reasons that explain a score.
///
/// The list always starts with a region-coverage line. A
/// transformation-success line follows only when regions exist and the
/// sub-score is below `1.0`; a retry-penalty line follows only when the
/// penalty is above `0.0`. Percentages are printed with no decimals.
#[must_use]
fn build_reasons(
    schema_completeness: f32,
    transformation_success: f32,
    retry_penalty: f32,
    total: usize,
    successful: usize,
) -> Vec<String> {
    let has_regions = total > 0;

    let coverage = if has_regions {
        let percent = schema_completeness * 100.0;
        format!("{successful}/{total} regions succeeded ({percent:.0}%)")
    } else {
        "no regions defined (vacuously complete)".to_string()
    };

    let transformation = (has_regions && transformation_success < 1.0).then(|| {
        let percent = transformation_success * 100.0;
        format!("{percent:.0}% transformation success")
    });

    let retries = (retry_penalty > 0.0).then(|| {
        let percent = retry_penalty * 100.0;
        format!("retry penalty applied ({percent:.0}%)")
    });

    // `Option` iterates as zero or one item, so absent reasons drop out.
    std::iter::once(coverage)
        .chain(transformation)
        .chain(retries)
        .collect()
}
283
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;
    use crate::domain::{IdempotencyKey, RegionStatus};
    use std::collections::HashMap;

    /// Region status fixture: a success has one match and no error; a
    /// failure has zero matches and a fixed selector error message.
    fn region_status(success: bool) -> RegionStatus {
        let error = (!success).then(|| "selector matched no elements".to_string());
        RegionStatus {
            success,
            matched_count: usize::from(success),
            error,
        }
    }

    /// Metadata fixture with the given `(region name, succeeded)` pairs and
    /// error messages. `selector_success_rate` starts at `100.0`; tests
    /// that want another value overwrite the field.
    fn metadata_with(regions: &[(&str, bool)], errors: &[&str]) -> ExtractionMetadata {
        let region_status = regions
            .iter()
            .map(|&(name, ok)| (name.to_string(), region_status(ok)))
            .collect::<HashMap<_, _>>();
        ExtractionMetadata {
            idempotency_key: IdempotencyKey::new(),
            completed_at: chrono::Utc::now(),
            elapsed_ms: 0,
            selector_success_rate: 100.0,
            region_status,
            errors: errors.iter().map(|msg| (*msg).to_string()).collect(),
            reliability: None,
        }
    }

    #[test]
    fn test_empty_metadata_scores_as_high() {
        let metadata = metadata_with(&[], &[]);
        let score = ReliabilityScorer::new().score_metadata(&metadata, 0);
        assert!((score.overall - 1.0).abs() < f32::EPSILON);
        assert_eq!(score.band, ReliabilityBand::High);
        assert!(
            score
                .reasons
                .iter()
                .any(|r| r.contains("vacuously complete")),
            "empty template should report vacuous completeness"
        );
    }

    #[test]
    fn test_complete_extraction_scores_high() {
        let metadata = metadata_with(&[("title", true), ("price", true)], &[]);
        let score = ReliabilityScorer::new().score_metadata(&metadata, 0);
        assert!((score.overall - 1.0).abs() < f32::EPSILON);
        assert_eq!(score.band, ReliabilityBand::High);
        assert!(score.reasons.iter().any(|r| r.contains("2/2")));
    }

    #[test]
    fn test_partial_extraction_scores_medium() {
        let mut metadata = metadata_with(&[("title", true), ("price", false)], &[]);
        metadata.selector_success_rate = 50.0;
        let score = ReliabilityScorer::new().score_metadata(&metadata, 0);
        assert!(score.overall < 1.0);
        assert!(score.overall >= 0.5);
        assert_eq!(score.band, ReliabilityBand::Medium);
        assert!((score.schema_completeness - 0.5).abs() < f32::EPSILON);
    }

    #[test]
    fn test_failed_extraction_scores_low() {
        let mut metadata = metadata_with(&[("title", false), ("price", false)], &[]);
        metadata.selector_success_rate = 0.0;
        let score = ReliabilityScorer::new().score_metadata(&metadata, 0);
        assert!(score.overall < 0.5);
        assert_eq!(score.band, ReliabilityBand::Low);
        assert!((score.schema_completeness - 0.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_transformation_failure_reduces_sub_score() {
        let metadata = metadata_with(
            &[("price", true)],
            &["Region 'price': transformation failed"],
        );
        let score = ReliabilityScorer::new().score_metadata(&metadata, 0);
        // The region itself "succeeded", so schema completeness stays at
        // 1.0; only the transformation sub-score drops.
        assert!((score.schema_completeness - 1.0).abs() < f32::EPSILON);
        assert!(score.transformation_success < 1.0);
    }

    #[test]
    fn test_retry_penalty_reduces_overall() {
        let metadata = metadata_with(&[], &[]);
        let scorer = ReliabilityScorer::new();
        let no_retry = scorer.score_metadata(&metadata, 0);
        let max_retries = scorer.score_metadata(&metadata, 99);
        assert!(
            max_retries.overall < no_retry.overall,
            "retries must lower the overall score (no_retry={}, max_retries={})",
            no_retry.overall,
            max_retries.overall
        );
        assert!((max_retries.retry_penalty - 1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_custom_weights_override_defaults() {
        let weights = ScoringWeights {
            schema: 0.0,
            transformation: 1.0,
            retry: 0.0,
        };
        let scorer = ReliabilityScorer::with_weights(weights).unwrap();
        // At least one region is required for transformation_success to be
        // meaningful: with zero regions it is vacuously 1.0.
        let metadata = metadata_with(
            &[("price", true)],
            &["Region 'price': transformation failed"],
        );
        let score = scorer.score_metadata(&metadata, 0);
        // Schema weight 0 removes schema_completeness from the total;
        // transformation weight 1 makes transformation_success the whole score.
        assert!(
            score.overall < 1.0,
            "transformation failure must lower the overall score (got {})",
            score.overall
        );
        assert!(
            (score.transformation_success - 0.0).abs() < f32::EPSILON,
            "transformation_success should be 0.0 with one error and one region"
        );
    }

    #[test]
    fn test_invalid_weights_rejected() {
        let bad = ScoringWeights {
            schema: 1.5,
            transformation: 0.0,
            retry: 0.0,
        };
        assert!(ReliabilityScorer::with_weights(bad).is_err());

        let nan = ScoringWeights {
            schema: f32::NAN,
            transformation: 0.0,
            retry: 0.0,
        };
        assert!(ReliabilityScorer::with_weights(nan).is_err());
    }

    #[test]
    fn test_is_transformation_error_heuristic() {
        let flagged = [
            "Region 'price': transformation failed",
            "Invalid regex pattern",
            "Cannot coerce value",
            "Filter rejected the value",
        ];
        for message in flagged {
            assert!(is_transformation_error(message));
        }

        let ignored = ["No elements matched", "selector parse error"];
        for message in ignored {
            assert!(!is_transformation_error(message));
        }
    }
}