Skip to main content

assay_core/judge/
reliability.rs

1use serde::{Deserialize, Serialize};
2
3/// Final determination from the judge layer.
4#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
5#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
6pub enum VerdictStatus {
7    /// Clearly passed the rubric.
8    Pass,
9    /// Clearly failed the rubric.
10    Fail,
11    /// Uncertain result, falls within the borderline band or judge is unstable.
12    Abstain,
13}
14
15/// Strategy for handling multiple judge evaluations.
16#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
17#[serde(rename_all = "snake_case")]
18pub enum RerunStrategy {
19    /// Only run once.
20    Single,
21    /// Sequential Probability Ratio Test inspired:
22    /// Run 1 -> Confident? Stop. Else Run 2 (Swapped) -> Agree? Stop. Else Run 3 -> Majority vote.
23    #[default]
24    SequentialSprt,
25    /// Always run 3 times and take majority.
26    AlwaysThree,
27}
28
29/// Policy for final verdict when judge remains uncertain (Abstain).
30#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq)]
31#[serde(rename_all = "snake_case")]
32pub enum TieBreakPolicy {
33    /// Fail the test if the judge is uncertain. (Security posture)
34    #[default]
35    FailClosed,
36    /// Flag as unstable/quarantine but don't hard fail.
37    Quarantine,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
41pub struct ReliabilityConfig {
42    /// Minimum score for "borderline" (inclusive). Default 0.4.
43    pub borderline_min: f64,
44    /// Maximum score for "borderline" (inclusive). Default 0.6.
45    pub borderline_max: f64,
46    /// Sequential rerun strategy.
47    pub rerun_strategy: RerunStrategy,
48    /// Max extra judge calls allowed per test. High-cost protection.
49    pub max_extra_calls_per_test: u32,
50    /// Total budget for extra judge calls across the whole suite run.
51    pub max_extra_calls_per_run: u32,
52    /// Policy for final Abstain results.
53    pub tie_break: TieBreakPolicy,
54    /// Use blind labeling (X/Y) in prompts to mitigate bias.
55    pub blind_labeling: bool,
56    /// Randomize candidate order (with seed) in prompts.
57    pub order_randomized: bool,
58    /// Hijack defense: wrap candidates in delimiters and add guard instructions.
59    pub hijack_defense: bool,
60}
61
62impl Default for ReliabilityConfig {
63    fn default() -> Self {
64        Self {
65            borderline_min: 0.4,
66            borderline_max: 0.6,
67            rerun_strategy: RerunStrategy::SequentialSprt,
68            max_extra_calls_per_test: 2,
69            max_extra_calls_per_run: 20,
70            tie_break: TieBreakPolicy::FailClosed,
71            blind_labeling: true,
72            order_randomized: true,
73            hijack_defense: true,
74        }
75    }
76}
77
78impl ReliabilityConfig {
79    /// Maps a raw probability/score [0.0, 1.0] to a verdict status based on borderline band.
80    pub fn assess(&self, score: f64) -> VerdictStatus {
81        if score >= self.borderline_min && score <= self.borderline_max {
82            VerdictStatus::Abstain
83        } else if score > self.borderline_max {
84            VerdictStatus::Pass
85        } else {
86            VerdictStatus::Fail
87        }
88    }
89
90    /// Determines if a re-evaluation is needed based on current results.
91    pub fn triggers_rerun(&self, score: f64, iteration: u32) -> bool {
92        match self.rerun_strategy {
93            RerunStrategy::Single => false,
94            RerunStrategy::AlwaysThree => iteration < 3,
95            RerunStrategy::SequentialSprt => {
96                // Adaptive Majority: Force at least 2 votes to detect instability,
97                // unless we already have a borderline score (split vote).
98                // If iteration < 2, we rerun.
99                // If iteration == 2 and score is 0.5 (borderline), we rerun (for 3rd).
100                // If iteration == 2 and score is 0.0 or 1.0, we stop.
101                iteration < 2 || (score >= self.borderline_min && score <= self.borderline_max)
102            }
103        }
104    }
105}