assay_core/judge/reliability.rs
1use serde::{Deserialize, Serialize};
2
3/// Final determination from the judge layer.
4#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
5#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
6pub enum VerdictStatus {
7 /// Clearly passed the rubric.
8 Pass,
9 /// Clearly failed the rubric.
10 Fail,
11 /// Uncertain result, falls within the borderline band or judge is unstable.
12 Abstain,
13}
14
15/// Strategy for handling multiple judge evaluations.
16#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
17#[serde(rename_all = "snake_case")]
18pub enum RerunStrategy {
19 /// Only run once.
20 Single,
21 /// Sequential Probability Ratio Test inspired:
22 /// Run 1 -> Confident? Stop. Else Run 2 (Swapped) -> Agree? Stop. Else Run 3 -> Majority vote.
23 #[default]
24 SequentialSprt,
25 /// Always run 3 times and take majority.
26 AlwaysThree,
27}
28
29/// Policy for final verdict when judge remains uncertain (Abstain).
30#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq)]
31#[serde(rename_all = "snake_case")]
32pub enum TieBreakPolicy {
33 /// Fail the test if the judge is uncertain. (Security posture)
34 #[default]
35 FailClosed,
36 /// Flag as unstable/quarantine but don't hard fail.
37 Quarantine,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
41pub struct ReliabilityConfig {
42 /// Minimum score for "borderline" (inclusive). Default 0.4.
43 pub borderline_min: f64,
44 /// Maximum score for "borderline" (inclusive). Default 0.6.
45 pub borderline_max: f64,
46 /// Sequential rerun strategy.
47 pub rerun_strategy: RerunStrategy,
48 /// Max extra judge calls allowed per test. High-cost protection.
49 pub max_extra_calls_per_test: u32,
50 /// Total budget for extra judge calls across the whole suite run.
51 pub max_extra_calls_per_run: u32,
52 /// Policy for final Abstain results.
53 pub tie_break: TieBreakPolicy,
54 /// Use blind labeling (X/Y) in prompts to mitigate bias.
55 pub blind_labeling: bool,
56 /// Randomize candidate order (with seed) in prompts.
57 pub order_randomized: bool,
58 /// Hijack defense: wrap candidates in delimiters and add guard instructions.
59 pub hijack_defense: bool,
60}
61
62impl Default for ReliabilityConfig {
63 fn default() -> Self {
64 Self {
65 borderline_min: 0.4,
66 borderline_max: 0.6,
67 rerun_strategy: RerunStrategy::SequentialSprt,
68 max_extra_calls_per_test: 2,
69 max_extra_calls_per_run: 20,
70 tie_break: TieBreakPolicy::FailClosed,
71 blind_labeling: true,
72 order_randomized: true,
73 hijack_defense: true,
74 }
75 }
76}
77
78impl ReliabilityConfig {
79 /// Maps a raw probability/score [0.0, 1.0] to a verdict status based on borderline band.
80 pub fn assess(&self, score: f64) -> VerdictStatus {
81 if score >= self.borderline_min && score <= self.borderline_max {
82 VerdictStatus::Abstain
83 } else if score > self.borderline_max {
84 VerdictStatus::Pass
85 } else {
86 VerdictStatus::Fail
87 }
88 }
89
90 /// Determines if a re-evaluation is needed based on current results.
91 pub fn triggers_rerun(&self, score: f64, iteration: u32) -> bool {
92 match self.rerun_strategy {
93 RerunStrategy::Single => false,
94 RerunStrategy::AlwaysThree => iteration < 3,
95 RerunStrategy::SequentialSprt => {
96 // Adaptive Majority: Force at least 2 votes to detect instability,
97 // unless we already have a borderline score (split vote).
98 // If iteration < 2, we rerun.
99 // If iteration == 2 and score is 0.5 (borderline), we rerun (for 3rd).
100 // If iteration == 2 and score is 0.0 or 1.0, we stop.
101 iteration < 2 || (score >= self.borderline_min && score <= self.borderline_max)
102 }
103 }
104 }
105}