1use crate::model::TestInput;
2use crate::providers::llm::LlmClient;
3use crate::storage::judge_cache::JudgeCache;
4use serde_json::json;
5use std::sync::Arc;
6
/// Runtime settings for the judge, as consumed by `JudgeService`.
#[derive(Debug, Clone)]
pub struct JudgeRuntimeConfig {
    /// Whether live judging is allowed. When `false`, an evaluation that
    /// cannot reuse a judge result already present in the trace meta fails.
    pub enabled: bool,
    /// Provider name; recorded with cache entries and part of the cache key.
    pub provider: String,
    /// Optional model name. A missing model is recorded as `"default"` when
    /// storing to the cache and contributes an empty string to the cache key.
    pub model: Option<String>,
    /// Number of completions requested per evaluation; the verdict is a
    /// strict majority over these votes.
    pub samples: u32,
    /// Sampling temperature. In this file it is only used as part of the
    /// cache key.
    pub temperature: f32,
    /// Token limit. In this file it is only used as part of the cache key.
    pub max_tokens: u32,
    /// When `true`, the cache lookup is skipped and a fresh result is computed
    /// (the fresh result is still written to the cache).
    pub refresh: bool,
}
17
18#[derive(Clone)]
19pub struct JudgeService {
20 config: JudgeRuntimeConfig,
21 cache: JudgeCache,
22 client: Option<Arc<dyn LlmClient>>,
23}
24
25impl JudgeService {
26 pub fn new(
27 config: JudgeRuntimeConfig,
28 cache: JudgeCache,
29 client: Option<Arc<dyn LlmClient>>,
30 ) -> Self {
31 Self {
32 config,
33 cache,
34 client,
35 }
36 }
37
38 pub async fn evaluate(
39 &self,
40 test_id: &str,
41 rubric_id: &str,
42 data: &TestInput,
43 response_text: &str,
44 suite_rubric_version: Option<&str>,
45 meta: &mut serde_json::Value,
46 ) -> anyhow::Result<()> {
47 let rubric_version = suite_rubric_version.unwrap_or("v1");
48
49 if let Some(_trace_judge) = meta.pointer(&format!("/assay/judge/{}", rubric_id)) {
51 return Ok(());
55 }
56
57 if !self.config.enabled {
59 anyhow::bail!(
60 "config error: test '{}' requires judge results ('{}:{}'), but judge is disabled.\n\
61 hint: options:\n\
62 1) run live judge: assay ci --judge openai\n\
63 2) run replay/CI offline: provide trace meta at meta.assay.judge.{}\n\
64 and re-run with: assay ci --trace-file traces.jsonl --no-judge",
65 test_id, rubric_id, rubric_version, rubric_id
66 );
67 }
68
69 let client = self.client.as_ref().ok_or_else(|| {
70 anyhow::anyhow!(
71 "config error: judge enabled but no client provided (verify --judge <provider>)"
72 )
73 })?;
74
75 let prompt = format!(
77 "Rubric: {}\nInput: {}\nResponse: {}\nContext: {:?}",
78 rubric_id, data.prompt, response_text, data.context
79 );
80 let input_hash = format!("{:x}", md5::compute(&prompt)); let cache_key = self.generate_cache_key(rubric_id, rubric_version, &input_hash);
82
83 if !self.config.refresh {
84 if let Some(mut cached) = self.cache.get(&cache_key)? {
85 if let Some(obj) = cached.as_object_mut() {
86 obj.insert("source".to_string(), json!("cache"));
87 obj.insert(
88 "cached_at".to_string(),
89 json!(chrono::Utc::now().to_rfc3339()),
90 );
91 }
92 self.inject_result(meta, rubric_id, cached)?;
93 return Ok(());
94 }
95 }
96
97 let samples = self.config.samples;
99 let mut votes = Vec::new();
100 let mut rationales = Vec::new();
101
102 for _ in 0..samples {
103 let _sys_prompt = format!("You are a judge for rubric {}. Output JSON with {{passed: bool, rationale: string}}.", rubric_id);
105 let resp = client.complete(&prompt, None).await?; votes.push(self.mock_vote_logic(rubric_id, &resp.text)); rationales.push(resp.text);
120 }
121
122 let pass_count = votes.iter().filter(|&&v| v).count() as u32;
124 let agreement = pass_count as f64 / samples as f64;
125 let passed = pass_count as f64 > (samples as f64 / 2.0); let result = json!({
132 "rubric_version": rubric_version,
133 "passed": passed,
134 "score": agreement, "source": "live",
137 "samples": votes,
138 "agreement": agreement,
139 "rationale": rationales.first().cloned().unwrap_or_default(), "cached_at": chrono::Utc::now().to_rfc3339()
141 });
142
143 self.cache.put(
145 &cache_key,
146 &self.config.provider,
147 self.config.model.as_deref().unwrap_or("default"),
148 rubric_id,
149 rubric_version,
150 &result,
151 )?;
152
153 self.inject_result(meta, rubric_id, result)?;
154
155 Ok(())
156 }
157
158 fn generate_cache_key(
159 &self,
160 rubric_id: &str,
161 rubric_version: &str,
162 input_hash: &str,
163 ) -> String {
164 let template_version = "v1-simple";
166 let raw = format!(
167 "{}:{}:{}:{}:{}:{}:{}:{}:{}",
168 self.config.provider,
169 self.config.model.as_deref().unwrap_or(""),
170 rubric_id,
171 rubric_version,
172 self.config.temperature,
173 self.config.max_tokens,
174 self.config.samples,
175 template_version,
176 input_hash
177 );
178 format!("{:x}", md5::compute(raw))
179 }
180
181 fn inject_result(
182 &self,
183 meta: &mut serde_json::Value,
184 rubric_id: &str,
185 result: serde_json::Value,
186 ) -> anyhow::Result<()> {
187 if let Some(obj) = meta.as_object_mut() {
188 let assay = obj
189 .entry("assay")
190 .or_insert(json!({}))
191 .as_object_mut()
192 .unwrap();
193 let judge = assay
194 .entry("judge")
195 .or_insert(json!({}))
196 .as_object_mut()
197 .unwrap();
198 judge.insert(rubric_id.to_string(), result);
199 }
200 Ok(())
201 }
202
203 fn mock_vote_logic(&self, _rubric: &str, text: &str) -> bool {
205 !text.contains("fail")
208 }
209}