use crate::model::TestInput;
use crate::providers::llm::LlmClient;
use crate::storage::judge_cache::JudgeCache;
use serde_json::json;
use std::sync::Arc;
/// Runtime configuration for the LLM judge, resolved from CLI/config input.
#[derive(Clone, Debug)]
pub struct JudgeRuntimeConfig {
    /// Master switch; when false, tests requiring judge results fail fast.
    pub enabled: bool,
    /// Backend provider name (e.g. "openai").
    pub provider: String,
    /// Optional model override; `None` falls back to the provider default.
    pub model: Option<String>,
    /// Number of judge samples drawn per evaluation (majority vote).
    pub samples: u32,
    /// Sampling temperature forwarded to the provider.
    pub temperature: f32,
    /// Completion token budget forwarded to the provider.
    pub max_tokens: u32,
    /// When true, bypass the result cache and re-run the judge live.
    pub refresh: bool,
}
/// Orchestrates rubric-based LLM judging: a pre-recorded verdict in trace
/// metadata wins, then the persistent cache, then (if enabled) a live call.
#[derive(Clone)]
pub struct JudgeService {
    // Runtime knobs: provider/model selection, sampling, cache refresh.
    config: JudgeRuntimeConfig,
    // Persistent cache of judge verdicts, keyed by a config+input hash.
    cache: JudgeCache,
    // Live LLM backend; `None` when running offline/replay without a judge.
    client: Option<Arc<dyn LlmClient>>,
}
impl JudgeService {
    /// Builds a judge service from runtime config, a verdict cache, and an
    /// optional live LLM client (absent in offline/replay runs).
    pub fn new(
        config: JudgeRuntimeConfig,
        cache: JudgeCache,
        client: Option<Arc<dyn LlmClient>>,
    ) -> Self {
        Self {
            config,
            cache,
            client,
        }
    }

    /// Evaluates `response_text` against `rubric_id` and injects the verdict
    /// into `meta` under `assay.judge.<rubric_id>`.
    ///
    /// Resolution order:
    /// 1. A pre-recorded verdict already present in `meta` (trace replay) is
    ///    kept as-is and nothing else runs.
    /// 2. If the judge is disabled, this is a configuration error.
    /// 3. Unless `refresh` is set, a cached verdict is reused (re-stamped
    ///    with `source: "cache"` and a fresh `cached_at`).
    /// 4. Otherwise the live client is sampled `samples` times and a strict
    ///    majority vote is recorded, cached, and injected.
    ///
    /// # Errors
    /// Fails when the judge is required but disabled, when no client is
    /// configured, or when the cache or the LLM call fails.
    pub async fn evaluate(
        &self,
        test_id: &str,
        rubric_id: &str,
        data: &TestInput,
        response_text: &str,
        suite_rubric_version: Option<&str>,
        meta: &mut serde_json::Value,
    ) -> anyhow::Result<()> {
        let rubric_version = suite_rubric_version.unwrap_or("v1");

        // Trace replay: a pre-recorded verdict wins over everything else.
        if meta
            .pointer(&format!("/assay/judge/{}", rubric_id))
            .is_some()
        {
            return Ok(());
        }

        if !self.config.enabled {
            anyhow::bail!(
                "config error: test '{}' requires judge results ('{}:{}'), but judge is disabled.\n\
                hint: options:\n\
                1) run live judge: assay ci --judge openai\n\
                2) run replay/CI offline: provide trace meta at meta.assay.judge.{}\n\
                and re-run with: assay ci --trace-file traces.jsonl --no-judge",
                test_id, rubric_id, rubric_version, rubric_id
            );
        }

        let client = self.client.as_ref().ok_or_else(|| {
            anyhow::anyhow!(
                "config error: judge enabled but no client provided (verify --judge <provider>)"
            )
        })?;

        let prompt = format!(
            "Rubric: {}\nInput: {}\nResponse: {}\nContext: {:?}",
            rubric_id, data.prompt, response_text, data.context
        );
        // md5 only keys the cache here; it is not a security boundary.
        let input_hash = format!("{:x}", md5::compute(&prompt));
        let cache_key = self.generate_cache_key(rubric_id, rubric_version, &input_hash);

        if !self.config.refresh {
            if let Some(mut cached) = self.cache.get(&cache_key)? {
                if let Some(obj) = cached.as_object_mut() {
                    obj.insert("source".to_string(), json!("cache"));
                    obj.insert(
                        "cached_at".to_string(),
                        json!(chrono::Utc::now().to_rfc3339()),
                    );
                }
                self.inject_result(meta, rubric_id, cached)?;
                return Ok(());
            }
        }

        // Guard against samples == 0: the old code divided by zero in float,
        // recording a NaN agreement (serialized as JSON null) and no votes.
        let samples = self.config.samples.max(1);
        let mut votes = Vec::with_capacity(samples as usize);
        let mut rationales = Vec::with_capacity(samples as usize);
        for _ in 0..samples {
            // NOTE(review): the original built a per-sample system prompt
            // ("You are a judge for rubric ...") but never passed it to
            // `complete` (second arg was `None`). Dead code removed; confirm
            // against the LlmClient API whether it should be wired in.
            let resp = client.complete(&prompt, None).await?;
            votes.push(self.mock_vote_logic(rubric_id, &resp.text));
            rationales.push(resp.text);
        }

        let pass_count = votes.iter().filter(|&&v| v).count() as u32;
        let agreement = f64::from(pass_count) / f64::from(samples);
        // Strict integer majority; avoids float comparison entirely.
        let passed = u64::from(pass_count) * 2 > u64::from(samples);

        let result = json!({
            "rubric_version": rubric_version,
            "passed": passed,
            "score": agreement,
            "source": "live",
            "samples": votes,
            "agreement": agreement,
            "rationale": rationales.first().cloned().unwrap_or_default(),
            "cached_at": chrono::Utc::now().to_rfc3339(),
        });
        self.cache.put(
            &cache_key,
            &self.config.provider,
            self.config.model.as_deref().unwrap_or("default"),
            rubric_id,
            rubric_version,
            &result,
        )?;
        self.inject_result(meta, rubric_id, result)?;
        Ok(())
    }

    /// Derives a deterministic cache key from every parameter that affects a
    /// verdict: provider, model, rubric id+version, sampling settings, prompt
    /// template version, and the hash of the rendered prompt.
    fn generate_cache_key(
        &self,
        rubric_id: &str,
        rubric_version: &str,
        input_hash: &str,
    ) -> String {
        // Bump when the prompt template changes so stale verdicts miss.
        let template_version = "v1-simple";
        let raw = format!(
            "{}:{}:{}:{}:{}:{}:{}:{}:{}",
            self.config.provider,
            self.config.model.as_deref().unwrap_or(""),
            rubric_id,
            rubric_version,
            self.config.temperature,
            self.config.max_tokens,
            self.config.samples,
            template_version,
            input_hash
        );
        format!("{:x}", md5::compute(raw))
    }

    /// Writes `result` into `meta` at `assay.judge.<rubric_id>`, creating the
    /// intermediate objects as needed. If `assay` or `assay.judge` already
    /// exists as a non-object value it is replaced with an object — the old
    /// code `unwrap()`ed `as_object_mut()` there and would panic.
    fn inject_result(
        &self,
        meta: &mut serde_json::Value,
        rubric_id: &str,
        result: serde_json::Value,
    ) -> anyhow::Result<()> {
        if let Some(obj) = meta.as_object_mut() {
            let assay = obj.entry("assay").or_insert_with(|| json!({}));
            if !assay.is_object() {
                *assay = json!({});
            }
            let judge = assay
                .as_object_mut()
                .expect("assay was just ensured to be an object")
                .entry("judge")
                .or_insert_with(|| json!({}));
            if !judge.is_object() {
                *judge = json!({});
            }
            judge
                .as_object_mut()
                .expect("judge was just ensured to be an object")
                .insert(rubric_id.to_string(), result);
        }
        // NOTE(review): a non-object `meta` silently drops the verdict; kept
        // for compatibility with the original, but consider erroring instead.
        Ok(())
    }

    /// Placeholder vote extraction: a sample passes unless the raw response
    /// text contains the substring "fail". Real parsing of the judge's JSON
    /// output should replace this.
    fn mock_vote_logic(&self, _rubric: &str, text: &str) -> bool {
        !text.contains("fail")
    }
}