1mod judge_internal;
2pub mod reliability;
3use crate::model::TestInput;
4use crate::providers::llm::LlmClient;
5use crate::storage::judge_cache::JudgeCache;
6use std::sync::Arc;
7
8#[derive(Clone, Debug)]
9pub struct JudgeRuntimeConfig {
10 pub enabled: bool,
11 pub provider: String, pub model: Option<String>,
13 pub samples: u32,
14 pub temperature: f32,
15 pub max_tokens: u32,
16 pub refresh: bool,
17 pub reliability: reliability::ReliabilityConfig,
18 pub system_prompt_version: String,
19}
20
/// Crate-internal pairing of a boolean verdict with its free-text rationale.
///
/// The type derives nothing, so it is neither `Clone` nor `Debug`; values are
/// moved between producer and consumer.
/// NOTE(review): presumably produced by a single judge call — the producer is
/// not visible in this file; confirm.
pub(crate) struct JudgeCallResult {
    /// Verdict flag.
    pub(crate) passed: bool,

    /// Text accompanying the verdict.
    pub(crate) rationale: String,
}
25
26#[derive(Clone)]
27pub struct JudgeService {
28 config: JudgeRuntimeConfig,
29 cache: JudgeCache,
30 client: Option<Arc<dyn LlmClient>>,
31 pub(crate) global_extra_calls: Arc<std::sync::atomic::AtomicU32>,
32}
33
34impl JudgeService {
35 pub fn new(
36 config: JudgeRuntimeConfig,
37 cache: JudgeCache,
38 client: Option<Arc<dyn LlmClient>>,
39 ) -> Self {
40 Self {
41 config,
42 cache,
43 client,
44 global_extra_calls: Arc::new(std::sync::atomic::AtomicU32::new(0)),
45 }
46 }
47
48 #[allow(clippy::too_many_arguments)]
49 pub async fn evaluate(
50 &self,
51 test_id: &str,
52 rubric_id: &str,
53 data: &TestInput,
54 response_text: &str,
55 suite_rubric_version: Option<&str>,
56 meta: &mut serde_json::Value,
57 seed: Option<u64>,
58 ) -> anyhow::Result<()> {
59 judge_internal::run::evaluate_impl(
60 self,
61 test_id,
62 rubric_id,
63 data,
64 response_text,
65 suite_rubric_version,
66 meta,
67 seed,
68 )
69 .await
70 }
71}