//! assay_core/judge/mod.rs
//!
//! LLM-as-judge service: configuration, result types, and the public
//! `JudgeService` entry point (implementation lives in `judge_internal`).

1mod judge_internal;
2pub mod reliability;
3use crate::model::TestInput;
4use crate::providers::llm::LlmClient;
5use crate::storage::judge_cache::JudgeCache;
6use std::sync::Arc;
7
/// Runtime configuration for the judge, assembled by the caller before
/// constructing a [`JudgeService`].
#[derive(Clone, Debug)]
pub struct JudgeRuntimeConfig {
    /// Master switch: when false the judge should not run.
    pub enabled: bool,
    /// Backend selector. Stringly-typed by design here; known values are
    /// "openai", "fake", "none". NOTE(review): an enum would make invalid
    /// providers unrepresentable — confirm whether external config requires
    /// a free-form string.
    pub provider: String, // "openai", "fake", "none"
    /// Model identifier for the provider; `None` presumably selects a
    /// provider default — TODO confirm against the provider implementation.
    pub model: Option<String>,
    /// Number of judge samples per evaluation — assumed; verify in
    /// `judge_internal::run`.
    pub samples: u32,
    /// Sampling temperature forwarded to the LLM provider.
    pub temperature: f32,
    /// Completion token cap forwarded to the LLM provider.
    pub max_tokens: u32,
    /// When true, bypass/refresh cached verdicts — assumed from the name;
    /// verify against `JudgeCache` usage.
    pub refresh: bool,
    /// Reliability (multi-sample agreement) settings; see [`reliability`].
    pub reliability: reliability::ReliabilityConfig,
    /// Version tag of the judge system prompt, used for cache keying /
    /// provenance — TODO confirm.
    pub system_prompt_version: String,
}
20
/// Outcome of a single judge call, internal to this crate.
pub(crate) struct JudgeCallResult {
    /// Verdict: did the response pass the rubric?
    pub(crate) passed: bool,
    /// Free-text justification produced alongside the verdict.
    pub(crate) rationale: String,
}
25
/// Public judge entry point. Cheap to clone: the client and the call
/// counter are behind `Arc`s, so clones share state.
#[derive(Clone)]
pub struct JudgeService {
    // Runtime settings captured at construction time.
    config: JudgeRuntimeConfig,
    // Verdict cache; keyed storage lives in `storage::judge_cache`.
    cache: JudgeCache,
    // LLM backend; `None` presumably corresponds to provider "none" /
    // disabled — verify in `judge_internal`.
    client: Option<Arc<dyn LlmClient>>,
    // Process-wide counter shared across clones (Arc). Incremented by the
    // internal implementation; semantics of "extra calls" defined there.
    pub(crate) global_extra_calls: Arc<std::sync::atomic::AtomicU32>,
}
33
34impl JudgeService {
35    pub fn new(
36        config: JudgeRuntimeConfig,
37        cache: JudgeCache,
38        client: Option<Arc<dyn LlmClient>>,
39    ) -> Self {
40        Self {
41            config,
42            cache,
43            client,
44            global_extra_calls: Arc::new(std::sync::atomic::AtomicU32::new(0)),
45        }
46    }
47
48    #[allow(clippy::too_many_arguments)]
49    pub async fn evaluate(
50        &self,
51        test_id: &str,
52        rubric_id: &str,
53        data: &TestInput,
54        response_text: &str,
55        suite_rubric_version: Option<&str>,
56        meta: &mut serde_json::Value,
57        seed: Option<u64>,
58    ) -> anyhow::Result<()> {
59        judge_internal::run::evaluate_impl(
60            self,
61            test_id,
62            rubric_id,
63            data,
64            response_text,
65            suite_rubric_version,
66            meta,
67            seed,
68        )
69        .await
70    }
71}