use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Settings for an evaluation run, (de)serializable via serde.
///
/// The `Default` implementation in this file supplies
/// `"claude-sonnet-4-20250514"`, `250`, `42` and `10` for the fields below,
/// in declaration order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalConfig {
/// Model identifier string (presumably the judge model; confirm with callers).
pub model: String,
/// Sample size (presumably how many items are drawn for a run; confirm with callers).
pub sample_size: usize,
/// Seed value (presumably for reproducible sampling; confirm with callers).
pub seed: u64,
/// Cut-off `k` (presumably the number of retrieved results considered per query; confirm).
pub top_k: usize,
}
impl Default for EvalConfig {
fn default() -> Self {
Self {
model: "claude-sonnet-4-20250514".to_string(),
sample_size: 250,
seed: 42,
top_k: 10,
}
}
}
/// One ground-truth record pairing a query with a chunk of content.
///
/// The two optional time fields are left out of the serialized output
/// entirely when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GroundTruthEntry {
/// The query text.
pub query: String,
/// Text of the chunk associated with the query.
pub chunk_content: String,
/// Source identifier of the chunk (format not visible here; confirm with producers).
pub chunk_source: String,
/// Start of the chunk, presumably in seconds into a recording; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub chunk_start_secs: Option<f64>,
/// End of the chunk, presumably in seconds into a recording; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub chunk_end_secs: Option<f64>,
/// Domain label for the entry.
pub domain: String,
/// Course label for the entry.
pub course: String,
}
/// The retrieval output recorded for a single query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievalResultEntry {
/// The query text.
pub query: String,
/// Domain label for the query.
pub domain: String,
/// Course label for the query.
pub course: String,
/// Chunks returned for the query (presumably in rank order; confirm with the producer).
pub results: Vec<RetrievedChunk>,
/// Retrieval latency, presumably in seconds given the `_s` suffix.
pub latency_s: f64,
}
/// A single chunk returned by retrieval.
///
/// Every `Option` field is left out of the serialized output when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievedChunk {
/// Text of the chunk.
pub content: String,
/// Source identifier of the chunk, if known; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub source: Option<String>,
/// Score attached to the chunk (scale and direction are not visible here; confirm).
pub score: f32,
/// Title associated with the chunk, if any; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
/// Start of the chunk, presumably in seconds; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub start_secs: Option<f64>,
/// End of the chunk, presumably in seconds; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub end_secs: Option<f64>,
}
/// A relevance verdict together with its justification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JudgeVerdict {
/// Whether the judged item was deemed relevant.
pub relevant: bool,
/// Free-text reasoning accompanying the verdict.
pub reasoning: String,
}
/// A cached verdict plus the model name recorded when it was stored.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JudgeCacheEntry {
/// The cached verdict.
pub verdict: JudgeVerdict,
/// Model name passed to `JudgeCache::insert` when this entry was stored.
pub model: String,
}
/// Serializable map of cached judge verdicts.
///
/// `Default` yields an empty cache.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct JudgeCache {
/// Cached entries; `JudgeCache::insert` keys them by `JudgeCache::cache_key(query, content)`.
pub entries: HashMap<String, JudgeCacheEntry>,
}
impl JudgeCache {
    /// Loads a cache from the JSON file at `path`, falling back to an empty
    /// cache when that is not possible.
    ///
    /// A missing file is treated as the ordinary "no cache yet" case and is
    /// handled silently. Any other read failure, or a file whose contents do
    /// not parse as a `JudgeCache`, is reported on stderr before falling back.
    /// Without that warning a damaged cache would be discarded unnoticed and
    /// then overwritten by the next [`JudgeCache::save`].
    pub fn load(path: &std::path::Path) -> Self {
        let raw = match std::fs::read_to_string(path) {
            Ok(raw) => raw,
            Err(err) => {
                if err.kind() != std::io::ErrorKind::NotFound {
                    eprintln!(
                        "warning: could not read judge cache {}: {err}; starting with an empty cache",
                        path.display()
                    );
                }
                return Self::default();
            }
        };

        match serde_json::from_str::<Self>(&raw) {
            Ok(cache) => cache,
            Err(err) => {
                eprintln!(
                    "warning: could not parse judge cache {}: {err}; starting with an empty cache",
                    path.display()
                );
                Self::default()
            }
        }
    }

    /// Writes the cache to `path` as pretty-printed JSON.
    ///
    /// The JSON is first written to a sibling temporary file
    /// (`<path>.<pid>.tmp`) and then renamed over `path`, so an interrupted
    /// write cannot leave a truncated cache file behind. Because of the
    /// rename, a `path` that is a symlink is replaced rather than written
    /// through.
    ///
    /// # Errors
    ///
    /// Returns an error if serialization fails (converted to `io::Error` by
    /// `?`), or if writing the temporary file or renaming it fails. On failure
    /// the temporary file is removed on a best-effort basis.
    pub fn save(&self, path: &std::path::Path) -> std::io::Result<()> {
        let json = serde_json::to_string_pretty(self)?;

        // Keep the temp file next to the destination so the rename stays on
        // one filesystem; the pid keeps concurrent writers from colliding.
        let mut tmp_name = path.as_os_str().to_owned();
        tmp_name.push(format!(".{}.tmp", std::process::id()));
        let tmp_path = std::path::PathBuf::from(tmp_name);

        let outcome =
            std::fs::write(&tmp_path, json).and_then(|()| std::fs::rename(&tmp_path, path));
        if outcome.is_err() {
            // Best effort: do not leave a stray temp file after a failure.
            let _ = std::fs::remove_file(&tmp_path);
        }
        outcome
    }

    /// Derives the map key for a `(query, content)` pair.
    ///
    /// The key is the first 8 bytes of the SHA-256 digest of
    /// `query + "|||" + content`, rendered as 16 lowercase hex characters.
    ///
    /// NOTE(review): the separator is not escaped, so two different pairs
    /// whose concatenation around `|||` is identical produce the same key.
    /// The scheme is left untouched because changing it would invalidate
    /// every cache file already on disk.
    pub fn cache_key(query: &str, content: &str) -> String {
        use sha2::{Digest, Sha256};
        let mut hasher = Sha256::new();
        hasher.update(query.as_bytes());
        hasher.update(b"|||");
        hasher.update(content.as_bytes());
        let digest = hasher.finalize();
        hex::encode(&digest[..8])
    }

    /// Returns the cached verdict for `(query, content)`, if one exists.
    ///
    /// The model recorded with the entry is not compared: a verdict stored
    /// under any model name is returned.
    pub fn get(&self, query: &str, content: &str) -> Option<&JudgeVerdict> {
        let key = Self::cache_key(query, content);
        self.entries.get(&key).map(|entry| &entry.verdict)
    }

    /// Stores `verdict` for `(query, content)` together with the `model`
    /// name, replacing any entry already held under the same key.
    pub fn insert(&mut self, query: &str, content: &str, verdict: JudgeVerdict, model: &str) {
        let key = Self::cache_key(query, content);
        let entry = JudgeCacheEntry {
            verdict,
            model: model.to_string(),
        };
        self.entries.insert(key, entry);
    }
}
mod hex {
    /// Renders `bytes` as a lowercase hexadecimal string, two characters per
    /// byte, with no separators or prefix.
    pub(crate) fn encode(bytes: &[u8]) -> String {
        // Lowercase digits indexed by nibble value.
        const DIGITS: &[u8; 16] = b"0123456789abcdef";

        let mut out = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            out.push(char::from(DIGITS[usize::from(byte >> 4)]));
            out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
        }
        out
    }
}
/// A flat record of one judgment for one ranked result of a query.
///
/// The optional fields are left out of the serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JudgmentEntry {
/// The query text.
pub query: String,
/// Rank of the judged result (whether 0- or 1-based is not visible here; confirm).
pub rank: usize,
/// Whether the result was judged relevant.
pub relevant: bool,
/// Free-text reasoning accompanying the judgment.
pub reasoning: String,
/// Source identifier of the judged result, if known; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub source: Option<String>,
/// Score of the judged result, if known; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub score: Option<f32>,
}
/// Top-level serialized result of an evaluation run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalOutput {
/// Timestamp of the run as a string (format not visible here; confirm with the writer).
pub timestamp: String,
/// Run-level settings and counters.
pub config: EvalRunConfig,
/// Metrics over all queries.
pub aggregate: AggregateMetrics,
/// Metrics keyed by a string, presumably the domain label.
pub by_domain: HashMap<String, AggregateMetrics>,
/// Per-query results.
pub per_query: Vec<QueryResult>,
}
/// Run-level settings and counters reported alongside the metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalRunConfig {
/// Number of queries in the run.
pub num_queries: usize,
/// Cut-off `k` used for the run.
pub top_k: usize,
/// Name of the judge model.
pub judge_model: String,
/// Count of cache hits (presumably judge-cache lookups that succeeded; confirm).
pub cache_hits: usize,
/// Count of API calls (presumably judge requests that missed the cache; confirm).
pub api_calls: usize,
}
/// Aggregated retrieval metrics.
///
/// Several fields are serialized under `@k` names (for example `ndcg@5`)
/// via `#[serde(rename = ...)]`. `Default` yields all-zero metrics.
/// The metric definitions themselves are not in this file; the expansions
/// below are read from the field names and should be confirmed against the
/// code that computes them.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AggregateMetrics {
/// Number of queries the metrics cover.
pub num_queries: usize,
/// Presumably mean reciprocal rank.
pub mrr: f64,
/// Presumably NDCG at cut-off 5; serialized as `ndcg@5`.
#[serde(rename = "ndcg@5")]
pub ndcg_5: f64,
/// Presumably NDCG at cut-off 10; serialized as `ndcg@10`.
#[serde(rename = "ndcg@10")]
pub ndcg_10: f64,
/// Presumably recall at cut-off 5; serialized as `recall@5`.
#[serde(rename = "recall@5")]
pub recall_5: f64,
/// Presumably precision at cut-off 5; serialized as `precision@5`.
#[serde(rename = "precision@5")]
pub precision_5: f64,
/// Presumably hit rate at cut-off 5; serialized as `hit_rate@5`.
#[serde(rename = "hit_rate@5")]
pub hit_rate_5: f64,
/// Presumably hit rate at cut-off 10; serialized as `hit_rate@10`.
#[serde(rename = "hit_rate@10")]
pub hit_rate_10: f64,
/// Presumably mean average precision.
pub map: f64,
/// Mean latency, presumably in seconds given the `_s` suffix.
pub mean_latency_s: f64,
}
/// Evaluation outcome for a single query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryResult {
/// The query text.
pub query: String,
/// Domain label for the query.
pub domain: String,
/// Presumably the reciprocal rank for this query (named `mrr` like the aggregate; confirm).
pub mrr: f64,
/// Presumably whether a relevant result appeared in the top 5; confirm.
pub hit_5: bool,
/// Number of results judged relevant.
pub relevant_count: usize,
/// Total number of results for the query.
pub total_results: usize,
/// Latency for this query, presumably in seconds given the `_s` suffix.
pub latency_s: f64,
/// Judgments for this query's results.
pub judgments: Vec<ChunkJudgment>,
}
/// Judgment of one ranked chunk within a query's results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkJudgment {
/// Rank of the chunk (whether 0- or 1-based is not visible here; confirm).
pub rank: usize,
/// Score attached to the chunk.
pub score: f32,
/// Source identifier of the chunk, if known. This field has no
/// `skip_serializing_if`, so `None` is written out explicitly (as `null` in JSON).
pub source: Option<String>,
/// Whether the chunk was judged relevant.
pub relevant: bool,
/// Free-text reasoning accompanying the judgment.
pub reasoning: String,
}