1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct EvalConfig {
9 pub model: String,
11 pub sample_size: usize,
13 pub seed: u64,
15 pub top_k: usize,
17}
18
19impl Default for EvalConfig {
20 fn default() -> Self {
21 Self {
22 model: "claude-sonnet-4-20250514".to_string(),
23 sample_size: 250,
24 seed: 42,
25 top_k: 10,
26 }
27 }
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct GroundTruthEntry {
33 pub query: String,
35 pub chunk_content: String,
37 pub chunk_source: String,
39 #[serde(skip_serializing_if = "Option::is_none")]
41 pub chunk_start_secs: Option<f64>,
42 #[serde(skip_serializing_if = "Option::is_none")]
44 pub chunk_end_secs: Option<f64>,
45 pub domain: String,
47 pub course: String,
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct RetrievalResultEntry {
54 pub query: String,
56 pub domain: String,
58 pub course: String,
60 pub results: Vec<RetrievedChunk>,
62 pub latency_s: f64,
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct RetrievedChunk {
69 pub content: String,
71 #[serde(skip_serializing_if = "Option::is_none")]
73 pub source: Option<String>,
74 pub score: f32,
76 #[serde(skip_serializing_if = "Option::is_none")]
78 pub title: Option<String>,
79 #[serde(skip_serializing_if = "Option::is_none")]
81 pub start_secs: Option<f64>,
82 #[serde(skip_serializing_if = "Option::is_none")]
84 pub end_secs: Option<f64>,
85}
86
87#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct JudgeVerdict {
90 pub relevant: bool,
92 pub reasoning: String,
94}
95
96#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct JudgeCacheEntry {
99 pub verdict: JudgeVerdict,
101 pub model: String,
103}
104
105#[derive(Debug, Clone, Default, Serialize, Deserialize)]
107pub struct JudgeCache {
108 pub entries: HashMap<String, JudgeCacheEntry>,
110}
111
112impl JudgeCache {
113 pub fn load(path: &std::path::Path) -> Self {
115 std::fs::read_to_string(path)
116 .ok()
117 .and_then(|s| serde_json::from_str(&s).ok())
118 .unwrap_or_default()
119 }
120
121 pub fn save(&self, path: &std::path::Path) -> std::io::Result<()> {
123 let json = serde_json::to_string_pretty(self)?;
124 std::fs::write(path, json)
125 }
126
127 pub fn cache_key(query: &str, content: &str) -> String {
129 use sha2::{Digest, Sha256};
130 let mut hasher = Sha256::new();
131 hasher.update(query.as_bytes());
132 hasher.update(b"|||");
133 hasher.update(content.as_bytes());
134 let result = hasher.finalize();
135 hex::encode(&result[..8]) }
137
138 pub fn get(&self, query: &str, content: &str) -> Option<&JudgeVerdict> {
140 let key = Self::cache_key(query, content);
141 self.entries.get(&key).map(|e| &e.verdict)
142 }
143
144 pub fn insert(&mut self, query: &str, content: &str, verdict: JudgeVerdict, model: &str) {
146 let key = Self::cache_key(query, content);
147 self.entries.insert(key, JudgeCacheEntry { verdict, model: model.to_string() });
148 }
149}
150
/// Small local hex-encoding helper.
mod hex {
    /// Lowercase hex digits indexed by nibble value.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    /// Encodes `bytes` as lowercase hexadecimal, two characters per byte.
    pub(crate) fn encode(bytes: &[u8]) -> String {
        let mut out = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            // High nibble first, then low nibble, matching `{:02x}` formatting.
            out.push(char::from(DIGITS[usize::from(byte >> 4)]));
            out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
        }
        out
    }
}
161
162#[derive(Debug, Clone, Serialize, Deserialize)]
164pub struct JudgmentEntry {
165 pub query: String,
167 pub rank: usize,
169 pub relevant: bool,
171 pub reasoning: String,
173 #[serde(skip_serializing_if = "Option::is_none")]
175 pub source: Option<String>,
176 #[serde(skip_serializing_if = "Option::is_none")]
178 pub score: Option<f32>,
179}
180
181#[derive(Debug, Clone, Serialize, Deserialize)]
183pub struct EvalOutput {
184 pub timestamp: String,
186 pub config: EvalRunConfig,
188 pub aggregate: AggregateMetrics,
190 pub by_domain: HashMap<String, AggregateMetrics>,
192 pub per_query: Vec<QueryResult>,
194}
195
196#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct EvalRunConfig {
199 pub num_queries: usize,
201 pub top_k: usize,
203 pub judge_model: String,
205 pub cache_hits: usize,
207 pub api_calls: usize,
209}
210
211#[derive(Debug, Clone, Default, Serialize, Deserialize)]
213pub struct AggregateMetrics {
214 pub num_queries: usize,
216 pub mrr: f64,
218 #[serde(rename = "ndcg@5")]
220 pub ndcg_5: f64,
221 #[serde(rename = "ndcg@10")]
223 pub ndcg_10: f64,
224 #[serde(rename = "recall@5")]
226 pub recall_5: f64,
227 #[serde(rename = "precision@5")]
229 pub precision_5: f64,
230 #[serde(rename = "hit_rate@5")]
232 pub hit_rate_5: f64,
233 #[serde(rename = "hit_rate@10")]
235 pub hit_rate_10: f64,
236 pub map: f64,
238 pub mean_latency_s: f64,
240}
241
242#[derive(Debug, Clone, Serialize, Deserialize)]
244pub struct QueryResult {
245 pub query: String,
247 pub domain: String,
249 pub mrr: f64,
251 pub hit_5: bool,
253 pub relevant_count: usize,
255 pub total_results: usize,
257 pub latency_s: f64,
259 pub judgments: Vec<ChunkJudgment>,
261}
262
263#[derive(Debug, Clone, Serialize, Deserialize)]
265pub struct ChunkJudgment {
266 pub rank: usize,
268 pub score: f32,
270 pub source: Option<String>,
272 pub relevant: bool,
274 pub reasoning: String,
276}