1use serde::{Deserialize, Serialize};
2use std::cmp::Reverse;
3use std::collections::HashMap;
4
/// Top-level hygiene report for one test suite, as produced by `report_from_db`.
///
/// The struct is (de)serialized with serde, so field names are part of the report's
/// on-disk/wire format.
#[derive(Debug, Serialize, Deserialize)]
pub struct HygieneReport {
    /// Version of the report layout; `report_from_db` writes `1`.
    pub schema_version: u32,
    /// Name of the suite the report was generated for.
    pub suite: String,
    /// Label for where the results came from; `report_from_db` writes `"eval.db"`.
    pub source: String,
    /// Label for which scores were aggregated; `report_from_db` writes `"all_attempts"`.
    pub score_source: String,
    /// Generation timestamp; `report_from_db` writes the current UTC time in RFC 3339 form.
    pub generated_at: String,
    /// The run window the report covers.
    pub window: ReportWindow,
    /// Per-test summaries; `report_from_db` orders them by descending fail rate.
    pub tests: Vec<TestHygiene>,
    /// Report-level remarks (for example, a warning when some test is skipped in more than
    /// half of its rows).
    pub notes: Vec<String>,
}
16
/// The slice of run history a [`HygieneReport`] was computed over.
#[derive(Debug, Serialize, Deserialize)]
pub struct ReportWindow {
    /// Number of most recent runs that were requested when fetching results.
    pub last_runs: u32,
}
21
/// Hygiene summary for a single test across the rows found in the report window.
#[derive(Debug, Serialize, Deserialize)]
pub struct TestHygiene {
    /// Identifier of the test the rows were grouped by.
    pub test_id: String,
    /// Number of result rows that contributed to this summary.
    pub n: u32,
    /// Outcome rates, each expressed as a fraction of `n`.
    pub rates: TestOutcomeRates,
    /// Score distribution statistics, keyed by metric name.
    pub scores: HashMap<String, MetricStats>,
    /// Most frequent skip/failure/metric reasons (`report_from_db` keeps at most five).
    pub top_reasons: Vec<TopReason>,
    /// Human-readable follow-up suggestions. Omitted from serialized output when empty and
    /// defaulted to an empty list when missing on input.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub suggested_actions: Vec<String>,
}
32
/// Outcome rates for one test, each a fraction of the test's row count.
///
/// As computed by `report_from_db`, `skipped` is derived from the presence of a skip reason
/// rather than from the status, so the fields are not guaranteed to sum to 1.
#[derive(Debug, Serialize, Deserialize)]
pub struct TestOutcomeRates {
    /// Fraction of rows whose status is `Pass`.
    pub pass: f64,
    /// Fraction of rows whose status is `Fail`.
    pub fail: f64,
    /// Fraction of rows whose status is `Warn`.
    pub warn: f64,
    /// Fraction of rows whose status is `Flaky`.
    pub flaky: f64,
    /// Fraction of unstable rows; `report_from_db` currently always writes `0.0`.
    pub unstable: f64,
    /// Fraction of rows that carry a skip reason.
    pub skipped: f64,
}
42
/// Distribution statistics for one metric's scores.
///
/// As computed by `report_from_db`, each percentile is the element at index
/// `floor(q * (len - 1))` of the ascending-sorted scores (no interpolation).
#[derive(Debug, Serialize, Deserialize)]
pub struct MetricStats {
    /// 10th percentile score.
    pub p10: f64,
    /// 50th percentile (median) score.
    pub p50: f64,
    /// 90th percentile score.
    pub p90: f64,
    /// Standard deviation of the scores; `report_from_db` uses the population form
    /// (variance divided by the number of scores).
    pub std: f64,
}
50
/// One reason observed for a test together with how often it occurred.
#[derive(Debug, Serialize, Deserialize)]
pub struct TopReason {
    /// Reason category; `report_from_db` emits `"skip_reason"`, `"failure"` or
    /// `"metric_reason"`.
    pub kind: String,
    /// Reason text (for `"metric_reason"` it is formatted as `"<metric>: <reason>"`).
    pub value: String,
    /// Number of times this `(kind, value)` pair was recorded.
    pub count: u32,
}
57
58use crate::model::{TestResultRow, TestStatus};
59use crate::storage::Store;
60
61pub fn report_from_db(store: &Store, suite: &str, last_runs: u32) -> anyhow::Result<HygieneReport> {
62 let results = store.fetch_results_for_last_n_runs(suite, last_runs)?;
63
64 let mut test_groups: HashMap<String, Vec<&TestResultRow>> = HashMap::new();
66 for r in &results {
67 test_groups.entry(r.test_id.clone()).or_default().push(r);
68 }
69
70 let mut tests = Vec::new();
71 let mut notes = Vec::new();
72
73 for (test_id, rows) in test_groups {
74 let n = rows.len() as u32;
75 let mut counts = HashMap::new();
76 let mut reasons = HashMap::new(); let mut scores: HashMap<String, Vec<f64>> = HashMap::new();
78
79 for r in &rows {
80 *counts.entry(r.status).or_insert(0) += 1;
81
82 if let Some(reason) = &r.skip_reason {
84 *reasons
85 .entry(("skip_reason".to_string(), reason.clone()))
86 .or_insert(0) += 1;
87 } else if r.status == TestStatus::Fail || r.status == TestStatus::Error {
88 let msg = if r.message.is_empty() {
90 "Undeclared failure".to_string()
91 } else {
92 r.message.clone()
93 };
94 *reasons.entry(("failure".to_string(), msg)).or_insert(0) += 1;
95 }
96
97 if let Some(obj) = r.details.get("metrics").and_then(|m| m.as_object()) {
100 for (metric_name, mv) in obj {
101 if let Some(reason) = mv.get("reason").and_then(|s| s.as_str()) {
103 let key = format!("{}: {}", metric_name, reason);
104 *reasons
105 .entry(("metric_reason".to_string(), key))
106 .or_insert(0) += 1;
107 }
108 }
109 }
110
111 if let Some(attempts) = &r.attempts {
113 if !attempts.is_empty() {
114 for attempt in attempts {
115 if let Some(obj) =
116 attempt.details.get("metrics").and_then(|m| m.as_object())
117 {
118 for (metric_name, mv) in obj {
119 if let Some(score) = mv.get("score").and_then(|s| s.as_f64()) {
120 scores.entry(metric_name.clone()).or_default().push(score);
121 }
122 }
123 }
124 }
125 } else {
126 if let Some(obj) = r.details.get("metrics").and_then(|m| m.as_object()) {
130 for (metric_name, mv) in obj {
131 if let Some(score) = mv.get("score").and_then(|s| s.as_f64()) {
132 scores.entry(metric_name.clone()).or_default().push(score);
133 }
134 }
135 }
136 }
137 } else {
138 if let Some(obj) = r.details.get("metrics").and_then(|m| m.as_object()) {
140 for (metric_name, mv) in obj {
141 if let Some(score) = mv.get("score").and_then(|s| s.as_f64()) {
142 scores.entry(metric_name.clone()).or_default().push(score);
143 }
144 }
145 }
146 }
147 }
148
149 let skipped_count = rows.iter().filter(|r| r.skip_reason.is_some()).count();
157 let rates = TestOutcomeRates {
158 pass: (*counts.get(&TestStatus::Pass).unwrap_or(&0) as f64) / n as f64,
159 fail: (*counts.get(&TestStatus::Fail).unwrap_or(&0) as f64) / n as f64,
160 warn: (*counts.get(&TestStatus::Warn).unwrap_or(&0) as f64) / n as f64,
161 flaky: (*counts.get(&TestStatus::Flaky).unwrap_or(&0) as f64) / n as f64,
162 unstable: 0.0, skipped: skipped_count as f64 / n as f64,
164 };
165
166 let mut score_stats = HashMap::new();
168 for (metric, mut vals) in scores {
169 vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
170 let sn = vals.len() as f64;
171 if sn == 0.0 {
172 continue;
173 }
174
175 let sum: f64 = vals.iter().sum();
176 let mean = sum / sn;
177 let variance = vals.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / sn;
178 let std = variance.sqrt();
179
180 let p = |q: f64| {
182 let idx = ((q * (sn - 1.0)).floor() as usize).min(vals.len() - 1);
183 vals[idx]
184 };
185
186 score_stats.insert(
187 metric,
188 MetricStats {
189 p10: p(0.10),
190 p50: p(0.50),
191 p90: p(0.90),
192 std,
193 },
194 );
195 }
196
197 let mut top_reasons: Vec<TopReason> = reasons
199 .into_iter()
200 .map(|((kind, value), count)| TopReason { kind, value, count })
201 .collect();
202 top_reasons.sort_by_key(|reason| Reverse(reason.count));
203 top_reasons.truncate(5);
204
205 let mut actions = Vec::new();
207 if rates.skipped > 0.4 {
208 actions.push(
209 "High skip rate: Check for fingerprint drift or over-aggressive caching"
210 .to_string(),
211 );
212 }
213 if rates.flaky > 0.1 {
214 actions.push(
215 "Flaky: Consider increasing retries or stabilizing the environment".to_string(),
216 );
217 }
218 if rates.fail > 0.2 {
219 actions.push("High failure rate: Investigate top reasons".to_string());
220 }
221 for (m, stats) in &score_stats {
223 if stats.p10 < 0.6 {
224 actions.push(format!(
226 "Low {} scores (P10 < 0.6): Consider tuning min_score or improving prompts",
227 m
228 ));
229 }
230 }
231
232 tests.push(TestHygiene {
233 test_id,
234 n,
235 rates,
236 scores: score_stats,
237 top_reasons,
238 suggested_actions: actions,
239 });
240 }
241
242 tests.sort_by(|a, b| {
244 b.rates
245 .fail
246 .partial_cmp(&a.rates.fail)
247 .unwrap_or(std::cmp::Ordering::Equal)
248 });
249
250 if tests.iter().any(|t| t.rates.skipped > 0.5) {
252 notes.push("High skip rate (>50%) detected in some tests. Check for over-aggressive fingerprinting.".to_string());
253 }
254
255 Ok(HygieneReport {
256 schema_version: 1,
257 suite: suite.to_string(),
258 source: "eval.db".to_string(),
259 score_source: "all_attempts".to_string(),
260 generated_at: chrono::Utc::now().to_rfc3339(),
261 window: ReportWindow { last_runs },
262 tests,
263 notes,
264 })
265}