1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4#[derive(Debug, Serialize, Deserialize)]
5pub struct HygieneReport {
6 pub schema_version: u32,
7 pub suite: String,
8 pub source: String,
9 pub score_source: String, pub generated_at: String,
11 pub window: ReportWindow,
12 pub tests: Vec<TestHygiene>,
13 pub notes: Vec<String>,
14}
15
16#[derive(Debug, Serialize, Deserialize)]
17pub struct ReportWindow {
18 pub last_runs: u32,
19}
20
21#[derive(Debug, Serialize, Deserialize)]
22pub struct TestHygiene {
23 pub test_id: String,
24 pub n: u32,
25 pub rates: TestOutcomeRates,
26 pub scores: HashMap<String, MetricStats>,
27 pub top_reasons: Vec<TopReason>,
28 #[serde(default, skip_serializing_if = "Vec::is_empty")]
29 pub suggested_actions: Vec<String>,
30}
31
32#[derive(Debug, Serialize, Deserialize)]
33pub struct TestOutcomeRates {
34 pub pass: f64,
35 pub fail: f64,
36 pub warn: f64,
37 pub flaky: f64,
38 pub unstable: f64,
39 pub skipped: f64,
40}
41
42#[derive(Debug, Serialize, Deserialize)]
43pub struct MetricStats {
44 pub p10: f64,
45 pub p50: f64,
46 pub p90: f64,
47 pub std: f64,
48}
49
50#[derive(Debug, Serialize, Deserialize)]
51pub struct TopReason {
52 pub kind: String, pub value: String,
54 pub count: u32,
55}
56
57use crate::model::{TestResultRow, TestStatus};
58use crate::storage::Store;
59
60pub fn report_from_db(store: &Store, suite: &str, last_runs: u32) -> anyhow::Result<HygieneReport> {
61 let results = store.fetch_results_for_last_n_runs(suite, last_runs)?;
62
63 let mut test_groups: HashMap<String, Vec<&TestResultRow>> = HashMap::new();
65 for r in &results {
66 test_groups.entry(r.test_id.clone()).or_default().push(r);
67 }
68
69 let mut tests = Vec::new();
70 let mut notes = Vec::new();
71
72 for (test_id, rows) in test_groups {
73 let n = rows.len() as u32;
74 let mut counts = HashMap::new();
75 let mut reasons = HashMap::new(); let mut scores: HashMap<String, Vec<f64>> = HashMap::new();
77
78 for r in &rows {
79 *counts.entry(r.status.clone()).or_insert(0) += 1;
80
81 if let Some(reason) = &r.skip_reason {
83 *reasons
84 .entry(("skip_reason".to_string(), reason.clone()))
85 .or_insert(0) += 1;
86 } else if r.status == TestStatus::Fail || r.status == TestStatus::Error {
87 let msg = if r.message.is_empty() {
89 "Undeclared failure".to_string()
90 } else {
91 r.message.clone()
92 };
93 *reasons.entry(("failure".to_string(), msg)).or_insert(0) += 1;
94 }
95
96 if let Some(obj) = r.details.get("metrics").and_then(|m| m.as_object()) {
99 for (metric_name, mv) in obj {
100 if let Some(reason) = mv.get("reason").and_then(|s| s.as_str()) {
102 let key = format!("{}: {}", metric_name, reason);
103 *reasons
104 .entry(("metric_reason".to_string(), key))
105 .or_insert(0) += 1;
106 }
107 }
108 }
109
110 if let Some(attempts) = &r.attempts {
112 if !attempts.is_empty() {
113 for attempt in attempts {
114 if let Some(obj) =
115 attempt.details.get("metrics").and_then(|m| m.as_object())
116 {
117 for (metric_name, mv) in obj {
118 if let Some(score) = mv.get("score").and_then(|s| s.as_f64()) {
119 scores.entry(metric_name.clone()).or_default().push(score);
120 }
121 }
122 }
123 }
124 } else {
125 if let Some(obj) = r.details.get("metrics").and_then(|m| m.as_object()) {
129 for (metric_name, mv) in obj {
130 if let Some(score) = mv.get("score").and_then(|s| s.as_f64()) {
131 scores.entry(metric_name.clone()).or_default().push(score);
132 }
133 }
134 }
135 }
136 } else {
137 if let Some(obj) = r.details.get("metrics").and_then(|m| m.as_object()) {
139 for (metric_name, mv) in obj {
140 if let Some(score) = mv.get("score").and_then(|s| s.as_f64()) {
141 scores.entry(metric_name.clone()).or_default().push(score);
142 }
143 }
144 }
145 }
146 }
147
148 let skipped_count = rows.iter().filter(|r| r.skip_reason.is_some()).count();
156 let rates = TestOutcomeRates {
157 pass: (*counts.get(&TestStatus::Pass).unwrap_or(&0) as f64) / n as f64,
158 fail: (*counts.get(&TestStatus::Fail).unwrap_or(&0) as f64) / n as f64,
159 warn: (*counts.get(&TestStatus::Warn).unwrap_or(&0) as f64) / n as f64,
160 flaky: (*counts.get(&TestStatus::Flaky).unwrap_or(&0) as f64) / n as f64,
161 unstable: 0.0, skipped: skipped_count as f64 / n as f64,
163 };
164
165 let mut score_stats = HashMap::new();
167 for (metric, mut vals) in scores {
168 vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
169 let sn = vals.len() as f64;
170 if sn == 0.0 {
171 continue;
172 }
173
174 let sum: f64 = vals.iter().sum();
175 let mean = sum / sn;
176 let variance = vals.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / sn;
177 let std = variance.sqrt();
178
179 let p = |q: f64| {
181 let idx = ((q * (sn - 1.0)).floor() as usize).min(vals.len() - 1);
182 vals[idx]
183 };
184
185 score_stats.insert(
186 metric,
187 MetricStats {
188 p10: p(0.10),
189 p50: p(0.50),
190 p90: p(0.90),
191 std,
192 },
193 );
194 }
195
196 let mut top_reasons: Vec<TopReason> = reasons
198 .into_iter()
199 .map(|((kind, value), count)| TopReason { kind, value, count })
200 .collect();
201 top_reasons.sort_by(|a, b| b.count.cmp(&a.count));
202 top_reasons.truncate(5);
203
204 let mut actions = Vec::new();
206 if rates.skipped > 0.4 {
207 actions.push(
208 "High skip rate: Check for fingerprint drift or over-aggressive caching"
209 .to_string(),
210 );
211 }
212 if rates.flaky > 0.1 {
213 actions.push(
214 "Flaky: Consider increasing retries or stabilizing the environment".to_string(),
215 );
216 }
217 if rates.fail > 0.2 {
218 actions.push("High failure rate: Investigate top reasons".to_string());
219 }
220 for (m, stats) in &score_stats {
222 if stats.p10 < 0.6 {
223 actions.push(format!(
225 "Low {} scores (P10 < 0.6): Consider tuning min_score or improving prompts",
226 m
227 ));
228 }
229 }
230
231 tests.push(TestHygiene {
232 test_id,
233 n,
234 rates,
235 scores: score_stats,
236 top_reasons,
237 suggested_actions: actions,
238 });
239 }
240
241 tests.sort_by(|a, b| {
243 b.rates
244 .fail
245 .partial_cmp(&a.rates.fail)
246 .unwrap_or(std::cmp::Ordering::Equal)
247 });
248
249 if tests.iter().any(|t| t.rates.skipped > 0.5) {
251 notes.push("High skip rate (>50%) detected in some tests. Check for over-aggressive fingerprinting.".to_string());
252 }
253
254 Ok(HygieneReport {
255 schema_version: 1,
256 suite: suite.to_string(),
257 source: "eval.db".to_string(),
258 score_source: "all_attempts".to_string(),
259 generated_at: chrono::Utc::now().to_rfc3339(),
260 window: ReportWindow { last_runs },
261 tests,
262 notes,
263 })
264}