1pub mod analyzers;
2pub mod model;
3
4use chrono::Utc;
5use std::collections::HashMap;
6use std::io::BufRead;
7use std::path::{Path, PathBuf};
8
9use crate::config::path_resolver::PathResolver;
10use crate::errors::diagnostic::{codes, Diagnostic};
11use crate::model::{EvalConfig, Expected, Policy};
12use crate::validate::{validate, ValidateOptions};
13
14use model::*;
15
/// Inputs that select what a [`doctor`] run inspects.
#[derive(Clone, Debug)]
pub struct DoctorOptions {
    /// Path of the evaluation config; echoed into the report's inputs section.
    pub config_path: PathBuf,
    /// Optional trace file; when set it is validated, schema-checked and summarized.
    pub trace_file: Option<PathBuf>,
    /// Optional baseline file; when set it is validated and summarized.
    pub baseline_file: Option<PathBuf>,
    /// Optional database path; when set the store is opened and summarized.
    pub db_path: Option<PathBuf>,
    /// Forwarded to validation as the strict-replay flag and echoed into the report.
    pub replay_strict: bool,
}
24
25pub async fn doctor(
26 cfg: &EvalConfig,
27 opts: &DoctorOptions,
28 resolver: &PathResolver,
29) -> anyhow::Result<DoctorReport> {
30 let mut notes = vec![];
31 let mut diagnostics: Vec<Diagnostic> = vec![];
32
33 let vopts = ValidateOptions {
34 trace_file: opts.trace_file.clone(),
35 baseline_file: opts.baseline_file.clone(),
36 replay_strict: opts.replay_strict,
37 };
38 let vreport = validate(cfg, &vopts, resolver).await?;
39 diagnostics.extend(vreport.diagnostics);
40
41 let mut loaded_policies = HashMap::new();
42
43 let unknown_field_re = regex::Regex::new(r"unknown field `([^`]+)`, expected one of (.*)")
44 .expect("Invalid regex for unknown field parsing");
45
46 for test in &cfg.tests {
47 if let Some(path) = test.expected.get_policy_path() {
48 let mut p_str = path.to_string();
49 resolver.resolve_str(&mut p_str);
50 let pb = PathBuf::from(p_str);
51 if pb.exists() {
52 match Policy::load(&pb) {
53 Ok(p) => {
54 loaded_policies.insert(path.to_string(), p);
55 }
56 Err(e) => {
57 let msg = e.to_string();
58 let mut diag = Diagnostic::new(
59 codes::E_CFG_PARSE,
60 format!("Failed to parse policy '{}': {}", path, msg),
61 )
62 .with_source("doctor.policy_load")
63 .with_context(serde_json::json!({ "path": pb, "error": msg }));
64
65 if let Some(caps) = unknown_field_re.captures(&msg) {
66 let unknown = &caps[1];
67 let expected_str = &caps[2];
68 let candidates: Vec<String> = expected_str
70 .split(',')
71 .map(|s| s.trim().trim_matches('`').to_string())
72 .collect();
73
74 if let Some(hint) = crate::errors::similarity::closest_prompt(
75 unknown,
76 candidates.iter(),
77 ) {
78 diag = diag.with_fix_step(format!(
79 "Replace `{}` with `{}`",
80 unknown, hint.prompt
81 ));
82 }
83 }
84 diagnostics.push(diag);
85 }
86 }
87 }
88 }
89 }
90
91 analyzers::config::analyze_config_integrity(cfg, resolver, &mut diagnostics);
92 analyzers::policy::analyze_policy_usage(cfg, &loaded_policies, &mut diagnostics);
93
94 if let Some(p) = &opts.trace_file {
95 analyzers::trace::analyze_trace_schema(p, &mut diagnostics);
96 }
97
98 let config_summary = Some(summarize_config(cfg));
99
100 let trace_summary = match &opts.trace_file {
101 Some(p) => summarize_trace(p, cfg, &mut diagnostics).ok(),
102 None => None,
103 };
104
105 let baseline_summary = match &opts.baseline_file {
106 Some(p) => summarize_baseline(p, &mut diagnostics).ok(),
107 None => None,
108 };
109
110 let db_summary = match &opts.db_path {
111 Some(p) => summarize_db(p, &mut diagnostics).ok(),
112 None => None,
113 };
114
115 let caches = summarize_caches(&mut notes);
116
117 let suggested_actions = suggest_from(&diagnostics, cfg, &trace_summary, &baseline_summary);
118
119 Ok(DoctorReport {
120 schema_version: 1,
121 generated_at: Utc::now().to_rfc3339(),
122 assay_version: env!("CARGO_PKG_VERSION").to_string(),
123 platform: PlatformInfo {
124 os: std::env::consts::OS.to_string(),
125 arch: std::env::consts::ARCH.to_string(),
126 },
127 inputs: DoctorInputs {
128 config_path: opts.config_path.display().to_string(),
129 trace_file: opts.trace_file.as_ref().map(|p| p.display().to_string()),
130 baseline_file: opts.baseline_file.as_ref().map(|p| p.display().to_string()),
131 db_path: opts.db_path.as_ref().map(|p| p.display().to_string()),
132 replay_strict: opts.replay_strict,
133 },
134 config: config_summary,
135 trace: trace_summary,
136 baseline: baseline_summary,
137 db: db_summary,
138 caches,
139 diagnostics,
140 suggested_actions,
141 notes,
142 })
143}
144
145fn summarize_config(cfg: &EvalConfig) -> ConfigSummary {
151 use std::collections::BTreeMap;
152 let mut metric_counts: BTreeMap<String, u32> = BTreeMap::new();
153
154 for tc in &cfg.tests {
155 let key = match &tc.expected {
156 Expected::MustContain { .. } => "must_contain",
157 Expected::MustNotContain { .. } => "must_not_contain",
158 Expected::RegexMatch { .. } => "regex_match",
159 Expected::RegexNotMatch { .. } => "regex_not_match",
160 Expected::JsonSchema { .. } => "json_schema",
161 Expected::SemanticSimilarityTo { .. } => "semantic_similarity_to",
162 Expected::Faithfulness { .. } => "faithfulness",
163 Expected::Relevance { .. } => "relevance",
164 Expected::JudgeCriteria { .. } => "judge_criteria",
165 Expected::ArgsValid { .. } => "args_valid",
166 Expected::SequenceValid { .. } => "sequence_valid",
167 Expected::ToolBlocklist { .. } => "tool_blocklist",
168 Expected::Reference { .. } => "reference",
169 }
170 .to_string();
171
172 *metric_counts.entry(key).or_insert(0) += 1;
173 }
174
175 let (mode, max_drop, min_floor) = cfg
176 .settings
177 .thresholding
178 .as_ref()
179 .map(|t| (t.mode.clone(), t.max_drop, t.min_floor))
180 .unwrap_or((None, None, None));
181
182 ConfigSummary {
183 suite: cfg.suite.clone(),
184 model: cfg.model.clone(),
185 test_count: cfg.tests.len() as u32,
186 metric_counts,
187 thresholding_mode: mode,
188 max_drop,
189 min_floor,
190 }
191}
192
193fn summarize_trace(
194 path: &Path,
195 _cfg: &EvalConfig,
196 _diags: &mut Vec<Diagnostic>,
197) -> anyhow::Result<TraceSummary> {
198 let md = std::fs::metadata(path).ok();
200 let approx_size_bytes = md.map(|m| m.len());
201
202 let f = std::fs::File::open(path)?;
203 let rdr = std::io::BufReader::new(f);
204
205 let mut entries: u64 = 0;
206 let mut first_schema: Option<u32> = None;
207 let mut has_assay_meta = false;
208
209 let mut has_embeddings = false;
211 let mut has_judge_faithfulness = false;
212 let mut has_judge_relevance = false;
213
214 for (i, line) in rdr.lines().enumerate() {
215 let line = line?;
216 if line.trim().is_empty() {
217 continue;
218 }
219 entries += 1;
221
222 if i == 0 {
223 if let Ok(v) = serde_json::from_str::<serde_json::Value>(&line) {
224 first_schema = v
225 .get("schema_version")
226 .and_then(|x| x.as_u64())
227 .map(|x| x as u32);
228 if v.get("meta").and_then(|m| m.get("assay")).is_some() {
229 has_assay_meta = true;
230 }
231 }
232 }
233
234 if i < 200 {
236 if let Ok(v) = serde_json::from_str::<serde_json::Value>(&line) {
237 if let Some(meta) = v.get("meta").and_then(|m| m.get("assay")) {
238 if meta.pointer("/embeddings").is_some() {
239 has_embeddings = true;
240 }
241 if meta.pointer("/judge/faithfulness").is_some() {
242 has_judge_faithfulness = true;
243 }
244 if meta.pointer("/judge/relevance").is_some() {
245 has_judge_relevance = true;
246 }
247 }
248 }
249 } else if has_embeddings && has_judge_faithfulness && has_judge_relevance {
250 }
254 }
255
256 Ok(TraceSummary {
257 path: path.display().to_string(),
258 entries,
259 schema_version: first_schema,
260 has_assay_meta,
261 coverage: TraceCoverage {
262 has_embeddings,
263 has_judge_faithfulness,
264 has_judge_relevance,
265 },
266 approx_size_bytes,
267 })
268}
269
270fn summarize_baseline(
271 path: &Path,
272 _diags: &mut Vec<Diagnostic>,
273) -> anyhow::Result<BaselineSummary> {
274 let b = crate::baseline::Baseline::load(path)?;
275 Ok(BaselineSummary {
276 path: path.display().to_string(),
277 suite: b.suite.clone(),
278 schema_version: b.schema_version,
279 assay_version: Some(b.assay_version.clone()),
280 entry_count: b.entries.len() as u32,
281 })
282}
283
284fn summarize_db(path: &Path, _diags: &mut Vec<Diagnostic>) -> anyhow::Result<DbSummary> {
285 let size_bytes = std::fs::metadata(path).ok().map(|m| m.len());
286 let store = crate::storage::store::Store::open(path)?;
287 store.init_schema()?; let stats = store
291 .stats_best_effort()
292 .unwrap_or(crate::storage::store::StoreStats {
293 runs: None,
294 results: None,
295 last_run_id: None,
296 last_run_at: None,
297 version: None,
298 });
299
300 Ok(DbSummary {
301 path: path.display().to_string(),
302 size_bytes,
303 runs: stats.runs,
304 results: stats.results,
305 last_run_id: stats.last_run_id,
306 last_run_started_at: stats.last_run_at,
307 })
308}
309
310fn summarize_caches(notes: &mut Vec<String>) -> CacheSummary {
311 let home = std::env::var("HOME").ok();
313 if home.is_none() {
314 notes.push("HOME not set; cannot inspect ~/.assay caches".to_string());
315 return CacheSummary::default();
316 }
317 let home = home.unwrap();
318 let cache_dir = format!("{}/.assay/cache", home);
319 let emb_dir = format!("{}/.assay/embeddings", home);
320
321 CacheSummary {
322 assay_cache_dir: Some(cache_dir.clone()),
323 assay_embeddings_dir: Some(emb_dir.clone()),
324 cache_size_bytes: dir_size_bytes(&cache_dir).ok(),
325 embeddings_size_bytes: dir_size_bytes(&emb_dir).ok(),
326 }
327}
328
329fn dir_size_bytes(p: &str) -> anyhow::Result<u64> {
331 let mut total = 0u64;
332 let path = std::path::Path::new(p);
333 if !path.exists() {
334 return Ok(0);
335 }
336
337 if path.is_file() {
338 return Ok(path.metadata()?.len());
339 }
340
341 let entries = std::fs::read_dir(path)?;
342 for entry in entries {
343 let entry = entry?;
344 let ft = entry.file_type()?;
345 if ft.is_file() {
346 total += entry.metadata()?.len();
347 } else if ft.is_dir() {
348 if !ft.is_symlink() {
352 total += dir_size_bytes(entry.path().to_str().unwrap_or(""))?;
353 }
354 }
355 }
356 Ok(total)
357}
358
359fn suggest_from(
360 diags: &[Diagnostic],
361 _cfg: &EvalConfig,
362 trace: &Option<TraceSummary>,
363 _baseline: &Option<BaselineSummary>,
364) -> Vec<SuggestedAction> {
365 let mut out = vec![];
366
367 if diags.iter().any(|d| d.code == codes::E_TRACE_MISS) {
368 out.push(SuggestedAction {
369 title: "Fix trace miss (prompt drift)".into(),
370 relates_to: "failure_mode_1_trace_miss".into(),
371 why: "Config prompts must match trace prompts exactly in replay/offline modes.".into(),
372 steps: vec![
373 "Run: assay trace verify --trace <trace.jsonl> --config <eval.yaml>".into(),
374 "If prompts changed intentionally: re-ingest + precompute.".into(),
375 ],
376 });
377 }
378
379 if diags
380 .iter()
381 .any(|d| d.code == codes::E_REPLAY_STRICT_MISSING)
382 {
383 out.push(SuggestedAction {
384 title: "Make trace strict-replay ready".into(),
385 relates_to: "failure_mode_??_strict_replay_missing".into(),
386 why: "In --replay-strict, missing embeddings/judge meta is a hard setup error.".into(),
387 steps: vec![
388 "Run: assay trace precompute-embeddings --trace <trace.jsonl> --output <trace_enriched.jsonl> ...".into(),
389 "Run: assay trace precompute-judge --trace <trace_enriched.jsonl> --output <trace_enriched.jsonl> ...".into(),
390 ],
391 });
392 }
393
394 if diags.iter().any(|d| d.code == codes::E_BASE_MISMATCH) {
395 out.push(SuggestedAction {
396 title: "Regenerate or select correct baseline".into(),
397 relates_to: "failure_mode_3_schema_version_drift".into(),
398 why: "Baseline suite/schema must match config suite/schema.".into(),
399 steps: vec![
400 "Export on main: assay ci --config <eval.yaml> --trace-file <main.jsonl> --export-baseline baseline.json".into(),
401 "Gate PR: assay ci --baseline baseline.json".into(),
402 ],
403 });
404 }
405
406 if let Some(t) = trace {
408 if t.entries > 50_000 {
409 out.push(SuggestedAction {
410 title: "Speed up CI for large traces".into(),
411 relates_to: "failure_mode_9_large_trace_performance".into(),
412 why: "Large trace files increase parse time; CI should use a smaller slice + incremental.".into(),
413 steps: vec![
414 "Use a CI slice trace (e.g. top 1k).".into(),
415 "Enable incremental: assay ci --incremental".into(),
416 "Use precompute + --replay-strict for offline CI.".into(),
417 ],
418 });
419 }
420 }
421
422 out
423}