assay_core/doctor/
mod.rs

1pub mod analyzers;
2pub mod model;
3
4use chrono::Utc;
5use std::collections::HashMap;
6use std::io::BufRead;
7use std::path::{Path, PathBuf};
8
9use crate::config::path_resolver::PathResolver;
10use crate::errors::diagnostic::{codes, Diagnostic};
11use crate::model::{EvalConfig, Expected, Policy};
12use crate::validate::{validate, ValidateOptions};
13
14use model::*;
15
/// Inputs that control a single `doctor` run.
#[derive(Clone, Debug)]
pub struct DoctorOptions {
    /// Path of the eval config; echoed back in the report's `inputs` section.
    pub config_path: PathBuf,
    /// Optional trace file: forwarded to validation, schema-analyzed, and summarized.
    pub trace_file: Option<PathBuf>,
    /// Optional baseline file: forwarded to validation and summarized.
    pub baseline_file: Option<PathBuf>,
    /// Optional database path; when present the database is opened and summarized.
    pub db_path: Option<PathBuf>,
    /// Forwarded to validation as `replay_strict` and echoed in the report.
    pub replay_strict: bool,
}
24
25pub async fn doctor(
26    cfg: &EvalConfig,
27    opts: &DoctorOptions,
28    resolver: &PathResolver,
29) -> anyhow::Result<DoctorReport> {
30    let mut notes = vec![];
31    let mut diagnostics: Vec<Diagnostic> = vec![];
32
33    let vopts = ValidateOptions {
34        trace_file: opts.trace_file.clone(),
35        baseline_file: opts.baseline_file.clone(),
36        replay_strict: opts.replay_strict,
37    };
38    let vreport = validate(cfg, &vopts, resolver).await?;
39    diagnostics.extend(vreport.diagnostics);
40
41    let mut loaded_policies = HashMap::new();
42
43    let unknown_field_re = regex::Regex::new(r"unknown field `([^`]+)`, expected one of (.*)")
44        .expect("Invalid regex for unknown field parsing");
45
46    for test in &cfg.tests {
47        if let Some(path) = test.expected.get_policy_path() {
48            let mut p_str = path.to_string();
49            resolver.resolve_str(&mut p_str);
50            let pb = PathBuf::from(p_str);
51            if pb.exists() {
52                match Policy::load(&pb) {
53                    Ok(p) => {
54                        loaded_policies.insert(path.to_string(), p);
55                    }
56                    Err(e) => {
57                        let msg = e.to_string();
58                        let mut diag = Diagnostic::new(
59                            codes::E_CFG_PARSE,
60                            format!("Failed to parse policy '{}': {}", path, msg),
61                        )
62                        .with_source("doctor.policy_load")
63                        .with_context(serde_json::json!({ "path": pb, "error": msg }));
64
65                        if let Some(caps) = unknown_field_re.captures(&msg) {
66                            let unknown = &caps[1];
67                            let expected_str = &caps[2];
68                            // expected_str usually looks like "`a`, `b`, `c`"
69                            let candidates: Vec<String> = expected_str
70                                .split(',')
71                                .map(|s| s.trim().trim_matches('`').to_string())
72                                .collect();
73
74                            if let Some(hint) = crate::errors::similarity::closest_prompt(
75                                unknown,
76                                candidates.iter(),
77                            ) {
78                                diag = diag.with_fix_step(format!(
79                                    "Replace `{}` with `{}`",
80                                    unknown, hint.prompt
81                                ));
82                            }
83                        }
84                        diagnostics.push(diag);
85                    }
86                }
87            }
88        }
89    }
90
91    analyzers::config::analyze_config_integrity(cfg, resolver, &mut diagnostics);
92    analyzers::policy::analyze_policy_usage(cfg, &loaded_policies, &mut diagnostics);
93
94    if let Some(p) = &opts.trace_file {
95        analyzers::trace::analyze_trace_schema(p, &mut diagnostics);
96    }
97
98    let config_summary = Some(summarize_config(cfg));
99
100    let trace_summary = match &opts.trace_file {
101        Some(p) => summarize_trace(p, cfg, &mut diagnostics).ok(),
102        None => None,
103    };
104
105    let baseline_summary = match &opts.baseline_file {
106        Some(p) => summarize_baseline(p, &mut diagnostics).ok(),
107        None => None,
108    };
109
110    let db_summary = match &opts.db_path {
111        Some(p) => summarize_db(p, &mut diagnostics).ok(),
112        None => None,
113    };
114
115    let caches = summarize_caches(&mut notes);
116
117    let suggested_actions = suggest_from(&diagnostics, cfg, &trace_summary, &baseline_summary);
118
119    Ok(DoctorReport {
120        schema_version: 1,
121        generated_at: Utc::now().to_rfc3339(),
122        assay_version: env!("CARGO_PKG_VERSION").to_string(),
123        platform: PlatformInfo {
124            os: std::env::consts::OS.to_string(),
125            arch: std::env::consts::ARCH.to_string(),
126        },
127        inputs: DoctorInputs {
128            config_path: opts.config_path.display().to_string(),
129            trace_file: opts.trace_file.as_ref().map(|p| p.display().to_string()),
130            baseline_file: opts.baseline_file.as_ref().map(|p| p.display().to_string()),
131            db_path: opts.db_path.as_ref().map(|p| p.display().to_string()),
132            replay_strict: opts.replay_strict,
133        },
134        config: config_summary,
135        trace: trace_summary,
136        baseline: baseline_summary,
137        db: db_summary,
138        caches,
139        diagnostics,
140        suggested_actions,
141        notes,
142    })
143}
144
// Summary helpers: each function below builds one section of the `DoctorReport`
// (config, trace, baseline, db, caches) or derives suggested actions from diagnostics.
149
150fn summarize_config(cfg: &EvalConfig) -> ConfigSummary {
151    use std::collections::BTreeMap;
152    let mut metric_counts: BTreeMap<String, u32> = BTreeMap::new();
153
154    for tc in &cfg.tests {
155        let key = match &tc.expected {
156            Expected::MustContain { .. } => "must_contain",
157            Expected::MustNotContain { .. } => "must_not_contain",
158            Expected::RegexMatch { .. } => "regex_match",
159            Expected::RegexNotMatch { .. } => "regex_not_match",
160            Expected::JsonSchema { .. } => "json_schema",
161            Expected::SemanticSimilarityTo { .. } => "semantic_similarity_to",
162            Expected::Faithfulness { .. } => "faithfulness",
163            Expected::Relevance { .. } => "relevance",
164            Expected::JudgeCriteria { .. } => "judge_criteria",
165            Expected::ArgsValid { .. } => "args_valid",
166            Expected::SequenceValid { .. } => "sequence_valid",
167            Expected::ToolBlocklist { .. } => "tool_blocklist",
168            Expected::Reference { .. } => "reference",
169        }
170        .to_string();
171
172        *metric_counts.entry(key).or_insert(0) += 1;
173    }
174
175    let (mode, max_drop, min_floor) = cfg
176        .settings
177        .thresholding
178        .as_ref()
179        .map(|t| (t.mode.clone(), t.max_drop, t.min_floor))
180        .unwrap_or((None, None, None));
181
182    ConfigSummary {
183        suite: cfg.suite.clone(),
184        model: cfg.model.clone(),
185        test_count: cfg.tests.len() as u32,
186        metric_counts,
187        thresholding_mode: mode,
188        max_drop,
189        min_floor,
190    }
191}
192
193fn summarize_trace(
194    path: &Path,
195    _cfg: &EvalConfig,
196    _diags: &mut Vec<Diagnostic>,
197) -> anyhow::Result<TraceSummary> {
198    // Keep it cheap: count lines, peek first line for schema_version/meta shape.
199    let md = std::fs::metadata(path).ok();
200    let approx_size_bytes = md.map(|m| m.len());
201
202    let f = std::fs::File::open(path)?;
203    let rdr = std::io::BufReader::new(f);
204
205    let mut entries: u64 = 0;
206    let mut first_schema: Option<u32> = None;
207    let mut has_assay_meta = false;
208
209    // Coverage: best-effort scan until N lines (avoid huge files)
210    let mut has_embeddings = false;
211    let mut has_judge_faithfulness = false;
212    let mut has_judge_relevance = false;
213
214    for (i, line) in rdr.lines().enumerate() {
215        let line = line?;
216        if line.trim().is_empty() {
217            continue;
218        }
219        // Attempt to ignore non-JSON lines if possible, but assume JSONL
220        entries += 1;
221
222        if i == 0 {
223            if let Ok(v) = serde_json::from_str::<serde_json::Value>(&line) {
224                first_schema = v
225                    .get("schema_version")
226                    .and_then(|x| x.as_u64())
227                    .map(|x| x as u32);
228                if v.get("meta").and_then(|m| m.get("assay")).is_some() {
229                    has_assay_meta = true;
230                }
231            }
232        }
233
234        // scan first 200 entries for assay meta coverage
235        if i < 200 {
236            if let Ok(v) = serde_json::from_str::<serde_json::Value>(&line) {
237                if let Some(meta) = v.get("meta").and_then(|m| m.get("assay")) {
238                    if meta.pointer("/embeddings").is_some() {
239                        has_embeddings = true;
240                    }
241                    if meta.pointer("/judge/faithfulness").is_some() {
242                        has_judge_faithfulness = true;
243                    }
244                    if meta.pointer("/judge/relevance").is_some() {
245                        has_judge_relevance = true;
246                    }
247                }
248            }
249        } else if has_embeddings && has_judge_faithfulness && has_judge_relevance {
250            // Found everything, mostly likely. But we want to count entries completely?
251            // If file is huge, counting lines might be slow. But usually fast enough.
252            // Let's iterate all to count.
253        }
254    }
255
256    Ok(TraceSummary {
257        path: path.display().to_string(),
258        entries,
259        schema_version: first_schema,
260        has_assay_meta,
261        coverage: TraceCoverage {
262            has_embeddings,
263            has_judge_faithfulness,
264            has_judge_relevance,
265        },
266        approx_size_bytes,
267    })
268}
269
270fn summarize_baseline(
271    path: &Path,
272    _diags: &mut Vec<Diagnostic>,
273) -> anyhow::Result<BaselineSummary> {
274    let b = crate::baseline::Baseline::load(path)?;
275    Ok(BaselineSummary {
276        path: path.display().to_string(),
277        suite: b.suite.clone(),
278        schema_version: b.schema_version,
279        assay_version: Some(b.assay_version.clone()),
280        entry_count: b.entries.len() as u32,
281    })
282}
283
284fn summarize_db(path: &Path, _diags: &mut Vec<Diagnostic>) -> anyhow::Result<DbSummary> {
285    let size_bytes = std::fs::metadata(path).ok().map(|m| m.len());
286    let store = crate::storage::store::Store::open(path)?;
287    store.init_schema()?; // ensure migrations
288
289    // These queries are intentionally light
290    let stats = store
291        .stats_best_effort()
292        .unwrap_or(crate::storage::store::StoreStats {
293            runs: None,
294            results: None,
295            last_run_id: None,
296            last_run_at: None,
297            version: None,
298        });
299
300    Ok(DbSummary {
301        path: path.display().to_string(),
302        size_bytes,
303        runs: stats.runs,
304        results: stats.results,
305        last_run_id: stats.last_run_id,
306        last_run_started_at: stats.last_run_at,
307    })
308}
309
310fn summarize_caches(notes: &mut Vec<String>) -> CacheSummary {
311    // best effort: read HOME and check ~/.assay/*
312    let home = std::env::var("HOME").ok();
313    if home.is_none() {
314        notes.push("HOME not set; cannot inspect ~/.assay caches".to_string());
315        return CacheSummary::default();
316    }
317    let home = home.unwrap();
318    let cache_dir = format!("{}/.assay/cache", home);
319    let emb_dir = format!("{}/.assay/embeddings", home);
320
321    CacheSummary {
322        assay_cache_dir: Some(cache_dir.clone()),
323        assay_embeddings_dir: Some(emb_dir.clone()),
324        cache_size_bytes: dir_size_bytes(&cache_dir).ok(),
325        embeddings_size_bytes: dir_size_bytes(&emb_dir).ok(),
326    }
327}
328
329// Simple recursive directory size without external crates
330fn dir_size_bytes(p: &str) -> anyhow::Result<u64> {
331    let mut total = 0u64;
332    let path = std::path::Path::new(p);
333    if !path.exists() {
334        return Ok(0);
335    }
336
337    if path.is_file() {
338        return Ok(path.metadata()?.len());
339    }
340
341    let entries = std::fs::read_dir(path)?;
342    for entry in entries {
343        let entry = entry?;
344        let ft = entry.file_type()?;
345        if ft.is_file() {
346            total += entry.metadata()?.len();
347        } else if ft.is_dir() {
348            // Heuristic: limit recursion depth or just do 1 level?
349            // Standard recursion is fine for cache dirs (usually flat or few levels)
350            // But let's be careful about symlinks/cycles (ignore symlinks)
351            if !ft.is_symlink() {
352                total += dir_size_bytes(entry.path().to_str().unwrap_or(""))?;
353            }
354        }
355    }
356    Ok(total)
357}
358
359fn suggest_from(
360    diags: &[Diagnostic],
361    _cfg: &EvalConfig,
362    trace: &Option<TraceSummary>,
363    _baseline: &Option<BaselineSummary>,
364) -> Vec<SuggestedAction> {
365    let mut out = vec![];
366
367    if diags.iter().any(|d| d.code == codes::E_TRACE_MISS) {
368        out.push(SuggestedAction {
369            title: "Fix trace miss (prompt drift)".into(),
370            relates_to: "failure_mode_1_trace_miss".into(),
371            why: "Config prompts must match trace prompts exactly in replay/offline modes.".into(),
372            steps: vec![
373                "Run: assay trace verify --trace <trace.jsonl> --config <eval.yaml>".into(),
374                "If prompts changed intentionally: re-ingest + precompute.".into(),
375            ],
376        });
377    }
378
379    if diags
380        .iter()
381        .any(|d| d.code == codes::E_REPLAY_STRICT_MISSING)
382    {
383        out.push(SuggestedAction {
384            title: "Make trace strict-replay ready".into(),
385            relates_to: "failure_mode_??_strict_replay_missing".into(),
386            why: "In --replay-strict, missing embeddings/judge meta is a hard setup error.".into(),
387            steps: vec![
388                "Run: assay trace precompute-embeddings --trace <trace.jsonl> --output <trace_enriched.jsonl> ...".into(),
389                "Run: assay trace precompute-judge --trace <trace_enriched.jsonl> --output <trace_enriched.jsonl> ...".into(),
390            ],
391        });
392    }
393
394    if diags.iter().any(|d| d.code == codes::E_BASE_MISMATCH) {
395        out.push(SuggestedAction {
396            title: "Regenerate or select correct baseline".into(),
397            relates_to: "failure_mode_3_schema_version_drift".into(),
398            why: "Baseline suite/schema must match config suite/schema.".into(),
399            steps: vec![
400                "Export on main: assay ci --config <eval.yaml> --trace-file <main.jsonl> --export-baseline baseline.json".into(),
401                "Gate PR: assay ci --baseline baseline.json".into(),
402            ],
403        });
404    }
405
406    // Heuristic: large trace performance
407    if let Some(t) = trace {
408        if t.entries > 50_000 {
409            out.push(SuggestedAction {
410                title: "Speed up CI for large traces".into(),
411                relates_to: "failure_mode_9_large_trace_performance".into(),
412                why: "Large trace files increase parse time; CI should use a smaller slice + incremental.".into(),
413                steps: vec![
414                    "Use a CI slice trace (e.g. top 1k).".into(),
415                    "Enable incremental: assay ci --incremental".into(),
416                    "Use precompute + --replay-strict for offline CI.".into(),
417                ],
418            });
419        }
420    }
421
422    out
423}