use std::io::{self, IsTerminal};
use std::path::Path;
use anyhow::Result;
use clap::Args;
use serde::Serialize;
use super::check::{self, CheckItem, CheckStatus};
use super::daemon::{daemon_result, mati_root_for, DaemonResult};
// Command-line flags accepted by the `doctor` subcommand.
// NOTE: plain `//` comments are used here on purpose — clap's derive macros turn
// `///` doc comments into help text, which would change the CLI's output.
#[derive(Args)]
pub struct DoctorArgs {
// Emit pretty-printed JSON instead of the human-readable rendering.
#[arg(long)]
pub json: bool,
// Show the daemon's "metrics" payload instead of running the regular checks.
#[arg(long)]
pub internal: bool,
}
/// Entry point for the `doctor` subcommand.
///
/// With `--internal` this delegates to the daemon-metrics view. Otherwise it
/// collects the full report and prints it either as pretty JSON (`--json`) or
/// in the human-readable layout.
///
/// # Errors
///
/// Fails when the current directory or the mati root cannot be resolved, or
/// when the report cannot be serialized.
///
/// # Exit status
///
/// Terminates the process with status 1 when at least one check failed.
pub async fn run(args: DoctorArgs) -> Result<()> {
    let cwd = std::env::current_dir()?;
    let root = mati_root_for(&cwd)?;
    if args.internal {
        return run_internal(&root, args.json).await;
    }

    let report = collect(&cwd, &root).await;
    if args.json {
        let json = serde_json::to_string_pretty(&report)?;
        println!("{json}");
    } else {
        // The report is written with `println!`, so the colour decision has to
        // follow stdout. Probing stderr would leak ANSI escapes into pipes and
        // redirected files whenever stderr is still attached to a terminal.
        let use_color = io::stdout().is_terminal();
        render_human(&report, use_color);
    }

    if report.summary.fail > 0 {
        std::process::exit(1);
    }
    Ok(())
}
/// Fetches the daemon's "metrics" payload and prints it.
///
/// When the daemon cannot be reached, a hint is written to stderr and the
/// process exits with status 1. Otherwise the envelope's `data` member
/// (or JSON null when it is absent) is printed as pretty JSON when `json` is
/// set, or handed to the table renderer.
async fn run_internal(root: &Path, json: bool) -> Result<()> {
    let envelope = match daemon_result(root, "metrics", serde_json::json!({})).await {
        DaemonResult::Ok(envelope) => envelope,
        DaemonResult::Unresponsive => {
            eprintln!("daemon socket exists but is unresponsive");
            eprintln!(" hint: mati daemon stop && mati daemon start");
            std::process::exit(1);
        }
        DaemonResult::NotRunning | DaemonResult::StaleSocket => {
            eprintln!("daemon is not running — start it with `mati daemon start`");
            std::process::exit(1);
        }
    };

    let data = match envelope.get("data") {
        Some(payload) => payload.clone(),
        None => serde_json::Value::Null,
    };

    if json {
        println!("{}", serde_json::to_string_pretty(&data)?);
    } else {
        render_internal_human(&data);
    }
    Ok(())
}
/// Prints the daemon metrics payload as a short summary followed by a
/// per-command latency table.
///
/// `data` is the `data` member of the daemon's "metrics" response. A null
/// payload, a payload without a `commands` array, and an empty `commands`
/// array each produce an explanatory line instead of a table. Missing or
/// non-integer numeric fields are rendered as 0.
fn render_internal_human(data: &serde_json::Value) {
    use comfy_table::{presets::UTF8_FULL_CONDENSED, Cell, ContentArrangement, Table};

    if data.is_null() {
        println!("(daemon has no metrics yet — none recorded since startup)");
        return;
    }

    let uptime = data
        .get("uptime_secs")
        .and_then(|v| v.as_u64())
        .unwrap_or(0);
    let total = data
        .get("total_calls")
        .and_then(|v| v.as_u64())
        .unwrap_or(0);
    let errors = data
        .get("total_errors")
        .and_then(|v| v.as_u64())
        .unwrap_or(0);
    // Guard the division so an idle daemon reports 0% instead of NaN.
    let err_pct = if total == 0 {
        0.0
    } else {
        (errors as f64 / total as f64) * 100.0
    };

    println!("daemon metrics");
    println!(" uptime {}", format_duration(uptime));
    println!(" total calls {total} (errors: {errors}, {err_pct:.2}%)");
    println!();

    let Some(commands) = data.get("commands").and_then(|v| v.as_array()) else {
        // Unknown shape: fall back to dumping the payload so nothing is hidden.
        println!("(unknown metrics shape; raw payload:)");
        if let Ok(pretty) = serde_json::to_string_pretty(data) {
            println!("{pretty}");
        }
        return;
    };
    if commands.is_empty() {
        println!("(no commands recorded since startup)");
        return;
    }

    let mut table = Table::new();
    table
        .load_preset(UTF8_FULL_CONDENSED)
        .set_content_arrangement(ContentArrangement::Dynamic)
        .set_header(vec![
            Cell::new("command"),
            Cell::new("count"),
            Cell::new("err%"),
            Cell::new("mean"),
            Cell::new("p50"),
            Cell::new("p95"),
            Cell::new("p99"),
            Cell::new("max"),
        ]);

    for cmd in commands {
        let name = cmd.get("name").and_then(|v| v.as_str()).unwrap_or("?");
        let count = cmd.get("count").and_then(|v| v.as_u64()).unwrap_or(0);
        let errs = cmd.get("error_count").and_then(|v| v.as_u64()).unwrap_or(0);
        let cmd_err_pct = if count == 0 {
            0.0
        } else {
            (errs as f64 / count as f64) * 100.0
        };

        // Latencies arrive as u64 microseconds while `format_us` takes u32.
        // Clamp before narrowing: a bare `as u32` wraps, which would display a
        // very large latency as a deceptively small one.
        let micros = |key: &str| -> u32 {
            let raw = cmd.get(key).and_then(|v| v.as_u64()).unwrap_or(0);
            raw.min(u64::from(u32::MAX)) as u32
        };
        let mean = micros("mean_us");
        let p50 = micros("p50_us");
        let p95 = micros("p95_us");
        let p99 = micros("p99_us");
        let max = micros("max_us");

        table.add_row(vec![
            Cell::new(name),
            Cell::new(count),
            Cell::new(format!("{cmd_err_pct:.1}%")),
            Cell::new(format_us(mean)),
            Cell::new(format_us(p50)),
            Cell::new(format_us(p95)),
            Cell::new(format_us(p99)),
            Cell::new(format_us(max)),
        ]);
    }
    println!("{table}");
}
/// Formats a microsecond count with the coarsest fitting unit: whole
/// microseconds below 1 ms, milliseconds with one decimal below 1 s, and
/// seconds with two decimals from there on.
fn format_us(us: u32) -> String {
    let micros = f64::from(us);
    match us {
        0..=999 => format!("{us}µs"),
        1_000..=999_999 => format!("{:.1}ms", micros / 1_000.0),
        _ => format!("{:.2}s", micros / 1_000_000.0),
    }
}
/// Formats a duration in seconds using its two most significant units:
/// `Ns`, `Nm Ns`, `Nh Nm`, or `Nd Nh`.
fn format_duration(secs: u64) -> String {
    const MINUTE: u64 = 60;
    const HOUR: u64 = 60 * MINUTE;
    const DAY: u64 = 24 * HOUR;

    match secs {
        s if s < MINUTE => format!("{s}s"),
        s if s < HOUR => format!("{}m {}s", s / MINUTE, s % MINUTE),
        s if s < DAY => format!("{}h {}m", s / HOUR, (s % HOUR) / MINUTE),
        s => format!("{}d {}h", s / DAY, (s % DAY) / HOUR),
    }
}
/// Full doctor report; this is the value serialized for `--json`.
/// Field order is the serialized key order, so keep it stable.
#[derive(Serialize)]
struct Report {
/// Report format version (set to 2 by `collect`).
version: u32,
/// Display form of the mati root path the report was collected for.
root: String,
/// Every check result, in the order the probes ran.
checks: Vec<CheckResult>,
/// Trailing entries read from `lifecycle.log` under the root.
lifecycle: Vec<LifecycleEntry>,
/// Per-status tally of `checks`.
summary: Summary,
/// Extraction statistics; the key is omitted from JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
extraction: Option<mati_core::store::extraction::ExtractionStats>,
}
/// Outcome of one doctor probe.
#[derive(Serialize)]
struct CheckResult {
/// Group the check belongs to (e.g. "enforcement", "daemon", "integrity").
section: &'static str,
/// Stable identifier of the check within its section.
name: &'static str,
/// Outcome classification.
status: Status,
/// Human-readable explanation of the outcome.
detail: String,
/// Suggested remedy, if any; the key is omitted from JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
fix: Option<&'static str>,
}
/// Classification of a check outcome; serialized as the lowercase variant name.
#[derive(Serialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum Status {
/// The check succeeded.
Pass,
/// Something needs attention but does not fail the run.
Warn,
/// The check failed; any failure makes `run` exit with status 1.
Fail,
/// Informational only (for example, a check that was skipped).
Info,
}
/// One parsed line of `lifecycle.log` (four tab-separated columns).
#[derive(Serialize)]
struct LifecycleEntry {
/// First column; 0 when it does not parse as an integer.
/// NOTE(review): rendered as seconds since the Unix epoch — confirm against the writer.
ts: u64,
/// Second column; 0 when it does not parse as an integer.
pid: u32,
/// Third column: the event name.
event: String,
/// Fourth column: free-form detail text.
detail: String,
}
/// Number of checks per status; all counters start at zero.
#[derive(Serialize, Default)]
struct Summary {
/// Checks with `Status::Pass`.
pass: u32,
/// Checks with `Status::Warn`.
warn: u32,
/// Checks with `Status::Fail`.
fail: u32,
/// Checks with `Status::Info`.
info: u32,
}
async fn collect(cwd: &Path, root: &Path) -> Report {
let mut checks: Vec<CheckResult> = Vec::new();
checks.extend(enforcement_results(check::run_silent(cwd).await));
let daemon_state = daemon_result(root, "ping", serde_json::json!({})).await;
match &daemon_state {
DaemonResult::Ok(_) => checks.push(CheckResult {
section: "daemon",
name: "ping",
status: Status::Pass,
detail: "ok".into(),
fix: None,
}),
DaemonResult::Unresponsive => checks.push(CheckResult {
section: "daemon",
name: "ping",
status: Status::Fail,
detail: "socket exists but daemon is not responding".into(),
fix: Some("mati daemon stop && mati daemon start"),
}),
DaemonResult::StaleSocket => checks.push(CheckResult {
section: "daemon",
name: "ping",
status: Status::Warn,
detail: "stale socket detected and cleaned up".into(),
fix: None,
}),
DaemonResult::NotRunning => checks.push(CheckResult {
section: "daemon",
name: "ping",
status: Status::Info,
detail: "not running (OK if no agent session is active)".into(),
fix: None,
}),
}
match crate::cli::proxy::StoreProxy::open(cwd).await {
Ok(proxy) => {
if mati_core::store::repair::is_dirty(&proxy).await {
let detail = match mati_core::store::repair::read_dirty_marker(&proxy).await {
Some(m) => format!("{} key(s) flagged: {}", m.affected_keys.len(), m.cause),
None => "flagged but cause not readable".to_string(),
};
checks.push(CheckResult {
section: "integrity",
name: "dirty_marker",
status: Status::Warn,
detail,
fix: Some("mati repair --fast"),
});
} else {
checks.push(CheckResult {
section: "integrity",
name: "dirty_marker",
status: Status::Pass,
detail: "not set".into(),
fix: None,
});
}
match mati_core::store::repair::check_gotcha_indexes(&proxy).await {
Ok(report) => {
if report.has_drift() {
let detail = format!(
"missing_file={}, stale_file={}, missing_edge={}, stale_edge={}",
report.missing_file_links.len(),
report.stale_file_links.len(),
report.missing_edges.len(),
report.stale_edges.len(),
);
let fix = if !report.missing_file_links.is_empty() {
"mati init (re-index missing files), then mati repair"
} else {
"mati repair"
};
checks.push(CheckResult {
section: "integrity",
name: "drift",
status: Status::Fail,
detail,
fix: Some(fix),
});
} else {
checks.push(CheckResult {
section: "integrity",
name: "drift",
status: Status::Pass,
detail: "no drift detected".into(),
fix: None,
});
}
}
Err(e) => checks.push(CheckResult {
section: "integrity",
name: "drift",
status: Status::Fail,
detail: format!("check error: {e}"),
fix: None,
}),
}
let _ = proxy.close().await;
}
Err(_) => {
let no_store_detail =
"skipped — no store initialized for this directory (run `mati init` to set up)";
checks.push(CheckResult {
section: "integrity",
name: "dirty_marker",
status: Status::Info,
detail: no_store_detail.into(),
fix: Some("mati init"),
});
checks.push(CheckResult {
section: "integrity",
name: "drift",
status: Status::Info,
detail: no_store_detail.into(),
fix: Some("mati init"),
});
}
}
checks.push(collect_chain_check(cwd).await);
checks.push(collect_freshness_check(cwd).await);
checks.push(collect_network_attestation());
let lifecycle = read_lifecycle_tail(root, 5);
let extraction = collect_extraction_stats(cwd).await;
let mut summary = Summary::default();
for c in &checks {
match c.status {
Status::Pass => summary.pass += 1,
Status::Warn => summary.warn += 1,
Status::Fail => summary.fail += 1,
Status::Info => summary.info += 1,
}
}
Report {
version: 2,
root: root.display().to_string(),
checks,
lifecycle,
summary,
extraction,
}
}
/// Reports whether this build links a network-capable dependency.
///
/// The answer is fixed at compile time by the `semantic` cargo feature: with
/// the feature enabled the check is informational, otherwise it passes.
fn collect_network_attestation() -> CheckResult {
    let (status, detail) = if cfg!(feature = "semantic") {
        (
            Status::Info,
            "semantic feature enabled — embedding-model fetch via hf-hub \
             is linked (opt-in; downloads all-MiniLM-L6-v2 on first use). \
             Enforcement decisions remain local and make no outbound \
             connection.",
        )
    } else {
        (
            Status::Pass,
            "no network/HTTP-client dependency linked in this build; the \
             enforcement path makes no outbound connection (verified in \
             CI: build-time dep-ban + runtime syscall audit). mati never \
             phones home.",
        )
    };

    CheckResult {
        section: "attestation",
        name: "network",
        status,
        detail: detail.to_string(),
        fix: None,
    }
}
/// Converts the enforcement probe items into doctor check results.
///
/// Items whose label is not one of the known probes are dropped. Passing
/// items carry no fix; every other item gets the remedy associated with its
/// check name.
fn enforcement_results(items: Vec<CheckItem>) -> Vec<CheckResult> {
    let mut results = Vec::new();

    for item in items {
        let name: &'static str = match item.label {
            "git repo" => "git_repo",
            "mati in PATH" => "mati_on_path",
            "awk float math" => "awk_float_math",
            "agent host" => "agent_host",
            "claude hooks" => "claude_hooks",
            "claude config" => "claude_config",
            "codex hooks" => "codex_hooks",
            "codex config" => "codex_config",
            // Unknown probes are not part of the doctor report.
            _ => continue,
        };

        let (status, detail) = match item.status {
            CheckStatus::Pass(Some(extra)) => (Status::Pass, extra),
            CheckStatus::Pass(None) => (Status::Pass, "ok".to_string()),
            CheckStatus::Warn(msg) => (Status::Warn, msg),
            CheckStatus::Fail(msg) => (Status::Fail, msg),
        };

        let fix = match (status, name) {
            (Status::Pass, _) => None,
            (_, "git_repo") => Some("run mati from inside a git repository"),
            (_, "mati_on_path") => Some("ensure the mati binary is on PATH"),
            (_, "awk_float_math") => Some("install awk/gawk and ensure it is on PATH"),
            (_, "agent_host") => Some("mati hooks --claude (or --codex)"),
            (_, "codex_hooks" | "codex_config") => Some("mati hooks --codex"),
            // Remaining names are the claude hook/config probes.
            _ => Some("mati hooks --claude"),
        };

        results.push(CheckResult {
            section: "enforcement",
            name,
            status,
            detail,
            fix,
        });
    }

    results
}
/// Verifies the enforcement event chain stored for `cwd`.
///
/// Outcomes:
/// - `Info` when no store can be opened for the directory (check skipped);
/// - `Fail` when the events cannot be read — an unreadable chain cannot be
///   called intact, so the read error is surfaced instead of being treated
///   as an empty (and therefore trivially valid) chain;
/// - `Pass` when every event verifies;
/// - `Fail` with tamper/linkage/unknown-schema counts otherwise.
async fn collect_chain_check(cwd: &Path) -> CheckResult {
    let Ok(proxy) = crate::cli::proxy::StoreProxy::open(cwd).await else {
        return CheckResult {
            section: "integrity",
            name: "chain",
            status: Status::Info,
            detail: "skipped — no store initialized for this directory".to_string(),
            fix: None,
        };
    };

    let scanned = proxy.scan_enforcement_events(0, u64::MAX).await;
    // Close before inspecting the result so the store is released on every path.
    let _ = proxy.close().await;

    let events = match scanned {
        Ok(events) => events,
        Err(e) => {
            // Same reporting shape as the drift check's error branch.
            return CheckResult {
                section: "integrity",
                name: "chain",
                status: Status::Fail,
                detail: format!("check error: {e}"),
                fix: None,
            };
        }
    };

    let total = events.len();
    let result = mati_core::store::enforcement::verify_chain(&events);
    if result.is_valid() {
        CheckResult {
            section: "integrity",
            name: "chain",
            status: Status::Pass,
            detail: format!("intact — {total} event(s), every hash verified"),
            fix: None,
        }
    } else {
        CheckResult {
            section: "integrity",
            name: "chain",
            status: Status::Fail,
            detail: format!(
                "BROKEN — {} tampered, {} linkage break(s), {} unknown-schema of {total} event(s)",
                result.tampered_events, result.linkage_breaks, result.unknown_schema
            ),
            fix: Some("mati verify-chain --verbose"),
        }
    }
}
/// Reports how many stale records the cached stale summary lists.
///
/// Skipped (`Info`) when no store can be opened, `Info` when no cached
/// summary is available, `Pass` when the summary counts zero stale records,
/// and `Warn` with a per-kind breakdown otherwise.
async fn collect_freshness_check(cwd: &Path) -> CheckResult {
    let (status, detail, fix) = match crate::cli::proxy::StoreProxy::open(cwd).await {
        Err(_) => (
            Status::Info,
            "skipped — no store initialized for this directory".to_string(),
            None,
        ),
        Ok(proxy) => {
            let cached = crate::cli::stale::cached_stale_summary(&proxy).await;
            let _ = proxy.close().await;
            match cached {
                None => (
                    Status::Info,
                    "unknown — run `mati stale` for a current count".to_string(),
                    None,
                ),
                Some(s) if s.total == 0 => (Status::Pass, "no stale records".to_string(), None),
                Some(s) => (
                    Status::Warn,
                    format!(
                        "{} stale record(s) ({} stale, {} liability, {} tombstone)",
                        s.total, s.stale, s.liability, s.tombstone
                    ),
                    Some("mati stale (then mati init / mati reparse to refresh)"),
                ),
            }
        }
    };

    CheckResult {
        section: "knowledge",
        name: "freshness",
        status,
        detail,
        fix,
    }
}
/// Aggregates extraction records from the store over the trailing 30 days.
///
/// Returns `None` when no store can be opened. A failed prefix scan is
/// treated as "no records", and records without a payload or with a payload
/// that does not deserialize are skipped.
async fn collect_extraction_stats(
    cwd: &Path,
) -> Option<mati_core::store::extraction::ExtractionStats> {
    use mati_core::store::extraction::{aggregate_stats, ExtractionRecord, EXTRACTION_PREFIX};

    const WINDOW_SECS: u64 = 30 * 86_400;

    let proxy = crate::cli::proxy::StoreProxy::open(cwd).await.ok()?;
    let scanned = proxy
        .scan_prefix(EXTRACTION_PREFIX)
        .await
        .unwrap_or_default();
    let _ = proxy.close().await;

    let mut extractions: Vec<ExtractionRecord> = Vec::new();
    for record in scanned {
        let Some(payload) = record.payload else {
            continue;
        };
        if let Ok(parsed) = serde_json::from_value(payload) {
            extractions.push(parsed);
        }
    }

    // A clock set before the Unix epoch degrades to 0 instead of failing.
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let since = now.saturating_sub(WINDOW_SECS);

    Some(aggregate_stats(&extractions, since, now))
}
/// Returns the last `n` well-formed entries of `<root>/lifecycle.log`, oldest
/// first.
///
/// A line is well-formed when it has four tab-separated columns
/// (`ts`, `pid`, `event`, `detail`); the detail column keeps any further tabs.
/// Malformed lines are skipped *before* the tail is taken, so a blank or
/// truncated line near the end of the log does not shrink the result while
/// older valid entries exist. Unparseable `ts`/`pid` values become 0. A
/// missing or unreadable log yields an empty vector.
fn read_lifecycle_tail(root: &Path, n: usize) -> Vec<LifecycleEntry> {
    let Ok(contents) = std::fs::read_to_string(root.join("lifecycle.log")) else {
        return Vec::new();
    };

    // Walk backwards so parsing stops as soon as `n` valid entries are found.
    let mut tail: Vec<LifecycleEntry> = contents
        .lines()
        .rev()
        .filter_map(|line| {
            let mut cols = line.splitn(4, '\t');
            let (ts, pid, event, detail) =
                (cols.next()?, cols.next()?, cols.next()?, cols.next()?);
            Some(LifecycleEntry {
                ts: ts.parse().unwrap_or(0),
                pid: pid.parse().unwrap_or(0),
                event: event.to_string(),
                detail: detail.to_string(),
            })
        })
        .take(n)
        .collect();

    // Restore chronological (file) order.
    tail.reverse();
    tail
}
/// Prints the report in its human-readable layout: checks grouped under
/// section headings, the lifecycle tail, the optional extraction section and
/// a one-line verdict.
///
/// `use_color` controls whether status symbols are wrapped in ANSI colours.
fn render_human(report: &Report, use_color: bool) {
    println!();
    println!("mati doctor — {}", report.root);
    println!();

    // Empty string means "no section printed yet".
    let mut shown_section: &str = "";
    for check in &report.checks {
        if check.section != shown_section {
            // Blank line between sections, but not before the first one.
            if !shown_section.is_empty() {
                println!();
            }
            let heading = match check.section {
                "enforcement" => "Enforcement",
                "daemon" => "Daemon",
                "integrity" => "Integrity",
                "knowledge" => "Knowledge",
                "attestation" => "Attestation",
                unknown => unknown,
            };
            println!("{heading}");
            shown_section = check.section;
        }

        let symbol = symbol_for(check.status, use_color);
        println!(" {:18} {} {}", check.name, symbol, check.detail);
        if let Some(fix) = check.fix {
            println!(" fix: {fix}");
        }
    }

    println!();
    println!("Lifecycle (last {} events)", report.lifecycle.len());
    if report.lifecycle.is_empty() {
        println!(" (no lifecycle.log yet — log fills as the daemon runs)");
    }
    for event in &report.lifecycle {
        println!(
            " {:<14} pid={:<6} {:<18} {}",
            relative_ts(event.ts),
            event.pid,
            event.event,
            event.detail
        );
    }

    if let Some(extraction) = &report.extraction {
        render_extraction_section(extraction);
    }

    println!();
    let tally = &report.summary;
    match (tally.fail, tally.warn) {
        (0, 0) => println!("Result: all checks passed ({} pass)", tally.pass),
        (0, warn) => println!("Result: clean with {} warn ({} pass)", warn, tally.pass),
        (fail, warn) => println!(
            "Result: {} fail, {} warn, {} pass — see fixes above",
            fail, warn, tally.pass
        ),
    }
}
/// Prints the extraction-quality section of the human report.
///
/// Nothing is printed when there are no extractions at all. The per-tier
/// breakdown lists only tiers with at least one extraction, and the
/// per-config breakdown appears only when two or more configs are in use.
fn render_extraction_section(s: &mati_core::store::extraction::ExtractionStats) {
    if s.total == 0 {
        return;
    }

    println!();
    println!("Extraction quality (last 30d, /mati-enrich pipeline)");
    println!(" total {:>4}", s.total);
    let confirmed_share = rate_label(s.confirmed, s.total);
    println!(" confirmed {:>4} ({})", s.confirmed, confirmed_share);
    let tombstoned_share = rate_label(s.tombstoned, s.total);
    println!(" tombstoned {:>4} ({})", s.tombstoned, tombstoned_share);
    println!(" pending {:>4}", s.pending);
    if s.expired > 0 {
        println!(" expired (>90d) {:>4}", s.expired);
    }

    let tiers = [
        ("fast ", &s.per_tier.fast),
        ("standard", &s.per_tier.standard),
        ("deep ", &s.per_tier.deep),
        ("unknown ", &s.per_tier.unknown),
    ];
    if tiers.iter().any(|(_, tier)| tier.total > 0) {
        println!();
        println!(" Per-tier:");
        for (label, tier) in tiers.iter().filter(|(_, tier)| tier.total > 0) {
            let rate = match tier.confirmed_rate() {
                Some(r) => format!("{:>3.0}% confirmed", r * 100.0),
                None => "—".to_string(),
            };
            println!(" {label} {:>3} extractions, {rate}", tier.total);
        }
    }

    // A single config has nothing to be compared against, so skip the A/B view.
    let active_configs: Vec<_> = s.per_config.iter().filter(|(_, t)| t.total > 0).collect();
    if active_configs.len() >= 2 {
        println!();
        println!(" Per-config (A/B):");
        for (label, tier) in &active_configs {
            let rate = match tier.confirmed_rate() {
                Some(r) => format!("{:>3.0}% confirmed", r * 100.0),
                None => "—".to_string(),
            };
            println!(" {label:>11} {:>3} extractions, {rate}", tier.total);
        }
    }
}
/// Formats `n / total` as a whole-number percentage (truncated), e.g. `"66%"`.
///
/// Returns `"0%"` when `total` is zero. The product is computed in `u128` so
/// that `n * 100` cannot overflow for any `u64` input.
fn rate_label(n: u64, total: u64) -> String {
    let pct = (u128::from(n) * 100)
        .checked_div(u128::from(total))
        .unwrap_or(0);
    format!("{pct}%")
}
/// Returns the status marker shown next to a check, wrapped in an ANSI colour
/// sequence when `use_color` is set (green, yellow, red, grey).
fn symbol_for(status: Status, use_color: bool) -> String {
    let (label, color_code) = match status {
        Status::Pass => ("ok", "32"),
        Status::Warn => ("WARN", "33"),
        Status::Fail => ("FAIL", "31"),
        Status::Info => ("—", "90"),
    };

    if use_color {
        format!("\x1b[{color_code}m{label}\x1b[0m")
    } else {
        label.to_string()
    }
}
/// Describes how long ago the Unix timestamp `ts` (seconds) was, using the
/// largest whole unit: seconds, minutes, hours or days.
///
/// Timestamps at or after the current time are reported as "just now".
fn relative_ts(ts: u64) -> String {
    // A clock set before the Unix epoch degrades to 0 rather than panicking.
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs());

    match now.saturating_sub(ts) {
        0 => "just now".to_string(),
        ago @ 1..=59 => format!("{ago}s ago"),
        ago @ 60..=3_599 => format!("{}m ago", ago / 60),
        ago @ 3_600..=86_399 => format!("{}h ago", ago / 3_600),
        ago => format!("{}d ago", ago / 86_400),
    }
}
// Unit tests for the lifecycle-log reader and the shape of the doctor report.
#[cfg(test)]
mod tests {
use super::*;
// Two well-formed lines are parsed into two entries with the expected fields.
#[test]
fn read_lifecycle_tail_parses_well_formed_lines() {
let dir = tempfile::tempdir().unwrap();
std::fs::write(
dir.path().join("lifecycle.log"),
"100\t111\tserve_start\tpid=111 owner=mcp\n\
200\t111\tserve_shutdown\tclean\n",
)
.unwrap();
let entries = read_lifecycle_tail(dir.path(), 5);
assert_eq!(entries.len(), 2);
assert_eq!(entries[0].ts, 100);
assert_eq!(entries[0].event, "serve_start");
assert_eq!(entries[1].detail, "clean");
}
// With ten lines and n = 3, only the last three entries come back, in file order.
#[test]
fn read_lifecycle_tail_returns_last_n_only() {
let dir = tempfile::tempdir().unwrap();
let body: String = (0..10)
.map(|i| format!("{i}\t{i}\tevent{i}\tdetail{i}\n"))
.collect();
std::fs::write(dir.path().join("lifecycle.log"), body).unwrap();
let entries = read_lifecycle_tail(dir.path(), 3);
assert_eq!(entries.len(), 3);
assert_eq!(entries[0].ts, 7);
assert_eq!(entries[2].ts, 9);
}
// A line with fewer than four tab-separated columns is dropped, not an error.
#[test]
fn read_lifecycle_tail_skips_malformed_lines() {
let dir = tempfile::tempdir().unwrap();
std::fs::write(
dir.path().join("lifecycle.log"),
"100\t111\tserve_start\tok\n\
not\ta\tvalid\n\
200\t111\tserve_shutdown\tclean\n",
)
.unwrap();
let entries = read_lifecycle_tail(dir.path(), 5);
assert_eq!(entries.len(), 2);
}
// A missing lifecycle.log yields an empty result rather than a failure.
#[test]
fn read_lifecycle_tail_returns_empty_when_missing() {
let dir = tempfile::tempdir().unwrap();
let entries = read_lifecycle_tail(dir.path(), 5);
assert!(entries.is_empty());
}
// Writer/reader contract: lines emitted by `record_lifecycle_event` must parse
// back, including a detail that originally contained tabs and a newline.
#[test]
fn lifecycle_log_round_trip_writer_reader() {
use mati_core::mcp::metadata::record_lifecycle_event;
let dir = tempfile::tempdir().unwrap();
record_lifecycle_event(dir.path(), "serve_start", "owner=mcp");
record_lifecycle_event(dir.path(), "panic", "boom\twith\ttabs\nand newline");
record_lifecycle_event(dir.path(), "serve_shutdown", "reason=signal");
let entries = read_lifecycle_tail(dir.path(), 5);
assert_eq!(
entries.len(),
3,
"doctor reader must parse all 3 writer-emitted lines, got {}",
entries.len()
);
assert_eq!(entries[0].event, "serve_start");
assert_eq!(entries[0].detail, "owner=mcp");
assert_eq!(entries[1].event, "panic");
assert!(
!entries[1].detail.contains('\t') && !entries[1].detail.contains('\n'),
"writer must scrub tabs/newlines; reader saw: {:?}",
entries[1].detail
);
assert!(
entries[1].detail.contains("boom") && entries[1].detail.contains("tabs"),
"writer must preserve detail content (modulo separator scrubbing); got {:?}",
entries[1].detail
);
assert_eq!(entries[2].event, "serve_shutdown");
assert_eq!(entries[2].detail, "reason=signal");
assert!(entries[0].ts > 0, "writer must emit a real Unix timestamp");
assert!(
entries[0].ts <= entries[2].ts,
"timestamps must be monotonic"
);
// All three events were written by this test process.
let pid = std::process::id();
for e in &entries {
assert_eq!(e.pid, pid, "writer→reader pid round-trip must be stable");
}
}
// Pins the (section, name) sequence of the report and its version, collected
// for a fresh temporary directory used as both cwd and root.
#[tokio::test]
async fn doctor_json_shape_is_stable_across_scenarios() {
let dir = tempfile::tempdir().unwrap();
let report = collect(dir.path(), dir.path()).await;
let names: Vec<(&str, &str)> = report.checks.iter().map(|c| (c.section, c.name)).collect();
assert_eq!(
names,
vec![
("enforcement", "git_repo"),
("enforcement", "mati_on_path"),
("enforcement", "awk_float_math"),
("enforcement", "agent_host"),
("enforcement", "claude_hooks"),
("enforcement", "claude_config"),
("enforcement", "codex_hooks"),
("enforcement", "codex_config"),
("daemon", "ping"),
("integrity", "dirty_marker"),
("integrity", "drift"),
("integrity", "chain"),
("knowledge", "freshness"),
("attestation", "network"),
],
"doctor JSON check sequence must be the enforcement probes \
(git_repo → mati_on_path → awk_float_math → agent_host → \
claude_hooks → claude_config → codex_hooks → codex_config) then \
ping → dirty_marker → drift → chain → freshness → network \
(the network attestation is appended last so the --json array \
stays index-stable for existing consumers)"
);
assert_eq!(report.version, 2);
}
}