galdr 0.17.0

Record & Replay for agent skills — capture a session's tool calls and distill them into a reproducible skill. Local-first.
//! Operational health checks for galdr.

use anyhow::{Result, bail};

use crate::{catalog, config, ipc, observe_mac, paths, record, setup, style, validate};

/// A green `ok` status line.
///
/// The two-character tag is followed by three spaces so the message starts in the
/// fifth column, the same column used by the `warn`, `err`, and `note` lines
/// (a tag padded to four characters, then one space).
fn ok(msg: impl AsRef<str>) {
    println!("{}   {}", style::green("ok"), msg.as_ref());
}

/// Prints an amber `warn` status line: the colored tag, one space, then the message.
fn warn(msg: impl AsRef<str>) {
    let tag = style::amber("warn");
    let text = msg.as_ref();
    println!("{tag} {text}");
}

/// Prints a red `err` status line: the colored tag, two spaces, then the message.
fn err(msg: impl AsRef<str>) {
    let tag = style::red("err");
    let text = msg.as_ref();
    println!("{tag}  {text}");
}

/// Prints a dimmed `note` status line: the tag, one space, then the message.
///
/// A note carries information rather than a problem; the offline update check uses
/// it so that a missing network never reads as a failure.
fn note(msg: impl AsRef<str>) {
    let tag = style::dim("note");
    let text = msg.as_ref();
    println!("{tag} {text}");
}

pub fn run() -> Result<()> {
    let mut issues = Vec::new();

    check_path("galdr root", paths::galdr_root().ok(), &mut issues);
    check_path("skills root", paths::skills_root().ok(), &mut issues);

    match config::Config::load() {
        Ok(cfg) => ok(format!("config endpoint is loopback: {}", cfg.endpoint)),
        Err(e) => {
            err(format!("config: {e:#}"));
            issues.push("config is invalid".to_string());
        }
    }

    match ipc::query(&ipc::Request::Ping) {
        Ok(ipc::Response::Pong { version }) => {
            report_daemon_version(version.as_deref());
            report_daemon_management();
        }
        _ => warn("daemon is not running; CLI fallbacks will be used"),
    }

    let actives = record::read_active_all();
    match actives.as_slice() {
        [] => ok("no active recording"),
        [active] => ok(format!(
            "active recording: {} ({})",
            active.name, active.rec_id
        )),
        many => ok(format!(
            "{} active recordings: {}",
            many.len(),
            many.iter()
                .map(|a| a.name.clone())
                .collect::<Vec<_>>()
                .join(", ")
        )),
    }

    match catalog::open_in_memory_indexed() {
        Ok(conn) => {
            let recordings = catalog::list_recordings(&conn).unwrap_or_default();
            let skills = catalog::list_skills(&conn).unwrap_or_default();
            let usages = catalog::list_skill_usage(&conn, None).unwrap_or_default();
            let outcomes = catalog::list_skill_outcomes(&conn, None).unwrap_or_default();
            let orphan_count = skills.iter().filter(|skill| skill.orphan).count();
            let draft_count = skills
                .iter()
                .filter(|skill| {
                    matches!(
                        skill.status.as_str(),
                        catalog::STATUS_DRAFT | catalog::STATUS_PARAM_DRAFT
                    )
                })
                .count();
            ok(format!(
                "catalog rebuild check: {} recordings, {} skills, {} usages, {} outcomes",
                recordings.len(),
                skills.len(),
                usages.len(),
                outcomes.len()
            ));
            if orphan_count > 0 {
                warn(format!(
                    "{orphan_count} skill(s) have missing recording provenance"
                ));
            }
            if draft_count > 0 {
                warn(format!("{draft_count} skill(s) are still drafts"));
            }
            report_discoverability(&skills);
            report_validation(&skills);
        }
        Err(e) => {
            err(format!("catalog rebuild check failed: {e:#}"));
            issues.push("catalog cannot be rebuilt from disk".to_string());
        }
    }

    match crate::skill::installed_version() {
        Some(v) if crate::skill::is_current() => {
            ok(format!("galdr skill installed and current (version {v})"))
        }
        Some(v) => warn(format!(
            "galdr skill is stale (installed {v}, binary {}); run `galdr setup skill`",
            env!("CARGO_PKG_VERSION")
        )),
        None => {
            warn("galdr skill not installed; run `galdr setup skill` so your agent can drive galdr")
        }
    }

    match setup::claude_hook_configured() {
        Some(true) => ok("Claude Code PostToolUse hook is configured"),
        Some(false) => {
            warn("Claude Code PostToolUse hook is missing");
            issues.push("Claude Code hook is missing".to_string());
        }
        None => {
            warn("Claude Code settings not found or unreadable");
            issues.push("Claude Code settings are unavailable".to_string());
        }
    }

    report_mac_permissions();

    if let Ok(root) = paths::frames_root()
        && root.is_dir()
    {
        let leftover = std::fs::read_dir(&root)
            .map(|d| {
                d.filter_map(|e| e.ok())
                    .filter(|e| e.path().is_dir())
                    .count()
            })
            .unwrap_or(0);
        if leftover > 0 {
            warn(format!(
                "{leftover} recording(s) have leftover authoring frames (pixels on disk) at {}; they purge on a final distill, or delete the directory",
                root.display()
            ));
        }
    }

    report_update_check();

    if issues.is_empty() {
        println!("{}", style::green("doctor: ok"));
        Ok(())
    } else {
        bail!("doctor found {} actionable issue(s)", issues.len())
    }
}

/// A fail-soft "is a newer galdr available?" line at the end of `doctor`. An explicit
/// `doctor` run is allowed to touch the network (bounded by curl's 3s timeout), but
/// the result is only ever informational: up-to-date is `ok`, an available update is
/// `warn` with the remedy, and offline (or any check failure) is a `note` that never
/// adds to the actionable-issue count. galdr never treats missing network as an error.
fn report_update_check() {
    match crate::upgrade::check_latest() {
        Ok(crate::upgrade::LatestCheck::UpToDate { current }) => {
            ok(format!("up to date (v{current})"))
        }
        Ok(crate::upgrade::LatestCheck::LocalAhead { current, latest }) => ok(format!(
            "up to date (local v{current} ahead of crates.io v{latest})"
        )),
        Ok(crate::upgrade::LatestCheck::Newer { latest, .. }) => warn(format!(
            "new version available: v{latest} — run galdr upgrade"
        )),
        Ok(crate::upgrade::LatestCheck::Offline) | Err(_) => note("update check skipped (offline)"),
    }
}

/// Reports the running daemon's version against this CLI's own version.
///
/// A daemon left over from an older binary keeps serving stale behavior over the
/// socket, so a mismatch is surfaced together with the restart command. It is a
/// warning rather than an error because the CLI fallbacks still work. A daemon that
/// reports no version (`None`, an older daemon predating the field) is unknown and
/// gets the same restart warning.
fn report_daemon_version(daemon_version: Option<&str>) {
    let cli = env!("CARGO_PKG_VERSION");

    let Some(reported) = daemon_version else {
        warn(format!(
            "daemon is running but did not report its version (older daemon), CLI is v{cli} — restart it: galdr daemon stop && galdr daemon"
        ));
        return;
    };

    if reported == cli {
        ok(format!("daemon is running (version {reported})"));
    } else {
        warn(format!(
            "daemon is v{reported}, CLI is v{cli} — restart it: galdr daemon stop && galdr daemon"
        ));
    }
}

/// Reports whether launchd manages the running daemon.
///
/// A managed daemon survives logout and reboots; an unmanaged (nohup) one can die
/// silently and go stale, so that case gets an informational note with the fix and
/// never a failure. When `crate::launchd::management()` has no answer (`None`, the
/// case off macOS where there is no launchd) nothing is printed.
fn report_daemon_management() {
    let Some(managed) = crate::launchd::management() else {
        return;
    };

    if managed {
        ok("daemon is managed by launchd (auto-starts at login, restarts on crash)");
    } else {
        note(
            "daemon runs unmanaged (no LaunchAgent); run `galdr daemon install` to auto-start it and survive reboots",
        );
    }
}

/// Reports whether galdr-distilled skills are discoverable by the installed
/// harnesses. A skill the harness can't load is galdr failing at its one job, so
/// this surfaces it (a warning, not an error: `galdr link` fixes it).
///
/// Prints nothing when there are no galdr-origin skills or no detected harness with
/// a skills directory. A skill counts as unreachable when at least one such harness
/// directory (other than the canonical skills root) has no entry named after it.
fn report_discoverability(skills: &[catalog::SkillRow]) {
    let galdr_skills: Vec<&catalog::SkillRow> = skills
        .iter()
        .filter(|s| s.origin == catalog::ORIGIN_GALDR)
        .collect();
    if galdr_skills.is_empty() {
        return;
    }

    // Resolve each detected harness's skills directory once, not per skill.
    let harness_dirs: Vec<_> = crate::harness::detect()
        .into_iter()
        .filter(|h| h.detected)
        .filter_map(|h| crate::harness::skills_dir(&h.key))
        .collect();
    if harness_dirs.is_empty() {
        return;
    }

    // Same-root (the harness reads the canonical dir) counts as reachable, so only
    // directories that differ from the canonical root need a per-skill entry. If the
    // canonical root cannot be resolved, every directory is checked.
    let canonical_root = crate::paths::skills_root().ok();
    let linked_dirs: Vec<_> = harness_dirs
        .iter()
        .filter(|dir| {
            !canonical_root
                .as_ref()
                .is_some_and(|root| **dir == *root)
        })
        .collect();

    // Each skill is counted at most once, however many harnesses miss it.
    let unreachable = galdr_skills
        .iter()
        .filter(|skill| {
            linked_dirs
                .iter()
                .any(|dir| !dir.join(&skill.skill_name).exists())
        })
        .count();

    if unreachable > 0 {
        warn(format!(
            "{unreachable} galdr skill(s) are not discoverable by an installed harness; run `galdr link`"
        ));
    } else {
        ok(format!(
            "{} galdr skill(s) discoverable across {} harness(es)",
            galdr_skills.len(),
            harness_dirs.len()
        ));
    }
}

/// Runs the content gate over galdr's own installed skills and warns about any that
/// would fail it (e.g. a skill distilled before the gate existed). Scoped to
/// galdr-distilled skills: the shared root also holds hand-authored skills with their
/// own structure, which galdr neither wrote nor judges here (`galdr validate --all`
/// audits those on demand). A warning, not an error: the fix is the operator's, and a
/// pre-existing skill must not break `doctor`.
///
/// A skill whose file cannot be read is never validated, so it is reported in its own
/// warning and left out of the "pass" count instead of being counted as passing.
fn report_validation(skills: &[catalog::SkillRow]) {
    let galdr: Vec<&catalog::SkillRow> = skills
        .iter()
        .filter(|s| s.origin == catalog::ORIGIN_GALDR)
        .collect();
    if galdr.is_empty() {
        return;
    }

    let mut failing = Vec::new();
    let mut unreadable = Vec::new();
    for skill in &galdr {
        let Ok(md) = std::fs::read_to_string(&skill.skill_path) else {
            unreadable.push(skill.skill_name.clone());
            continue;
        };
        let draft = matches!(
            skill.status.as_str(),
            catalog::STATUS_DRAFT | catalog::STATUS_PARAM_DRAFT
        );
        let ctx = validate::ValidationCtx::new(draft, false);
        if validate::validate_skill(&md, &ctx).has_blocking(false) {
            failing.push(skill.skill_name.clone());
        }
    }

    if !unreadable.is_empty() {
        warn(format!(
            "{} galdr skill(s) could not be read for validation: {}",
            unreadable.len(),
            unreadable.join(", ")
        ));
    }

    // Only skills that were actually read and checked can be said to pass.
    let validated = galdr.len() - unreadable.len();
    if !failing.is_empty() {
        warn(format!(
            "{} galdr skill(s) would fail the validation gate: {}",
            failing.len(),
            failing.join(", ")
        ));
        println!(
            "     {}",
            style::dim("fix or re-distill them; run `galdr validate <skill>` for the findings")
        );
    } else if validated > 0 {
        ok(format!(
            "{} galdr skill(s) pass the validation gate",
            validated
        ));
    }
}

/// Reports the two TCC permissions used by the native macOS observe lane.
///
/// Prints nothing when `observe_mac::mac_permissions()` has no answer (off macOS,
/// where there is no `observe mac`). Never an error: without Input Monitoring
/// `observe mac` can't run, and without Accessibility it degrades to coordinate-only
/// clicks, but both are the user's to grant in System Settings, so a missing grant is
/// a `note` and a present one is `ok`.
fn report_mac_permissions() {
    let Some(perms) = observe_mac::mac_permissions() else {
        return;
    };

    // (granted?, line when granted, line when missing), printed in this order.
    let checks = [
        (
            perms.input_monitoring,
            "Input Monitoring granted (macOS observe: keys/clicks/scroll)",
            "Input Monitoring not granted — `galdr observe mac` needs it. Grant it in System Settings → Privacy & Security → Input Monitoring (enable your terminal).",
        ),
        (
            perms.accessibility,
            "Accessibility granted (macOS observe: element role/name/window/app)",
            "Accessibility not granted — `galdr observe mac` clicks would be coordinate-only. Grant it in System Settings → Privacy & Security → Accessibility for semantic targeting.",
        ),
    ];

    for (granted, granted_line, missing_line) in checks {
        if granted {
            ok(granted_line);
        } else {
            note(missing_line);
        }
    }
}

/// Reports one well-known path under `label`.
///
/// - No path at all: an `err` line, and `"{label} unavailable"` is pushed onto `issues`.
/// - Path exists: an `ok` line showing it.
/// - Path resolved but not on disk: a `warn` line only; `issues` is left untouched.
fn check_path(label: &str, path: Option<std::path::PathBuf>, issues: &mut Vec<String>) {
    match path {
        None => {
            err(format!("{label}: path unavailable"));
            issues.push(format!("{label} unavailable"));
        }
        Some(found) if found.exists() => ok(format!("{label}: {}", found.display())),
        Some(absent) => warn(format!("{label} missing: {}", absent.display())),
    }
}