// crtx 0.1.1 - Docs.rs

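//! `cortex doctor` — strict store precondition checks, idempotent migration
//! repair (`--repair`), and deployment-id projection (`--print-deployment-id`).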

use std::path::Path;

use clap::Args;
use cortex_ledger::audit::verify_schema_migration_v1_to_v2_boundary;
use serde::Serialize;

use crate::cmd::open_default_store;
use crate::cmd::restore::production::deployment_id_for;
use crate::exit::Exit;
use crate::output::{self, Envelope, Outcome};
use crate::paths::DataLayout;

/// `cortex doctor` flags.
// Maintainer notes (kept in `//` comments on purpose: clap's derive surfaces
// the `///` comments on the fields below as `--help` text, so those are
// user-facing strings and should only be edited as such).
//
// Mode summary, as enforced by the `conflicts_with` / `requires` attributes:
//   * `--strict`, `--print-deployment-id` and `--repair` are mutually exclusive;
//   * `--dry-run` is only accepted together with `--repair`.
// `run` dispatches on these flags in the order print-deployment-id, repair,
// strict, and reports a usage error when none of them is set.
#[derive(Debug, Args)]
pub struct DoctorArgs {
    /// Run strict schema-version gates.
    #[arg(long)]
    pub strict: bool,
    /// Print the deterministic deployment id derived from the current
    /// store's data directory and exit. The deployment id is the
    /// authoritative value to mint into a `RESTORE_INTENT` payload's
    /// `deployment_id` field (the verifier compares structurally —
    /// `restore.intent.deployment_id.mismatch` fires on any drift).
    /// Mutually exclusive with `--strict`: this is a read-only
    /// projection, not a schema-version gate.
    #[arg(long, conflicts_with = "strict")]
    pub print_deployment_id: bool,
    /// Apply any pending migrations and report the result. Safe to run on a
    /// store that is already fully migrated — the operation is idempotent.
    /// Resolves "duplicate column name: source_attestation_json" errors on
    /// stores where `apply_expand_backfill_skeleton` ran before migration
    /// `003_schema_v2_expand` was bundled into `apply_pending`.
    // Both conflicts are declared here; clap treats a conflict as symmetric,
    // so `--strict` and `--print-deployment-id` need no matching attribute.
    #[arg(
        long,
        conflicts_with = "strict",
        conflicts_with = "print_deployment_id"
    )]
    pub repair: bool,
    /// Show what `--repair` would apply without writing any changes.
    /// Requires `--repair`.
    #[arg(long, requires = "repair")]
    pub dry_run: bool,
}

/// Run the doctor command.
pub fn run(args: DoctorArgs) -> Exit {
    let json = output::json_enabled();
    if args.print_deployment_id {
        return run_print_deployment_id(json);
    }
    if args.repair {
        return run_repair(json, args.dry_run);
    }
    if !args.strict {
        if json {
            let envelope = Envelope::new(
                "cortex.doctor",
                Exit::Usage,
                DoctorReport::usage(
                    "doctor requires --strict to run schema-version gates (or --print-deployment-id for the deployment binding)",
                ),
            );
            return output::emit(&envelope, Exit::Usage);
        }
        eprintln!(
            "cortex doctor: pass --strict to run schema-version gates, or --print-deployment-id to print the deployment binding."
        );
        return Exit::Usage;
    }

    let pool = match open_default_store("doctor") {
        Ok(pool) => pool,
        Err(exit) => {
            return finish(
                exit,
                DoctorReport::precondition("failed to open store"),
                None,
            )
        }
    };
    let layout = match DataLayout::resolve(None, None) {
        Ok(layout) => layout,
        Err(exit) => {
            return finish(
                exit,
                DoctorReport::precondition("failed to resolve layout"),
                None,
            )
        }
    };

    match cortex_store::verify::verify_schema_version(&pool, cortex_core::SCHEMA_VERSION) {
        Ok(report) if report.is_ok() => {
            // Schema v2 atomic cutover (ADR 0018, ADR 0033 §1):
            // * Pre-cutover binaries left `SCHEMA_VERSION = 1` and never
            //   required the boundary.
            // * Post-cutover binaries (`SCHEMA_VERSION >= 2`) require the
            //   boundary **only when the active JSONL log contains v1-framed
            //   rows that pre-date the cutover**. A freshly initialised v2
            //   store has no v1 rows and therefore no boundary; demanding one
            //   would refuse correct fresh-v2 stores. A migrated v1->v2
            //   store has v1 rows + exactly one boundary row.
            let needs_boundary = match (cortex_core::SCHEMA_VERSION >= 2)
                .then(|| contains_pre_cutover_v1_rows(&layout.event_log_path))
            {
                Some(Ok(value)) => value,
                Some(Err(err)) => {
                    let detail =
                        format!("failed to inspect event log for pre-cutover v1 rows: {err}");
                    if !json {
                        eprintln!("cortex doctor: {detail}");
                    }
                    return finish(
                        Exit::PreconditionUnmet,
                        DoctorReport::precondition(detail),
                        None,
                    );
                }
                None => false,
            };
            match verify_schema_migration_v1_to_v2_boundary(&layout.event_log_path, needs_boundary)
            {
                Ok(boundary_report) if boundary_report.ok() => {}
                Ok(boundary_report) => {
                    let mut details = Vec::new();
                    for failure in &boundary_report.failures {
                        let line = format!("{}: {:?}", failure.invariant, failure.detail);
                        if !json {
                            eprintln!("cortex doctor: {line}");
                        }
                        details.push(line);
                    }
                    if !json {
                        eprintln!(
                            "cortex doctor: hint: if this is a fresh v2 store, the boundary check \
                             may be a false positive — run `cortex doctor --repair` to confirm; \
                             for a migrated store run `cortex migrate v2 --backup-manifest <path>` \
                             to complete the upgrade"
                        );
                    }
                    return finish(
                        Exit::SchemaMismatch,
                        DoctorReport::schema_mismatch(
                            "boundary verification failed — hint: if this is a fresh v2 store run \
                             `cortex doctor --repair` to confirm; for a migrated store run \
                             `cortex migrate v2 --backup-manifest <path>` to complete the upgrade",
                            details,
                        ),
                        None,
                    );
                }
                Err(err) => {
                    let detail = format!("failed to verify schema boundary events: {err}");
                    if !json {
                        eprintln!("cortex doctor: {detail}");
                    }
                    return finish(
                        Exit::PreconditionUnmet,
                        DoctorReport::precondition(detail),
                        None,
                    );
                }
            }
            let checked_tables: Vec<String> = report
                .checked_tables
                .iter()
                .map(|s| s.to_string())
                .collect();
            if !json {
                println!(
                    "cortex doctor: ok: schema shape is present and schema_version matches code version {} across {} tables",
                    report.expected,
                    checked_tables.len()
                );
            }
            finish(
                Exit::Ok,
                DoctorReport::ok(report.expected, checked_tables),
                None,
            )
        }
        Ok(report) => {
            let mut details = Vec::new();
            for failure in &report.failures {
                let line = format!("{}: {}", failure.invariant(), failure.detail());
                if !json {
                    eprintln!("cortex doctor: {line}");
                }
                details.push(line);
            }
            if !json {
                eprintln!(
                    "cortex doctor: hint: run `cortex doctor --repair` to apply pending \
                     migrations, or `cortex migrate v2 --backup-manifest <path>` for a major \
                     version upgrade"
                );
            }
            finish(
                Exit::SchemaMismatch,
                DoctorReport::schema_mismatch(
                    "schema_version mismatch — hint: run `cortex doctor --repair` to apply \
                     pending migrations, or `cortex migrate v2 --backup-manifest <path>` for a \
                     major version upgrade",
                    details,
                ),
                None,
            )
        }
        Err(err) => {
            let detail = format!("failed to verify schema version: {err}");
            if !json {
                eprintln!("cortex doctor: {detail}");
            }
            finish(
                Exit::PreconditionUnmet,
                DoctorReport::precondition(detail),
                None,
            )
        }
    }
}

/// Outcome of `cortex doctor --strict` / `--repair`. Separately serialized
/// from the human-output path so the JSON envelope and the prose lines describe
/// the same decision.
///
/// Optional and list fields are omitted from the serialized form when unset or
/// empty (see the `skip_serializing_if` attributes), so each mode only emits
/// the keys it actually populates.
#[derive(Debug, Serialize)]
struct DoctorReport {
    /// Machine-readable outcome token. The constructors in this file use
    /// `"ok"`, `"schema_mismatch"`, `"precondition_unmet"` and `"usage"`.
    status: &'static str,
    /// Human-readable explanation of the outcome.
    detail: String,
    /// For a successful `--strict` run: the schema version that was verified.
    #[serde(skip_serializing_if = "Option::is_none")]
    schema_version: Option<u16>,
    /// For a successful `--strict` run: names of the tables that were checked.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    checked_tables: Vec<String>,
    /// For a schema mismatch: one formatted line per failed invariant.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    failures: Vec<String>,
    /// For `--repair`: names of migrations that were applied (or would be
    /// applied under `--dry-run`). Empty when the store was already current.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    migrations_applied: Vec<String>,
    /// For `--repair`: number of migrations applied (legacy scalar kept for
    /// back-compat with existing tooling that reads this field).
    #[serde(skip_serializing_if = "Option::is_none")]
    migrations_applied_count: Option<usize>,
    /// For `--repair`: schema version (count of applied migrations) before the
    /// repair ran.
    #[serde(skip_serializing_if = "Option::is_none")]
    schema_version_before: Option<usize>,
    /// For `--repair`: schema version (count of applied migrations) after the
    /// repair ran. Equal to `schema_version_before` on `--dry-run`.
    #[serde(skip_serializing_if = "Option::is_none")]
    schema_version_after: Option<usize>,
    /// For `--repair`: whether the run was a `--dry-run` (`true` when no
    /// mutations were written). Absent for every other mode.
    #[serde(skip_serializing_if = "Option::is_none")]
    dry_run: Option<bool>,
    /// Stable invariant token when nothing needed to be applied.
    #[serde(skip_serializing_if = "Option::is_none")]
    invariant: Option<&'static str>,
}

/// Stable invariant emitted when `--repair` finds nothing to apply.
///
/// It is placed in the JSON report's `invariant` field whenever a repair
/// report lists zero migrations, and is appended to the human-readable success
/// line when `apply_pending` reports that it applied nothing. Tooling may
/// match on this token, so treat the string value as part of the CLI contract.
pub const INVARIANT_NO_MIGRATIONS_PENDING: &str = "doctor.repair.no_migrations_pending";

impl DoctorReport {
    /// Skeleton shared by every constructor: only `status` and `detail` are
    /// set; all optional fields are `None` and all lists are empty, so nothing
    /// else is serialized unless a constructor overrides it.
    fn bare(status: &'static str, detail: String) -> Self {
        Self {
            status,
            detail,
            schema_version: None,
            checked_tables: Vec::new(),
            failures: Vec::new(),
            migrations_applied: Vec::new(),
            migrations_applied_count: None,
            schema_version_before: None,
            schema_version_after: None,
            dry_run: None,
            invariant: None,
        }
    }

    /// Successful `--strict` run: records the verified schema version and the
    /// tables that were checked.
    fn ok(schema_version: u16, checked_tables: Vec<String>) -> Self {
        Self {
            schema_version: Some(schema_version),
            checked_tables,
            ..Self::bare(
                "ok",
                "schema shape is present and schema_version matches the running binary"
                    .to_string(),
            )
        }
    }

    /// Successful `--repair` run (real or dry).
    ///
    /// `migrations_applied_count` is derived from `applied_names`, and the
    /// [`INVARIANT_NO_MIGRATIONS_PENDING`] token is attached exactly when that
    /// list is empty.
    fn repair_ok(
        detail: impl Into<String>,
        applied_names: Vec<String>,
        schema_version_before: usize,
        schema_version_after: usize,
        dry_run: bool,
    ) -> Self {
        let applied_total = applied_names.len();
        let nothing_pending = match applied_total {
            0 => Some(INVARIANT_NO_MIGRATIONS_PENDING),
            _ => None,
        };
        Self {
            migrations_applied: applied_names,
            migrations_applied_count: Some(applied_total),
            schema_version_before: Some(schema_version_before),
            schema_version_after: Some(schema_version_after),
            dry_run: Some(dry_run),
            invariant: nothing_pending,
            ..Self::bare("ok", detail.into())
        }
    }

    /// A gate failed: `failures` holds one formatted line per broken invariant.
    fn schema_mismatch(detail: impl Into<String>, failures: Vec<String>) -> Self {
        Self {
            failures,
            ..Self::bare("schema_mismatch", detail.into())
        }
    }

    /// The check could not be carried out (store, layout or log unavailable).
    fn precondition(detail: impl Into<String>) -> Self {
        Self::bare("precondition_unmet", detail.into())
    }

    /// The command line did not select any doctor mode.
    fn usage(detail: impl Into<String>) -> Self {
        Self::bare("usage", detail.into())
    }
}

/// Conclude a doctor run with `exit`, emitting `report` as a JSON envelope
/// when JSON output is enabled.
///
/// In human mode the report is dropped (the caller has already printed its
/// prose lines) and `exit` is returned untouched. In JSON mode the envelope is
/// built for the `cortex.doctor` command, `outcome_override` — when supplied —
/// replaces the envelope's outcome, and the value returned by `output::emit`
/// becomes the result.
fn finish(exit: Exit, report: DoctorReport, outcome_override: Option<Outcome>) -> Exit {
    if output::json_enabled() {
        let base = Envelope::new("cortex.doctor", exit, report);
        let envelope = match outcome_override {
            Some(outcome) => base.with_outcome(outcome),
            None => base,
        };
        output::emit(&envelope, exit)
    } else {
        exit
    }
}

/// Returns `true` iff the JSONL log contains a schema-migration v1→v2
/// boundary marker — meaning the store was actually migrated in-place from v1.
///
/// The function name is historical: it no longer looks at individual v1 rows.
/// The authoritative signal is the presence of a
/// `SCHEMA_MIGRATION_V1_TO_V2_BOUNDARY` event in the log, not the
/// `schema_version` field on ingested events. A fresh v2 store may contain
/// events with `schema_version = 1` (e.g. ingested from historical session
/// files) without ever going through an in-place migration; keying on that
/// field made `cortex doctor --strict` demand a boundary row from valid
/// fresh-v2 stores.
///
/// `doctor --strict` uses the answer to decide whether the boundary row is
/// mandatory. The log is scanned with the boundary requirement switched off,
/// so a missing marker yields `Ok(false)` rather than a verification failure.
///
/// # Errors
///
/// Propagates the error returned by the boundary scan of `event_log_path`.
fn contains_pre_cutover_v1_rows(event_log_path: &Path) -> Result<bool, cortex_ledger::JsonlError> {
    let boundary_scan = verify_schema_migration_v1_to_v2_boundary(event_log_path, false)?;
    let migrated_in_place = !boundary_scan.boundary_rows.is_empty();
    Ok(migrated_in_place)
}

/// `cortex doctor --repair [--dry-run]` — apply pending migrations idempotently
/// and report.
///
/// This is the prescribed fix for operators who hit:
///   "duplicate column name: source_attestation_json"
/// when opening a live store whose `events` table already had
/// `source_attestation_json` (added by `apply_expand_backfill_skeleton`) before
/// migration `003_schema_v2_expand` was bundled into `apply_pending`.
///
/// After this release the migration is guarded with `PRAGMA table_info` so the
/// error cannot recur; `--repair` re-runs `apply_pending` to record the
/// migration as applied and bring the `_migrations` table up to date.
///
/// With `--dry-run`, `apply_pending` is never called; the full inventory of
/// known migrations is listed (there is no "list pending" query to narrow it
/// down) and the function exits 0.
///
/// Outcomes:
/// * layout cannot be resolved → the callee's exit code, `precondition_unmet`;
/// * database file missing or unopenable → `Exit::PreconditionUnmet`;
/// * `apply_pending` fails → `Exit::Internal`;
/// * otherwise → `Exit::Ok` with a repair report.
fn run_repair(json: bool, dry_run: bool) -> Exit {
    // Prefix for every human-readable line, so the operator sees which mode ran.
    let flag = if dry_run {
        "cortex doctor --repair --dry-run"
    } else {
        "cortex doctor --repair"
    };

    let layout = match DataLayout::resolve(None, None) {
        Ok(layout) => layout,
        Err(exit) => {
            let detail = "failed to resolve data layout";
            if !json {
                eprintln!("{flag}: {detail}");
            }
            return finish(exit, DoctorReport::precondition(detail), None);
        }
    };

    // Checked up front so that opening the connection below cannot create a
    // brand-new, empty database as a side effect of a repair attempt.
    if !layout.db_path.exists() {
        let detail = format!(
            "database {} does not exist; run `cortex init` first",
            layout.db_path.display()
        );
        if !json {
            eprintln!("{flag}: {detail}");
        }
        return finish(
            Exit::PreconditionUnmet,
            DoctorReport::precondition(detail),
            None,
        );
    }

    let pool = match rusqlite::Connection::open(&layout.db_path) {
        Ok(pool) => pool,
        Err(err) => {
            let detail = format!("failed to open database: {err}");
            if !json {
                eprintln!("{flag}: {detail}");
            }
            return finish(
                Exit::PreconditionUnmet,
                DoctorReport::precondition(detail),
                None,
            );
        }
    };

    // --- apply (or dry-run preview) ---
    // apply_pending is idempotent: returns the count of migrations applied (0 if already up to date).
    // We don't have a separate "list pending" query; known_migration_names() lists all known
    // migrations and serves as the inventory for the dry-run display.
    let known_names: Vec<String> = cortex_store::migrate::known_migration_names()
        .iter()
        .map(|s| s.to_string())
        .collect();

    if dry_run {
        // NOTE(review): without a pending-list query the preview cannot tell
        // which migrations are already recorded, so it reports the whole
        // inventory and leaves the before/after versions at 0.
        let detail = format!(
            "dry-run: {} known migration(s) defined; run without --dry-run to apply any pending",
            known_names.len()
        );
        if !json {
            println!("{flag}: ok: {detail}");
            for name in &known_names {
                println!("  - {name}");
            }
        }
        return finish(
            Exit::Ok,
            DoctorReport::repair_ok(detail, known_names, 0, 0, true),
            None,
        );
    }

    match cortex_store::migrate::apply_pending(&pool) {
        Ok(applied) => {
            let detail = if applied == 0 {
                "store is already fully migrated; no changes needed".to_string()
            } else {
                format!("applied {applied} pending migration(s); store is now up to date")
            };
            if !json {
                if applied == 0 {
                    println!("{flag}: ok: {detail} [invariant: {INVARIANT_NO_MIGRATIONS_PENDING}]");
                } else {
                    println!("{flag}: ok: {detail}");
                }
            }
            // A successful `apply_pending` leaves every known migration
            // applied, so the store now sits at `total`, and it sat at
            // `total - applied` before. The migrations that were just applied
            // are therefore the *last* `applied` entries of the inventory, not
            // the first ones (those were already recorded on a partially
            // migrated store).
            // Assumes `known_migration_names()` is in application order and
            // matches the set `apply_pending` works through — TODO confirm.
            let total = known_names.len();
            let version_before = total.saturating_sub(applied);
            let applied_names: Vec<String> =
                known_names.into_iter().skip(version_before).collect();
            finish(
                Exit::Ok,
                DoctorReport::repair_ok(detail, applied_names, version_before, total, false),
                None,
            )
        }
        Err(err) => {
            let detail = format!(
                "migration failed: {err}\n\
                 If you see 'duplicate column name: source_attestation_json', \
                 upgrade to a cortex binary >= this release — the migration is \
                 now idempotent."
            );
            if !json {
                eprintln!("{flag}: {detail}");
            }
            // The report reuses the `precondition_unmet` status (no dedicated
            // constructor exists) while the exit code signals an internal error.
            finish(Exit::Internal, DoctorReport::precondition(detail), None)
        }
    }
}

/// `cortex doctor --print-deployment-id` — print the deterministic deployment id.
///
/// A `RESTORE_INTENT` payload is bound to one data directory through its
/// `deployment_id`; the verifier rejects payloads whose id does not match the
/// runtime layout. This mode hands the operator the value to mint, so the
/// BLAKE3-of-canonical-path does not have to be re-derived by hand — closing
/// one of the `docs/RUNBOOK_PRODUCTION_RESTORE_DRILL.md` §9 known gaps.
///
/// The function only resolves the data layout and asks `deployment_id_for`
/// for the id; it does not itself open SQLite or the JSONL log. Layout
/// resolution is the only failure it handles.
///
/// Output: the bare id on stdout in human mode, or a `cortex.doctor` envelope
/// carrying `status`, `deployment_id`, `data_dir` and `detail` in JSON mode.
fn run_print_deployment_id(json: bool) -> Exit {
    let layout = match DataLayout::resolve(None, None) {
        Err(exit) => {
            return finish(
                exit,
                DoctorReport::precondition("failed to resolve data layout"),
                None,
            );
        }
        Ok(layout) => layout,
    };
    let deployment_id = deployment_id_for(&layout);

    if json {
        let data_dir = layout.data_dir.display().to_string();
        let body = serde_json::json!({
            "status": "ok",
            "deployment_id": deployment_id,
            "data_dir": data_dir,
            "detail": "deterministic BLAKE3 of canonicalize(data_dir).to_string_lossy(); mint this into RESTORE_INTENT.deployment_id verbatim.",
        });
        return output::emit(&Envelope::new("cortex.doctor", Exit::Ok, body), Exit::Ok);
    }

    println!("{deployment_id}");
    Exit::Ok
}