// cellos-cortex 0.1.0

//! Cortex → CellOS dispatch.
//!
//! [`CortexCellRunner`] translates a [`crate::ContextPack`] into a CellOS
//! [`ExecutionCellDocument`] and submits it through a [`CellSubmitter`].
//!
//! The submitter is a trait, not a direct dependency on the supervisor's
//! internals, so:
//!
//! - tests can substitute an in-process fake;
//! - the bridge stays decoupled from whichever supervisor frontend
//!   (HTTP, in-process, NATS) is wired in at the composition root;
//! - both Cortex and CellOS can evolve their internal APIs as long as
//!   the submitter contract holds.

use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

use async_trait::async_trait;
use cellos_core::events::cloud_event_v1_cortex_dispatched;
use cellos_core::ports::EventSink;
use cellos_core::types::{
    AuthorityBundle, CloudEventV1, ExecutionCellDocument, ExecutionCellSpec, Lifetime, RunSpec,
};
use serde::{Deserialize, Serialize};

use crate::context::ContextPack;
use crate::policy::{apply_policy, DoctrineAuthorityPolicy};

/// CloudEvent `source` stamped onto Cortex-bridge events emitted by this runner.
const CORTEX_EVENT_SOURCE: &str = "cellos-cortex";

/// CloudEvent `type` URN for the lifecycle-destroyed event the result reception
/// path keys on. Mirrors the constant in `cellos_core::events` so the bridge
/// does not need to depend on a re-exported symbol that may move.
const LIFECYCLE_DESTROYED_TYPE: &str = "dev.cellos.events.cell.lifecycle.v1.destroyed";

/// CloudEvent `type` URN for the per-process exit-code event. Per the FC-23
/// invariant the destroyed payload does *not* carry an `exitCode`; the
/// authenticated exit code lives only on `cell.command.v1.completed`. The
/// receiver picks the latest matching command-completed event and threads its
/// `exitCode` into [`CortexCellResult`].
const COMMAND_COMPLETED_TYPE: &str = "dev.cellos.events.cell.command.v1.completed";

/// CloudEvent `type` URN for v1 export-receipt events. The v1 payload carries
/// `destinationRelative` at the top level of `data`. Accepted alongside
/// [`EXPORT_COMPLETED_V2_TYPE`] so callers see whichever the supervisor emits.
const EXPORT_COMPLETED_V1_TYPE: &str = "dev.cellos.events.cell.export.v1.completed";
/// CloudEvent `type` URN for v2 export-receipt events. The v2 payload nests
/// the destination inside a `receipt` object.
const EXPORT_COMPLETED_V2_TYPE: &str = "dev.cellos.events.cell.export.v2.completed";

/// Environment variable that, when set, points the result-reception path at a
/// JSONL CloudEvent stream the supervisor is appending to. Matches the
/// supervisor's existing `CELL_OS_JSONL_EVENTS` knob so a single export wires
/// up the whole bridge without per-runner plumbing.
///
/// # Trust model (red-team finding C-F4)
///
/// This path is **operator-trusted**: the value is read from the process
/// environment, which is operator-controlled at supervisor launch time. The
/// `wait_for_result_from_jsonl` function does not canonicalise the path or
/// reject symlinks — that is the operator's responsibility. An attacker who
/// can already set arbitrary env vars on the supervisor process has many more
/// powerful primitives than redirecting this read; defending against that
/// threat model belongs at the process-launch boundary, not here.
pub const CELL_OS_JSONL_EVENTS_ENV: &str = "CELL_OS_JSONL_EVENTS";

/// Final result of a Cortex-dispatched cell run — the thing a Cortex caller
/// actually wants back once a cell terminates.
///
/// This is the bridge-shaped projection of "what happened to that cell?",
/// assembled from the CloudEvents the supervisor emitted for one cell:
///
/// - `cell_id` / `pack_id` correlate this result with the originating dispatch.
/// - `exit_code` / `success` are the run outcome. `exit_code` is sourced from
///   the latest `cell.command.v1.completed` event for `cell_id`; when the
///   supervisor never emits one (forced teardown, supervisor-owned spawn
///   without an in-VM bridge), it defaults to `0` on success and `-1` on
///   failure so callers can rely on the field being present.
/// - `lifecycle_destroyed_at` is the destroyed event's `time` parsed to unix
///   milliseconds. `0` when the supervisor omitted the timestamp.
/// - `export_paths` is the list of `destinationRelative` (v1) /
///   `receipt.destination` (v2) strings for every export-completed event
///   tagged with this cell's id, in emission order.
/// - `doctrine_refs` is propagated verbatim from the dispatched
///   [`ContextPack`] — the destroyed payload does not carry doctrine ids, so
///   the receiver threads them in from the caller.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CortexCellResult {
    /// Id of the cell this result describes (the id the events were matched on).
    pub cell_id: String,
    /// Pack correlation key. The runner's `wait_for_result` fills this from
    /// the pack's `memory_digest`; the standalone JSONL path leaves it empty.
    pub pack_id: String,
    /// Exit code from the latest matching command-completed event, or the
    /// `0` / `-1` fallback derived from `success` when none was observed.
    pub exit_code: i32,
    /// `true` only when the destroyed event's `outcome` is `"succeeded"`.
    pub success: bool,
    /// Unix milliseconds. `0` when the destroyed event omitted `time` or the
    /// value could not be parsed.
    pub lifecycle_destroyed_at: u64,
    /// Export destinations for this cell, in emission order.
    pub export_paths: Vec<String>,
    /// Doctrine ids copied from the dispatched pack (empty on the standalone
    /// JSONL path, which has no pack to copy them from).
    pub doctrine_refs: Vec<String>,
}

/// Outcome of dispatching a [`ContextPack`] into a CellOS cell.
///
/// Produced by a [`CellSubmitter`] and returned unchanged from
/// [`CortexCellRunner::dispatch`]; it can then be handed to
/// [`CortexCellRunner::wait_for_result`] to obtain the terminal result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CellSubmissionOutcome {
    /// The `spec.id` the supervisor saw (echoed back for correlation).
    pub cell_id: String,
    /// Exit code reported by the supervisor, if execution completed.
    /// `None` when the call was fire-and-forget or the supervisor did not
    /// surface an exit code (e.g. cell still running).
    pub exit_code: Option<i32>,
    /// CellOS lifecycle events captured during this submission, in emission order.
    /// Empty when the submitter is fire-and-forget. When this already contains
    /// a destroyed event for `cell_id`, `wait_for_result` builds its result
    /// from these events without touching the JSONL stream.
    pub lifecycle_events: Vec<CloudEventV1>,
}

/// The recorded translation of a Cortex pack into a CellOS spec — useful for
/// audit ("which cell ran for which pack?") and tests.
///
/// Returned by [`CortexCellRunner::translate`].
#[derive(Debug, Clone)]
pub struct ContextPackTranslation {
    /// A clone of the pack that was translated.
    pub pack: ContextPack,
    /// The execution cell document generated from `pack`.
    pub document: ExecutionCellDocument,
}

/// Abstract submission contract — implemented by whatever frontend is wired
/// into the supervisor at the composition root.
///
/// The runner holds the submitter as `Arc<dyn CellSubmitter>`, hence the
/// `Send + Sync` supertraits.
#[async_trait]
pub trait CellSubmitter: Send + Sync {
    /// Submit a fully-formed execution cell document and return the outcome.
    ///
    /// # Errors
    ///
    /// Any error returned here is propagated as-is by
    /// [`CortexCellRunner::dispatch`]; no dispatched event is emitted in that
    /// case.
    async fn submit(
        &self,
        document: &ExecutionCellDocument,
    ) -> Result<CellSubmissionOutcome, anyhow::Error>;
}

/// Translate Cortex `ContextPack`s into CellOS `ExecutionCellDocument`s and
/// dispatch them through a [`CellSubmitter`].
///
/// Construct with [`CortexCellRunner::new`] and customise via the `with_*`
/// builder methods.
pub struct CortexCellRunner {
    /// Frontend every translated document is submitted through.
    submitter: Arc<dyn CellSubmitter>,
    /// Base command line for the agent; the pack's task is appended as the
    /// last element during translation.
    agent_argv: Vec<String>,
    /// `apiVersion` stamped onto generated documents.
    api_version: String,
    /// `kind` stamped onto generated documents.
    kind: String,
    /// TTL (seconds) used when a pack has no `expires_at`; also the upper
    /// bound applied to TTLs derived from `expires_at`.
    default_ttl_seconds: u64,
    /// Doctrine-authority policy applied to every translated spec.
    policy: DoctrineAuthorityPolicy,
    /// Optional CellOS event sink. When set, the runner emits a
    /// `dev.cellos.events.cell.cortex.v1.dispatched` CloudEvent after every
    /// successful submission (best-effort — emit failures are logged but do
    /// not fail the dispatch).
    event_sink: Option<Arc<dyn EventSink>>,
}

impl CortexCellRunner {
    /// Create a runner that submits cells through `submitter` and launches
    /// `agent_argv` inside each one. During translation the pack's `task` is
    /// pushed onto the end of the argv, so an agent can read it as `$1`.
    ///
    /// Defaults: `apiVersion` `cellos.io/v1`, kind `ExecutionCell`, a
    /// 300-second TTL, the built-in [`DoctrineAuthorityPolicy`] (ADR-0009) and
    /// no event sink. Use [`Self::with_policy`] to substitute an
    /// operator-overridden table.
    pub fn new(submitter: Arc<dyn CellSubmitter>, agent_argv: Vec<String>) -> Self {
        let api_version = String::from("cellos.io/v1");
        let kind = String::from("ExecutionCell");
        Self {
            submitter,
            agent_argv,
            api_version,
            kind,
            default_ttl_seconds: 300,
            policy: DoctrineAuthorityPolicy::built_in(),
            event_sink: None,
        }
    }

    /// Attach a CellOS [`EventSink`] so the runner emits a
    /// `dev.cellos.events.cell.cortex.v1.dispatched` event after each
    /// successful submission.
    ///
    /// The emit is best-effort: failures are logged via `tracing::warn!` and
    /// never fail the underlying dispatch. Leaving this unset (the default)
    /// preserves the legacy fire-and-forget behaviour.
    pub fn with_event_sink(mut self, event_sink: Arc<dyn EventSink>) -> Self {
        self.event_sink = Some(event_sink);
        self
    }

    /// Replace the `apiVersion` written into generated cell documents.
    pub fn with_api_version(mut self, api_version: impl Into<String>) -> Self {
        let version: String = api_version.into();
        self.api_version = version;
        self
    }

    /// Override the default TTL (seconds) applied when the pack does not
    /// supply an `expires_at`.
    pub fn with_default_ttl_seconds(mut self, ttl: u64) -> Self {
        self.default_ttl_seconds = ttl;
        self
    }

    /// Override the doctrine-authority policy applied during translation.
    /// See ADR-0009 for the mapping table and override semantics.
    pub fn with_policy(mut self, policy: DoctrineAuthorityPolicy) -> Self {
        self.policy = policy;
        self
    }

    /// Build the [`ExecutionCellDocument`] a pack maps to, without submitting
    /// anything.
    ///
    /// Handy for tests, dry-runs and audits: the caller can see the exact spec
    /// a pack would produce before any host effects are committed. The
    /// returned translation also carries a clone of the input pack.
    pub fn translate(&self, pack: &ContextPack) -> ContextPackTranslation {
        let id = pack_to_cell_id(pack);

        // Agent command line: the configured argv followed by the task.
        let argv: Vec<String> = self
            .agent_argv
            .iter()
            .cloned()
            .chain(std::iter::once(pack.task.clone()))
            .collect();

        // One TTL drives both the run timeout (in ms) and the cell lifetime.
        let ttl_seconds = self.ttl_for_pack(pack);
        let run = RunSpec {
            argv,
            working_directory: None,
            timeout_ms: Some(ttl_seconds.saturating_mul(1000)),
            limits: None,
            secret_delivery: Default::default(),
        };

        let mut spec = ExecutionCellSpec {
            id,
            correlation: None,
            ingress: None,
            environment: None,
            placement: None,
            policy: None,
            identity: None,
            run: Some(run),
            authority: AuthorityBundle::default(),
            lifetime: Lifetime { ttl_seconds },
            export: None,
            telemetry: None,
        };

        // ADR-0009: the doctrine policy is layered over the structural
        // translation. Mutations only ever move toward least authority, and
        // doctrine ids without a matching rule leave the spec untouched.
        apply_policy(pack, &mut spec, &self.policy);

        ContextPackTranslation {
            pack: pack.clone(),
            document: ExecutionCellDocument {
                api_version: self.api_version.clone(),
                kind: self.kind.clone(),
                spec,
            },
        }
    }

    /// Dispatch a pack and return the submitter's outcome.
    ///
    /// On success, if an [`EventSink`] was wired via [`Self::with_event_sink`],
    /// the runner emits a single
    /// `dev.cellos.events.cell.cortex.v1.dispatched` CloudEvent recording the
    /// pack id, the supervisor-side cell id, and the doctrine refs cited by
    /// the pack. Emission is best-effort: a sink failure is logged at
    /// `warn` level and never converted into a dispatch failure — the cell
    /// has already been submitted by the time we get here, and the audit
    /// event must not become a critical-path dependency.
    pub async fn dispatch(
        &self,
        pack: &ContextPack,
    ) -> Result<CellSubmissionOutcome, anyhow::Error> {
        let translation = self.translate(pack);
        let outcome = self.submitter.submit(&translation.document).await?;

        if let Some(ref sink) = self.event_sink {
            let event = cloud_event_v1_cortex_dispatched(
                CORTEX_EVENT_SOURCE,
                &chrono::Utc::now().to_rfc3339(),
                &pack.memory_digest,
                &outcome.cell_id,
                &pack.doctrine_refs,
            );
            if let Err(e) = sink.emit(&event).await {
                tracing::warn!(
                    target: "cellos.cortex.runner",
                    cell_id = %outcome.cell_id,
                    error = %e,
                    "cortex_dispatched CloudEvent emit failed (best-effort, dispatch already succeeded)"
                );
            }
        }

        Ok(outcome)
    }

    /// Wait for a Cortex-dispatched cell to terminate and return its result.
    ///
    /// Resolution order (first success wins):
    ///
    /// 1. **In-memory lifecycle events** — if the submitter captured a
    ///    `cell.lifecycle.v1.destroyed` envelope on the `outcome` itself
    ///    (the in-process / sync-submitter path), build the result from that
    ///    immediately. No polling required.
    /// 2. **JSONL events file** — if `$CELL_OS_JSONL_EVENTS` is set, poll the
    ///    file at that path until a destroyed event for `outcome.cell_id`
    ///    appears or `timeout` elapses.
    ///
    /// `pack` is needed because the destroyed payload deliberately does not
    /// carry doctrine ids (the supervisor has no use for them) — the receiver
    /// threads them in from the dispatched pack so the returned result is
    /// audit-complete on its own.
    ///
    /// # Errors
    ///
    /// - No destroyed event was captured in memory and
    ///   `$CELL_OS_JSONL_EVENTS` is unset. An empty value is treated as unset,
    ///   since it cannot name a file and would otherwise only be discovered by
    ///   waiting out the whole timeout.
    /// - The JSONL stream did not yield a destroyed event within `timeout`
    ///   (`"cell result timeout after N seconds"`).
    ///
    /// The variable is read as an OS string, so a path that is not valid
    /// Unicode is used as-is instead of being mistaken for "unset".
    ///
    /// # Blocking behaviour (red-team wave 2, HIGH-W2A-1)
    ///
    /// **This function is synchronous and may block the calling thread for
    /// the full `timeout` duration on the JSONL slow path.** The poll loop
    /// inside [`wait_for_result_from_jsonl`] uses `std::thread::sleep` and
    /// blocking file reads, neither of which yields to a tokio runtime.
    ///
    /// **Async callers MUST wrap this in [`tokio::task::spawn_blocking`]
    /// (or the equivalent on their runtime)** — calling it directly from a
    /// tokio worker thread will park that worker for the whole timeout,
    /// starving every other task scheduled on it. Under the
    /// `current_thread` flavour this deadlocks the entire runtime.
    ///
    /// The in-memory fast path (when `outcome.lifecycle_events` already
    /// contains a destroyed event) returns synchronously without polling,
    /// so callers who can guarantee the fast path is taken — synchronous
    /// submitters that capture lifecycle events inline — are safe to call
    /// directly.
    pub fn wait_for_result(
        &self,
        pack: &ContextPack,
        outcome: &CellSubmissionOutcome,
        timeout: Duration,
    ) -> Result<CortexCellResult, anyhow::Error> {
        // Fast path: synchronous submitters often capture the full lifecycle
        // tail inline. Use those events directly so we never block on disk.
        // An empty event list simply yields `None` here.
        if let Some(result) = build_result_from_events(
            &outcome.cell_id,
            &pack.memory_digest,
            &pack.doctrine_refs,
            &outcome.lifecycle_events,
        ) {
            return Ok(result);
        }

        // Slow path: poll the supervisor's JSONL event stream if one is
        // configured. Without a configured stream we cannot wait for an
        // out-of-process result, so fail fast and tell the operator what to
        // wire up rather than block silently. `var_os` keeps non-Unicode
        // paths usable; an empty value is handled like a missing one.
        let configured_path = std::env::var_os(CELL_OS_JSONL_EVENTS_ENV)
            .filter(|raw| !raw.is_empty())
            .map(PathBuf::from);
        let Some(jsonl_path) = configured_path else {
            anyhow::bail!(
                "wait_for_result: no in-memory lifecycle event captured and \
                 ${CELL_OS_JSONL_EVENTS_ENV} is unset — cannot observe cell result"
            );
        };

        let mut result = wait_for_result_from_jsonl(&outcome.cell_id, &jsonl_path, timeout)?;
        // The JSONL path has no way to learn doctrine refs (events don't carry
        // them); thread them in from the dispatched pack, and overwrite
        // pack_id from the same source so the result is consistent regardless
        // of which path produced it.
        result.doctrine_refs = pack.doctrine_refs.clone();
        result.pack_id = pack.memory_digest.clone();
        Ok(result)
    }

    /// TTL, in seconds, for the cell that will run `pack`.
    ///
    /// - No `expires_at`: the runner's default TTL.
    /// - `expires_at` at or before the current time: `1`.
    /// - Otherwise: the whole seconds left until `expires_at`, at least `1`
    ///   and at most the default TTL.
    ///
    /// `expires_at` is compared against unix milliseconds. If the system
    /// clock reads earlier than the unix epoch, "now" is taken as `0`.
    fn ttl_for_pack(&self, pack: &ContextPack) -> u64 {
        let ceiling = self.default_ttl_seconds;
        match pack.expires_at {
            None => ceiling,
            Some(deadline_ms) => {
                let now_ms = SystemTime::now()
                    .duration_since(UNIX_EPOCH)
                    .map_or(0, |since_epoch| since_epoch.as_millis() as u64);
                if deadline_ms <= now_ms {
                    // Already expired — use a minimal but non-zero TTL so the
                    // supervisor still receives a valid spec and can
                    // reject/expire it cleanly instead of failing to parse
                    // `ttlSeconds: 0`.
                    1
                } else {
                    let whole_seconds = (deadline_ms - now_ms) / 1000;
                    std::cmp::min(std::cmp::max(whole_seconds, 1), ceiling)
                }
            }
        }
    }
}

/// Derive a deterministic cell id from the pack's digest and task.
///
/// The id is `cortex-<digest>-<task>`, where `<digest>` is the first 12
/// grammar-safe characters of `memory_digest` and `<task>` is the first 8
/// ASCII-alphanumeric characters of `task`, lower-cased. A fragment that
/// ends up empty is left out, and `cortex-cell` is used when both are empty.
///
/// This is a correlation key, not a hash: equal packs yield equal ids, and
/// uniqueness is left to the supervisor.
fn pack_to_cell_id(pack: &ContextPack) -> String {
    // No real hash on purpose — this stays dependency-free.
    //
    // Both fragments are restricted to the cell-id grammar
    // (`[A-Za-z0-9._-]`) so the id passes the validator in
    // `cellos_core::spec_validation`; a canonical digest such as
    // `"sha256:abc..."` contains a `:` that would otherwise be rejected at
    // admission time.
    let digest_part: String = pack
        .memory_digest
        .chars()
        .filter(|ch| ch.is_ascii_alphanumeric() || ['.', '_', '-'].contains(ch))
        .take(12)
        .collect();

    let task_part = pack
        .task
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .take(8)
        .collect::<String>()
        .to_lowercase();

    match (digest_part.is_empty(), task_part.is_empty()) {
        (true, true) => "cortex-cell".to_string(),
        (true, false) => format!("cortex-{task_part}"),
        (false, true) => format!("cortex-{digest_part}"),
        (false, false) => format!("cortex-{digest_part}-{task_part}"),
    }
}

/// Poll a CellOS JSONL CloudEvent stream until a
/// `cell.lifecycle.v1.destroyed` event for `cell_id` is observed (or the
/// timeout elapses) and project it into a [`CortexCellResult`].
///
/// This is the *standalone* variant of [`CortexCellRunner::wait_for_result`]
/// for callers that hold a cell id and a JSONL path but no runner instance
/// (e.g. an out-of-process auditor catching up to a finished cell). It does
/// not block on inotify — it re-reads the file on a short interval, which is
/// the only portable strategy that works against both file-backed and
/// rotated-during-read JSONL streams.
///
/// Pre-existing events in the file before this call are honoured, so a
/// caller that arrives after the cell finished still gets its result on the
/// first poll instead of timing out.
///
/// The file is decoded lossily: bytes that are not valid UTF-8 are replaced
/// rather than failing the read, so one corrupt line cannot hide the valid
/// events around it. A file that cannot be read at all (missing, permission
/// denied, …) is treated as "no events yet" and retried on the next poll.
///
/// `doctrine_refs` cannot be recovered from a destroyed event (the supervisor
/// strips them); callers needing audit-complete results should overwrite the
/// field from their dispatched pack, as the runner method does. The standalone
/// path leaves it — and `pack_id` — empty.
///
/// # Errors
///
/// Returns `"cell result timeout after N seconds (cell_id=…)"` when no
/// matching destroyed event shows up within `timeout`. `N` is
/// `timeout.as_secs()`, so sub-second timeouts are reported as `0`. At least
/// one poll always happens, even with a zero timeout.
///
/// # Blocking behaviour (red-team wave 2, HIGH-W2A-1)
///
/// **This function blocks the calling thread for up to `timeout`.** Internally
/// it loops on `std::thread::sleep` (at most 25ms per pass) and
/// `std::fs::read`, neither of which yields to a tokio runtime.
///
/// **From async code, ALWAYS call via [`tokio::task::spawn_blocking`]:**
///
/// ```ignore
/// let result = tokio::task::spawn_blocking(move || {
///     wait_for_result_from_jsonl(&cell_id, &jsonl_path, timeout)
/// })
/// .await??;
/// ```
///
/// Direct calls from a tokio worker park that worker for the full timeout
/// and starve every other task scheduled on it. Under the `current_thread`
/// runtime flavour this deadlocks the runtime entirely.
pub fn wait_for_result_from_jsonl(
    cell_id: &str,
    jsonl_path: &Path,
    timeout: Duration,
) -> Result<CortexCellResult, anyhow::Error> {
    // Re-poll interval. Small enough that a synchronous test sees the result
    // almost immediately; large enough that we don't busy-spin a CPU when
    // the supervisor hasn't emitted yet.
    const POLL_INTERVAL: Duration = Duration::from_millis(25);

    let started = Instant::now();
    loop {
        // Read the whole file each pass. JSONL event streams in CellOS are
        // bounded (one cell run = a handful of events) so the cost is
        // negligible, and re-reading from byte 0 means we correctly handle
        // streams that the supervisor rotated or truncated mid-poll.
        //
        // Raw bytes + lossy decoding instead of `read_to_string`: a single
        // invalid UTF-8 byte would otherwise fail every read and guarantee a
        // timeout even though the destroyed event is sitting in the file.
        if let Ok(bytes) = std::fs::read(jsonl_path) {
            let contents = String::from_utf8_lossy(&bytes);
            let events = parse_jsonl_events(&contents);
            if let Some(result) =
                build_result_from_events(cell_id, /* pack_id */ "", &[], &events)
            {
                return Ok(result);
            }
        }

        let elapsed = started.elapsed();
        if elapsed >= timeout {
            anyhow::bail!(
                "cell result timeout after {} seconds (cell_id={cell_id})",
                timeout.as_secs(),
            );
        }
        // Never sleep past the deadline: the last poll lands right at
        // `timeout` instead of up to one interval later. `elapsed < timeout`
        // holds here, so the subtraction cannot underflow.
        std::thread::sleep(POLL_INTERVAL.min(timeout - elapsed));
    }
}

/// Decode a JSONL stream into the CloudEvents it contains, in file order.
///
/// Blank lines and lines that do not deserialize as a [`CloudEventV1`] are
/// dropped: the supervisor's JSONL stream is best-effort, and one malformed
/// line must never block result reception for the lines that did parse.
fn parse_jsonl_events(contents: &str) -> Vec<CloudEventV1> {
    let mut events = Vec::new();
    for line in contents.lines() {
        if line.trim().is_empty() {
            continue;
        }
        if let Ok(event) = serde_json::from_str::<CloudEventV1>(line) {
            events.push(event);
        }
    }
    events
}

/// Build a [`CortexCellResult`] from a slice of CloudEvents.
///
/// Returns `None` when no `cell.lifecycle.v1.destroyed` event matches
/// `cell_id` — the caller is expected to keep polling or time out. Only
/// events whose `data.cellId` equals `cell_id` are considered; when several
/// destroyed events match, the last one in the slice wins.
///
/// `pack_id` and `doctrine_refs` are copied into the result unchanged.
///
/// Field provenance:
///
/// - `exit_code` ← last matching `cell.command.v1.completed`'s `exitCode`,
///   falling back to `0` / `-1` from `success` when no command-completed
///   event is in the stream (forced teardown, host-spawn path without an
///   in-VM bridge) or its `exitCode` is missing or not an integer. A value
///   outside the `i32` range is clamped to `i32::MIN` / `i32::MAX` instead
///   of wrapping, so a large nonzero code can never be reported as `0`.
/// - `success` ← destroyed event's `outcome == "succeeded"`.
/// - `lifecycle_destroyed_at` ← destroyed event's top-level `time` parsed as
///   RFC3339 → unix milliseconds. `0` when absent or unparseable.
/// - `export_paths` ← destination strings from every matching
///   `cell.export.v1.completed` (`destinationRelative`) and
///   `cell.export.v2.completed` (`receipt.destination`) event, preserving
///   emission order.
fn build_result_from_events(
    cell_id: &str,
    pack_id: &str,
    doctrine_refs: &[String],
    events: &[CloudEventV1],
) -> Option<CortexCellResult> {
    /// `true` when `ev` has type `ty` and its `data.cellId` is `cell_id`.
    fn matches_cell(ev: &CloudEventV1, ty: &str, cell_id: &str) -> bool {
        ev.ty == ty && event_data_cell_id(ev) == Some(cell_id)
    }

    let destroyed = events
        .iter()
        .rev()
        .find(|ev| matches_cell(ev, LIFECYCLE_DESTROYED_TYPE, cell_id))?;

    let success = destroyed
        .data
        .as_ref()
        .and_then(|d| d.get("outcome"))
        .and_then(|v| v.as_str())
        .map(|s| s == "succeeded")
        .unwrap_or(false);

    let lifecycle_destroyed_at = destroyed
        .time
        .as_deref()
        .and_then(parse_rfc3339_to_unix_ms)
        .unwrap_or(0);

    // Pick the most recent command-completed event for this cell. The
    // supervisor emits this between `started` and `destroyed` on the clean
    // exit path; on forced teardown it may be absent entirely.
    let exit_code = events
        .iter()
        .rev()
        .find(|ev| matches_cell(ev, COMMAND_COMPLETED_TYPE, cell_id))
        .and_then(|ev| ev.data.as_ref())
        .and_then(|d| d.get("exitCode"))
        .and_then(|v| v.as_i64())
        // Clamp before narrowing: a plain `as i32` wraps, which would turn
        // e.g. 2^32 into 0 and make a failure look like a clean exit.
        .map(|raw| raw.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32)
        .unwrap_or(if success { 0 } else { -1 });

    let export_paths: Vec<String> = events
        .iter()
        .filter(|ev| {
            matches_cell(ev, EXPORT_COMPLETED_V1_TYPE, cell_id)
                || matches_cell(ev, EXPORT_COMPLETED_V2_TYPE, cell_id)
        })
        .filter_map(|ev| {
            let data = ev.data.as_ref()?;
            // v1: `destinationRelative` at top level.
            if let Some(p) = data.get("destinationRelative").and_then(|v| v.as_str()) {
                return Some(p.to_string());
            }
            // v2: `receipt.destination` (or `receipt.destinationRelative` if
            // the receipt mirrors v1 naming). Accept either to stay tolerant
            // of receipt-shape evolution.
            let receipt = data.get("receipt")?;
            receipt
                .get("destination")
                .and_then(|v| v.as_str())
                .or_else(|| receipt.get("destinationRelative").and_then(|v| v.as_str()))
                .map(|s| s.to_string())
        })
        .collect();

    Some(CortexCellResult {
        cell_id: cell_id.to_string(),
        pack_id: pack_id.to_string(),
        exit_code,
        success,
        lifecycle_destroyed_at,
        export_paths,
        doctrine_refs: doctrine_refs.to_vec(),
    })
}

/// Borrow the `cellId` string from an event's `data` payload.
///
/// `None` when the event has no `data`, the payload has no `cellId` entry,
/// or that entry is not a string.
fn event_data_cell_id(ev: &CloudEventV1) -> Option<&str> {
    ev.data
        .as_ref()
        .and_then(|payload| payload.get("cellId"))
        .and_then(|id| id.as_str())
}

/// Convert an RFC3339 timestamp into unix milliseconds.
///
/// Yields `None` when the string does not parse or when the instant lies
/// before the unix epoch (negative milliseconds); callers substitute `0`.
fn parse_rfc3339_to_unix_ms(s: &str) -> Option<u64> {
    match chrono::DateTime::parse_from_rfc3339(s) {
        Ok(stamp) => {
            let ms = stamp.timestamp_millis();
            (ms >= 0).then_some(ms as u64)
        }
        Err(_) => None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    /// Test double: keeps every submitted document and reports instant success.
    struct CapturingSubmitter {
        captured: Mutex<Vec<ExecutionCellDocument>>,
    }

    #[async_trait]
    impl CellSubmitter for CapturingSubmitter {
        /// Stores a clone of `document`, then returns an outcome echoing the
        /// spec id with exit code `0` and no lifecycle events.
        async fn submit(
            &self,
            document: &ExecutionCellDocument,
        ) -> Result<CellSubmissionOutcome, anyhow::Error> {
            {
                let mut log = self.captured.lock().unwrap();
                log.push(document.clone());
            }
            let cell_id = document.spec.id.clone();
            Ok(CellSubmissionOutcome {
                cell_id,
                exit_code: Some(0),
                lifecycle_events: vec![],
            })
        }
    }

    /// Build one capturing submitter and return it twice: as its concrete
    /// type (to inspect what was captured) and as the trait object the runner
    /// takes. Both handles share the same instance.
    fn submitter() -> (Arc<CapturingSubmitter>, Arc<dyn CellSubmitter>) {
        let concrete = Arc::new(CapturingSubmitter {
            captured: Mutex::default(),
        });
        let erased: Arc<dyn CellSubmitter> = concrete.clone();
        (concrete, erased)
    }

    /// The task becomes the last argv element, the default TTL is 300s, and
    /// the generated id carries the `cortex-` prefix.
    #[test]
    fn translate_produces_runspec_with_task_appended_to_argv() {
        let (_capture, port) = submitter();
        let base_argv = vec![
            String::from("agent"),
            String::from("--mode"),
            String::from("run"),
        ];
        let runner = CortexCellRunner::new(port, base_argv);
        let pack = ContextPack {
            memory_digest: "blake3:abcdef0123456789".into(),
            doctrine_refs: vec!["p1".into()],
            task: "do the thing".into(),
            expires_at: None,
        };

        let spec = runner.translate(&pack).document.spec;
        let run = spec.run.expect("runspec present");
        assert_eq!(run.argv, vec!["agent", "--mode", "run", "do the thing"]);
        assert_eq!(spec.lifetime.ttl_seconds, 300);
        assert!(spec.id.starts_with("cortex-"));
    }

    /// A pack built with `ContextPack::new` gets exactly the configured
    /// default TTL.
    #[test]
    fn translate_clamps_ttl_to_default_when_pack_unbounded() {
        let (_capture, port) = submitter();
        let runner =
            CortexCellRunner::new(port, vec![String::from("bin")]).with_default_ttl_seconds(42);
        let pack = ContextPack::new("t");
        let ttl_seconds = runner.translate(&pack).document.spec.lifetime.ttl_seconds;
        assert_eq!(ttl_seconds, 42);
    }

    /// A pack expiring about five seconds from now yields a TTL of a few
    /// seconds, well below the 1000s default.
    #[test]
    fn translate_clamps_ttl_below_default_when_pack_expires_soon() {
        let (_capture, port) = submitter();
        let runner =
            CortexCellRunner::new(port, vec![String::from("bin")]).with_default_ttl_seconds(1000);

        // Deadline = now + ~5s, expressed in unix milliseconds.
        let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        let deadline_ms = since_epoch.as_millis() as u64 + 5_000;
        let pack = ContextPack {
            memory_digest: String::new(),
            doctrine_refs: Vec::new(),
            task: "soon".into(),
            expires_at: Some(deadline_ms),
        };

        let ttl = runner.translate(&pack).document.spec.lifetime.ttl_seconds;
        assert!((1..=6).contains(&ttl), "ttl was {ttl}");
    }

    /// A pack whose `expires_at` is already in the past gets a TTL of 1.
    #[test]
    fn expired_pack_collapses_to_minimal_ttl() {
        let (_capture, port) = submitter();
        let runner = CortexCellRunner::new(port, vec![String::from("bin")]);
        let stale_pack = ContextPack {
            memory_digest: String::new(),
            doctrine_refs: Vec::new(),
            task: "stale".into(),
            expires_at: Some(0),
        };
        let ttl_seconds = runner
            .translate(&stale_pack)
            .document
            .spec
            .lifetime
            .ttl_seconds;
        assert_eq!(ttl_seconds, 1);
    }

    /// Dispatching submits exactly one document whose argv ends with the task.
    #[tokio::test]
    async fn dispatch_submits_translated_document() {
        let (capture, port) = submitter();
        let runner = CortexCellRunner::new(port, vec![String::from("agent")]);
        let pack = ContextPack::new("ship it");

        let outcome = runner.dispatch(&pack).await.expect("dispatch ok");
        assert_eq!(outcome.exit_code, Some(0));

        let documents = capture.captured.lock().unwrap();
        assert_eq!(documents.len(), 1);
        let final_arg = documents[0]
            .spec
            .run
            .as_ref()
            .and_then(|run| run.argv.last())
            .unwrap();
        assert_eq!(final_arg, "ship it");
    }

    /// Capturing event sink for cortex-dispatched assertions.
    struct CaptureSink(std::sync::Mutex<Vec<CloudEventV1>>);

    #[async_trait]
    impl cellos_core::ports::EventSink for CaptureSink {
        async fn emit(&self, event: &CloudEventV1) -> Result<(), cellos_core::error::CellosError> {
            self.0.lock().unwrap().push(event.clone());
            Ok(())
        }
    }

    #[tokio::test]
    async fn dispatch_emits_cortex_dispatched_event_when_sink_wired() {
        let (_cap, sub) = submitter();
        let sink: Arc<CaptureSink> = Arc::new(CaptureSink(std::sync::Mutex::new(Vec::new())));
        let dyn_sink: Arc<dyn cellos_core::ports::EventSink> = sink.clone();
        let runner = CortexCellRunner::new(sub, vec!["agent".into()]).with_event_sink(dyn_sink);

        let pack = ContextPack {
            memory_digest: "pack-abc".into(),
            doctrine_refs: vec!["D1".into(), "D5".into()],
            task: "ship it".into(),
            expires_at: None,
        };
        let outcome = runner.dispatch(&pack).await.expect("dispatch ok");

        let events = sink.0.lock().unwrap();
        assert_eq!(events.len(), 1, "exactly one cortex_dispatched event");
        let ev = &events[0];
        assert_eq!(ev.ty, "dev.cellos.events.cell.cortex.v1.dispatched");
        assert_eq!(ev.source, CORTEX_EVENT_SOURCE);
        let data = ev.data.as_ref().expect("data present");
        assert_eq!(
            data.get("packId").and_then(|v| v.as_str()),
            Some("pack-abc")
        );
        assert_eq!(
            data.get("cellId").and_then(|v| v.as_str()),
            Some(outcome.cell_id.as_str())
        );
        let refs = data
            .get("doctrineRefs")
            .and_then(|v| v.as_array())
            .expect("doctrineRefs array");
        assert_eq!(refs.len(), 2);
        assert_eq!(refs[0].as_str(), Some("D1"));
        assert_eq!(refs[1].as_str(), Some("D5"));
    }

    /// Without a sink, dispatch still succeeds and simply emits nothing.
    #[tokio::test]
    async fn dispatch_without_sink_does_not_emit() {
        // `new` leaves `event_sink` as `None`; callers that have not opted in
        // must keep the legacy fire-and-forget behaviour.
        let (_capture, port) = submitter();
        let runner = CortexCellRunner::new(port, vec![String::from("agent")]);
        let outcome = runner
            .dispatch(&ContextPack::new("no-sink"))
            .await
            .expect("dispatch ok");
        // There is no sink to inspect; reaching this point without a panic or
        // an error is the assertion for the no-sink path.
        assert_eq!(outcome.exit_code, Some(0));
    }

    // ── CortexCellResult / wait_for_result tests ─────────────────────────

    /// Test fixture: assemble a `CloudEventV1` of type `ty` carrying `data`
    /// as its payload and `time` (when given) as its timestamp. The id is
    /// `test-` followed by `ty`; the remaining fields are fixed placeholders.
    fn ev(ty: &str, time: Option<&str>, data: serde_json::Value) -> CloudEventV1 {
        let id = format!("test-{ty}");
        let time = time.map(str::to_string);
        CloudEventV1 {
            id,
            time,
            ty: ty.into(),
            source: "test".into(),
            specversion: "1.0".into(),
            datacontenttype: Some("application/json".into()),
            data: Some(data),
            traceparent: None,
        }
    }

    /// CHK[4]: a JSONL file that never grows a destroyed event must produce
    /// a timeout error from `wait_for_result_from_jsonl`.
    ///
    /// The scratch directory is removed (best effort) as soon as the call
    /// under test returns, before any assertion can panic, so the test does
    /// not leave files behind in the system temp directory.
    #[test]
    fn wait_for_result_timeout_returns_err() {
        let dir = tempdir_for_test();
        let path = dir.join("events.jsonl");
        // Pre-write an unrelated event so the file exists and parses; the
        // poll loop must still time out because no destroyed event for the
        // target cell appears.
        let unrelated = ev(
            "dev.cellos.events.cell.lifecycle.v1.started",
            Some("2026-01-01T00:00:00Z"),
            serde_json::json!({ "cellId": "other-cell" }),
        );
        let mut contents = serde_json::to_string(&unrelated).unwrap();
        contents.push('\n');
        std::fs::write(&path, contents).unwrap();

        let outcome = wait_for_result_from_jsonl("target-cell", &path, Duration::from_millis(150));

        // Best-effort cleanup: a failure to delete must not mask the real
        // assertion result below.
        let _ = std::fs::remove_dir_all(&dir);

        let err = outcome.expect_err("must time out");
        let msg = format!("{err}");
        assert!(
            msg.contains("timeout") && msg.contains("target-cell"),
            "unexpected error message: {msg}"
        );
    }

    /// CHK[5]: a JSONL file containing a destroyed event for the target cell
    /// — plus a command-completed event with a real exit code and two export
    /// receipts — must parse cleanly into a `CortexCellResult`.
    ///
    /// The scratch directory is removed (best effort) as soon as the call
    /// under test returns, before any assertion can panic, so the test does
    /// not leave files behind in the system temp directory.
    #[test]
    fn wait_for_result_parses_destroyed_event() {
        let dir = tempdir_for_test();
        let path = dir.join("events.jsonl");

        let events = [
            // An unrelated cell's destroyed event — must be ignored.
            ev(
                LIFECYCLE_DESTROYED_TYPE,
                Some("2026-01-01T00:00:00Z"),
                serde_json::json!({ "cellId": "noise", "outcome": "failed" }),
            ),
            // Export receipts for the target cell (v1 + v2 shapes).
            ev(
                EXPORT_COMPLETED_V1_TYPE,
                Some("2026-01-01T00:00:01Z"),
                serde_json::json!({
                    "cellId": "target-cell",
                    "destinationRelative": "out/report.md"
                }),
            ),
            ev(
                EXPORT_COMPLETED_V2_TYPE,
                Some("2026-01-01T00:00:02Z"),
                serde_json::json!({
                    "cellId": "target-cell",
                    "receipt": { "destination": "out/audit.json" }
                }),
            ),
            // Authenticated exit code for the target cell.
            ev(
                COMMAND_COMPLETED_TYPE,
                Some("2026-01-01T00:00:03Z"),
                serde_json::json!({
                    "cellId": "target-cell",
                    "exitCode": 0,
                    "durationMs": 1234,
                    "argv": ["agent", "do-thing"]
                }),
            ),
            // Destroyed event for the target cell.
            ev(
                LIFECYCLE_DESTROYED_TYPE,
                Some("2026-01-01T00:00:04Z"),
                serde_json::json!({
                    "cellId": "target-cell",
                    "outcome": "succeeded"
                }),
            ),
        ];

        // One JSON document per line, each terminated by a newline.
        let mut contents = String::new();
        for event in &events {
            contents.push_str(&serde_json::to_string(event).unwrap());
            contents.push('\n');
        }
        std::fs::write(&path, contents).unwrap();

        let outcome = wait_for_result_from_jsonl("target-cell", &path, Duration::from_secs(2));

        // Best-effort cleanup: a failure to delete must not mask the real
        // assertion results below.
        let _ = std::fs::remove_dir_all(&dir);

        let result = outcome.expect("destroyed event must parse");

        assert_eq!(result.cell_id, "target-cell");
        assert!(result.success);
        assert_eq!(result.exit_code, 0);
        // 2026-01-01T00:00:04Z = 1767225604000ms.
        assert_eq!(result.lifecycle_destroyed_at, 1_767_225_604_000);
        assert_eq!(
            result.export_paths,
            vec!["out/report.md".to_string(), "out/audit.json".to_string()]
        );
        // Standalone JSONL path: doctrine_refs left empty (no source).
        assert!(result.doctrine_refs.is_empty());
        // Standalone JSONL path: pack_id left empty (no source).
        assert_eq!(result.pack_id, "");
    }

    /// Extra coverage for the runner's `wait_for_result`: when the outcome
    /// already carries `lifecycle_events`, the result is expected to be
    /// derived from those inline events. The test passes only a 10 ms
    /// timeout and supplies no JSONL file of its own.
    #[test]
    fn wait_for_result_uses_inline_events_fast_path() {
        let (_cap, sub) = submitter();
        let runner = CortexCellRunner::new(sub, vec!["agent".into()]);

        // Per-process exit code for the inline cell (non-zero on purpose).
        let command_completed = ev(
            COMMAND_COMPLETED_TYPE,
            Some("2026-01-01T00:00:03Z"),
            serde_json::json!({
                "cellId": "inline-cell",
                "exitCode": 7,
                "durationMs": 1,
                "argv": ["agent"]
            }),
        );
        // Terminal lifecycle event for the same cell.
        let destroyed = ev(
            LIFECYCLE_DESTROYED_TYPE,
            Some("2026-01-01T00:00:04Z"),
            serde_json::json!({
                "cellId": "inline-cell",
                "outcome": "failed"
            }),
        );
        let outcome = CellSubmissionOutcome {
            lifecycle_events: vec![command_completed, destroyed],
            exit_code: Some(0),
            cell_id: "inline-cell".into(),
        };

        let pack = ContextPack {
            task: "ship it".into(),
            memory_digest: "pack-xyz".into(),
            doctrine_refs: vec!["D5".into()],
            expires_at: None,
        };

        let timeout = Duration::from_millis(10);
        let result = runner
            .wait_for_result(&pack, &outcome, timeout)
            .expect("inline path must succeed without polling");

        // Identity fields come from the outcome and the pack.
        assert_eq!(result.cell_id, "inline-cell");
        assert_eq!(result.pack_id, "pack-xyz");
        assert_eq!(result.doctrine_refs, vec!["D5".to_string()]);
        // Status fields reflect the inline events, not `outcome.exit_code`.
        assert!(!result.success);
        assert_eq!(result.exit_code, 7);
    }

    /// Helper: create a fresh scratch directory under `std::env::temp_dir()`
    /// and return its path.
    ///
    /// Built from `std` only, so the `tempfile` crate is not pulled in just
    /// for a couple of tests. The directory name combines the wall-clock
    /// nanoseconds since the Unix epoch, the process id, and a process-wide
    /// sequence number. The sequence number is what makes the name unique:
    /// tests running on parallel threads of one test binary share a process
    /// id and, on a coarse clock, may observe the same timestamp.
    ///
    /// This helper never deletes the directory; removing it is up to the
    /// caller.
    ///
    /// # Panics
    ///
    /// Panics if the system clock reports a time before the Unix epoch or if
    /// the directory cannot be created.
    fn tempdir_for_test() -> std::path::PathBuf {
        // Monotonic per-process counter; `Relaxed` suffices because only the
        // uniqueness of the returned values matters, not ordering with
        // other memory operations.
        static NEXT_DIR_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
        let seq = NEXT_DIR_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system clock is set before the Unix epoch")
            .as_nanos();
        let dir = std::env::temp_dir().join(format!(
            "cellos-cortex-wait-{nanos}-{}-{seq}",
            std::process::id()
        ));
        std::fs::create_dir_all(&dir).expect("failed to create per-test temp directory");
        dir
    }
}