car-reason 0.18.0

//! `ReasoningInferenceHandle` — narrow trait surface for the engine
//! capabilities a `ReasoningSession` reaches beyond what
//! `InferenceHandle` already covers.
//!
//! Motivation: pre-#189, `ReasoningSession` held `Arc<InferenceEngine>`
//! directly and called three engine internals that `InferenceHandle`
//! (added in #188) intentionally omits:
//!
//! - `engine.generate_tracked(req)` — the rich variant of `generate`
//!   that returns `InferenceResult` (trace_id, model_used, latency_ms)
//!   instead of bare text. The session records each action's trace
//!   in its `ActionOutcome` for the post-hoc learning loop.
//! - `engine.unified_registry.find_by_name(name)` — used by
//!   `pick_model_for_tier` to verify a tier-candidate model is
//!   actually registered before naming it in a generation request.
//! - `engine.outcome_tracker().write().await` →
//!   `resolve_pending_from_signals` — the post-action learning step
//!   that writes back inferred outcomes for each action's trace.
//!
//! That direct coupling meant `cmd_reason` had to construct a full
//! in-process `InferenceEngine` even when the daemon was reachable —
//! the v0.7 holdover #186 was trying to eliminate.
//!
//! This trait is `InferenceHandle + Send + Sync` with three extra
//! methods, scoped to what car-reason actually uses. Two
//! implementations:
//!
//! 1. **In-process** — `impl ReasoningInferenceHandle for InferenceEngine`
//!    in this crate. Delegates to existing engine methods. Behavior
//!    matches the pre-refactor path exactly so existing tests against
//!    the in-process engine continue to pass.
//! 2. **Daemon-routed** — `impl ReasoningInferenceHandle for
//!    DaemonInferenceHandle` lives in `car-cli/src/daemon_handle.rs`
//!    next to the existing `InferenceHandle` impl from #188.
//!    `generate_tracked` reuses the daemon's `infer` JSON-RPC method
//!    (which already returns the full `InferenceResult` shape).
//!    `find_model_by_name` lazily caches a `models.list_unified`
//!    snapshot. `record_inferred_outcomes` is best-effort — the
//!    daemon doesn't yet have an `outcomes.resolve_pending` endpoint,
//!    so the daemon path no-ops (with an opt-in `eprintln!` gated by
//!    the `CAR_REASON_DEBUG` env var) and returns successfully.
//!
//! Tracked in Parslee-ai/car#189.

use std::sync::Arc;

use car_inference::{
    GenerateRequest, InferenceEngine, InferenceError, InferenceHandle, InferenceResult, ModelInfo,
};

/// Inference operations a `ReasoningSession` needs beyond the
/// `InferenceHandle` core (generate + embed).
///
/// `Send + Sync` are declared as explicit supertraits: the session
/// holds the handle in an `Arc` and may dispatch from `tokio::spawn`
/// tasks, and an `Arc<dyn ReasoningInferenceHandle>` is only
/// `Send`/`Sync` when the trait object itself is. Stating the bounds
/// here keeps that guarantee independent of whatever supertraits
/// `InferenceHandle` happens to declare.
#[async_trait::async_trait]
pub trait ReasoningInferenceHandle: InferenceHandle + Send + Sync {
    /// Run a generation request and return the rich `InferenceResult`
    /// (text + trace_id + model_used + latency_ms). Same contract as
    /// `InferenceEngine::generate_tracked`; the session records the
    /// trace_id and latency into each `ActionOutcome` so the post-hoc
    /// learning loop can correlate outcomes back to model picks.
    ///
    /// # Errors
    ///
    /// Returns the implementation's `InferenceError` when the
    /// generation request fails.
    async fn generate_tracked(
        &self,
        req: GenerateRequest,
    ) -> Result<InferenceResult, InferenceError>;

    /// Look up a model in the unified registry by name. Returns
    /// `None` when the model isn't registered (the tier-picker in
    /// `executor::pick_model_for_tier` uses this to skip candidates
    /// whose name isn't known to the engine before naming them).
    ///
    /// Async because the daemon impl lazily fetches the
    /// `models.list_unified` snapshot on first call — making this
    /// async lets the daemon path do that without a blocking-from-
    /// async-context shortcut. The in-process impl performs a plain
    /// synchronous registry lookup and just wraps the answer in a
    /// ready future.
    async fn find_model_by_name(&self, name: &str) -> Option<ModelInfo>;

    /// Record post-hoc inferred outcomes for a sequence of action
    /// traces. The caller passes the flat
    /// `(trace_id, success, confidence, output)` tuples produced by
    /// the session; the implementation runs the inference rules
    /// (`OutcomeTracker::infer_outcomes_from_action_sequence`, which
    /// is pure — it doesn't read tracker state) and writes the
    /// results back.
    ///
    /// In-process impl takes the write lock on the engine's
    /// `outcome_tracker` and calls `resolve_pending_from_signals`
    /// with the inferred outcomes.
    ///
    /// Daemon impl is best-effort: no corresponding JSON-RPC method
    /// exists yet (would need `outcomes.resolve_pending`), so it
    /// emits a debug log and returns `Ok(())`. The reasoning result
    /// itself is unaffected — this learning loop is a refinement,
    /// not a correctness gate. Tracked as a daemon-side follow-up.
    ///
    /// # Errors
    ///
    /// The signature allows an `InferenceError`, but callers should
    /// treat a failure here as non-fatal for the reasoning result,
    /// for the reason given above.
    async fn record_inferred_outcomes(
        &self,
        action_results: Vec<(String, bool, f64, String)>,
    ) -> Result<(), InferenceError>;
}

/// In-process implementation: every method delegates straight to the
/// `InferenceEngine` it is called on.
#[async_trait::async_trait]
impl ReasoningInferenceHandle for InferenceEngine {
    /// Forwards to the engine's own `generate_tracked`. The call is
    /// written with the explicit `InferenceEngine::` path so it reads
    /// unambiguously as the engine method rather than this trait method.
    async fn generate_tracked(
        &self,
        req: GenerateRequest,
    ) -> Result<InferenceResult, InferenceError> {
        let outcome = InferenceEngine::generate_tracked(self, req).await;
        outcome
    }

    /// Looks `name` up in the engine's `unified_registry` and converts a
    /// hit into a `ModelInfo`; a miss yields `None`. Nothing is awaited.
    async fn find_model_by_name(&self, name: &str) -> Option<ModelInfo> {
        let registered = self.unified_registry.find_by_name(name);
        registered.map(ModelInfo::from)
    }

    /// Derives outcomes from `action_results` and hands them to
    /// `resolve_pending_from_signals`. The tracker's write guard is
    /// acquired first and stays held across both calls; it is released
    /// when the guard goes out of scope at the end of the method.
    /// Always returns `Ok(())`.
    async fn record_inferred_outcomes(
        &self,
        action_results: Vec<(String, bool, f64, String)>,
    ) -> Result<(), InferenceError> {
        let mut guard = self.outcome_tracker.write().await;
        let signals = guard.infer_outcomes_from_action_sequence(&action_results);
        guard.resolve_pending_from_signals(signals);
        Ok(())
    }
}

/// Shared, type-erased reasoning handle.
///
/// Convenience alias for the common shape `ReasoningSession::new`
/// expects. Callers usually have an `Arc<InferenceEngine>` or
/// `Arc<DaemonInferenceHandle>`; both coerce to this (the usual
/// `Arc<T>` → `Arc<dyn Trait>` unsized coercion), so no explicit
/// conversion is needed at the call site.
pub type SharedReasoningHandle = Arc<dyn ReasoningInferenceHandle>;