car-inference 0.23.0

//! Proactive model concierge — turns "your hardware can run a great model for
//! X, but you have none installed" into a single, non-nagging suggestion.
//!
//! Sibling of [`crate::nudge`]: the upgrade nudge handles the *installed →
//! newer* case; the concierge handles the *acquisition gap* — a use-case lane
//! with no working model at all. Both are pure decision functions the daemon
//! drives on its periodic tick. Both keep their throttle + dismissal
//! bookkeeping in [`NudgeState`] and honour the [`UpdatePolicy`] `Off` switch,
//! but each throttles on its own timestamp field, so neither can starve the
//! other of its window. Keeping the decision here makes it pure and
//! unit-testable: the caller injects `now_secs` and `throttle_secs`.
//!
//! Rules:
//! - `policy == Off` → do nothing.
//! - Never interrupt active inference — defer to a later, idle tick.
//! - A use-case lane is "served" when any model the recommender ranks for it
//!   is already installed; served lanes never produce a suggestion. This is
//!   what keeps the concierge distinct from the upgrade nudge — it only fires
//!   when the user has *nothing* for a lane, never to push a marginal upgrade.
//! - Only on-device, memory-fitting picks are suggested — an acquisition the
//!   user can act on immediately, with no cloud consent and no "needs more
//!   RAM" caveat.
//! - At most one suggestion round per `throttle_secs`, measured from the
//!   concierge's own `last_concierge_secs` (independent of the nudge's window),
//!   and a suggestion the user dismissed is never repeated.

use serde::{Deserialize, Serialize};

use crate::hardware::HardwareInfo;
use crate::intent::{Privacy, QualityTier, UseCase};
use crate::nudge::NudgeState;
use crate::recommend::{recommend, FitStatus, Recommendation};
use crate::schema::ModelSchema;
use crate::update_prefs::{UpdatePolicy, UpdatePreferences};

/// A single, plain-language acquisition suggestion for one unserved lane.
///
/// Built by [`decide_concierge`] from the lane being examined and the
/// recommender pick chosen for it. Serializable so it can be handed to a
/// client and compared in tests.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConciergeSuggestion {
    /// The use-case lane that has no installed model.
    pub use_case: UseCase,
    /// Registry id of the model to acquire — the recommender's top on-device,
    /// memory-fitting pick for this lane. The caller pulls by this.
    pub model_id: String,
    /// Human-readable name shown to the user (the pick's `display_name`).
    pub display_name: String,
    /// Download size in MB, copied from the pick's `download_mb`.
    pub download_mb: u64,
    /// One-line, jargon-free message: "You have no model for … — install it?".
    /// Names the lane's purpose, the model's display name, and its size
    /// (rendered in GB from 1024 MB upward).
    pub message: String,
    /// Stable key the client echoes back to dismiss this suggestion. Encodes
    /// the lane and target model (`concierge:<lane-slug>=><model_id>`), so a
    /// *different* later pick can still fire even after this one is dismissed.
    pub dismiss_key: String,
}

/// Default throttle: at most one acquisition suggestion per week (604 800
/// seconds). Deliberately rarer than the upgrade nudge's daily cadence — "you
/// have nothing for X" is a bigger, more deliberate prompt than "a newer
/// version exists", and should not nag. Independent of the nudge throttle
/// (separate `NudgeState` field).
///
/// [`decide_concierge`] takes its window as the `throttle_secs` argument and
/// never reads this constant itself, so a caller that wants the default must
/// pass it explicitly.
pub const DEFAULT_CONCIERGE_THROTTLE_SECS: u64 = 7 * 24 * 60 * 60;

/// The default lanes the concierge watches when the caller passes none.
///
/// Core interactive uses that almost every user benefits from — deliberately
/// NOT the specialist lanes (`Vision` / `Transcription` / `Search`), which
/// many users never touch; suggesting those unprompted would nag. `Summarize`
/// is likewise not watched by default. A caller that knows the user's actual
/// interests should pass them explicitly.
///
/// NOTE: [`decide_concierge`] does not apply this fallback on its own — an
/// empty `use_cases` slice there simply yields no suggestions — so the
/// substitution has to happen at the call site.
pub const DEFAULT_WATCHED_USE_CASES: &[UseCase] = &[UseCase::Assistant, UseCase::Coding];

/// Maps a use-case lane to the fixed lowercase token embedded in persisted
/// dismiss keys.
///
/// These tokens are a storage format: changing one would orphan every
/// dismissal already recorded under the old spelling. That is why the mapping
/// is spelled out by hand instead of leaning on `{:?}`/`Debug`, whose output is
/// not a stability contract. The `match` has no wildcard arm, so adding a
/// `UseCase` variant fails to compile until it is given a slug here.
fn use_case_slug(use_case: UseCase) -> &'static str {
    // Arms are listed alphabetically by slug; order has no effect on the result.
    match use_case {
        UseCase::Assistant => "assistant",
        UseCase::Coding => "coding",
        UseCase::Search => "search",
        UseCase::Summarize => "summarize",
        UseCase::Transcription => "transcription",
        UseCase::Vision => "vision",
    }
}

/// Builds the persisted dismissal key for one (lane, target model) pair, in
/// the form `concierge:<lane-slug>=><model_id>`.
///
/// The upgrade nudge stores bare `from=>to` keys in the same
/// `NudgeState.dismissed` Vec; the leading `concierge:` keeps the two
/// namespaces from colliding.
fn dismiss_key(use_case: UseCase, model_id: &str) -> String {
    let lane = use_case_slug(use_case);
    format!("concierge:{lane}=>{model_id}")
}

/// Decide which acquisition suggestions to surface, given the registry,
/// hardware, lanes to watch, prefs, and bookkeeping. Pure over its inputs.
///
/// Yields at most one suggestion per distinct lane in `use_cases`, in the
/// order the lanes first appear; a lane listed more than once is evaluated
/// only once. The result is empty when:
/// - `inference_active` is set, or `prefs.policy` is `Off`;
/// - `state.last_concierge_secs` is non-zero and fewer than `throttle_secs`
///   seconds have elapsed since it (`0` means "never fired");
/// - every watched lane is already served, has no on-device pick that fits in
///   memory, or has had its current pick dismissed.
///
/// Does **not** mutate `state` — the caller records `last_concierge_secs` when
/// it actually surfaces a suggestion (so a dropped one can re-fire), and
/// appends to `dismissed` when the user waves one away.
// Each argument is a separately injected input (clock, window, activity flag,
// …); that is what keeps this decision pure and unit-testable.
#[allow(clippy::too_many_arguments)]
pub fn decide_concierge(
    models: &[&ModelSchema],
    hw: &HardwareInfo,
    use_cases: &[UseCase],
    tier: QualityTier,
    prefs: &UpdatePreferences,
    state: &NudgeState,
    now_secs: u64,
    throttle_secs: u64,
    inference_active: bool,
) -> Vec<ConciergeSuggestion> {
    // Defer entirely while inference is running (memory/compute contention) or
    // when the user has turned proactive prompts off.
    if inference_active || matches!(prefs.policy, UpdatePolicy::Off) {
        return Vec::new();
    }

    // Throttle: at most one round per window, off the concierge's OWN
    // `last_concierge_secs` — independent of the upgrade nudge's
    // `last_nudge_secs`. The two have different cadences ("you have nothing"
    // should be rarer and more deliberate than "newer exists") and must not
    // starve each other: sharing one field let whichever ran first each tick
    // burn the window forever. `saturating_sub` keeps a clock that stepped
    // backwards from underflowing (it then reads as "zero seconds elapsed").
    let throttled = state.last_concierge_secs != 0
        && now_secs.saturating_sub(state.last_concierge_secs) < throttle_secs;
    if throttled {
        return Vec::new();
    }

    let mut out = Vec::new();
    for (idx, &use_case) in use_cases.iter().enumerate() {
        // A lane that already appeared earlier in the slice was handled then;
        // evaluating it again would only emit the same suggestion twice.
        if use_cases[..idx].contains(&use_case) {
            continue;
        }

        // On-device only — a suggestion the user can act on now.
        let set = recommend(models, hw, use_case, tier, Privacy::OnDevice);

        // Lane already served by an installed model → nothing to suggest. This
        // is the line that separates "you have nothing" (concierge) from
        // "something better exists" (upgrade nudge).
        if set.picks.iter().any(|p| p.already_installed) {
            continue;
        }

        // The acquisition: first on-device, memory-fitting pick. The check
        // above guarantees no pick in this set is installed.
        let Some(pick) = set
            .picks
            .iter()
            .find(|p| p.is_local && p.fit == FitStatus::Fits)
        else {
            continue;
        };

        // Respect an earlier "no thanks" for exactly this lane + model.
        let key = dismiss_key(use_case, &pick.model_id);
        if state.dismissed.iter().any(|k| k == &key) {
            continue;
        }

        out.push(ConciergeSuggestion {
            use_case,
            model_id: pick.model_id.clone(),
            display_name: pick.display_name.clone(),
            download_mb: pick.download_mb,
            message: suggestion_message(use_case, pick),
            dismiss_key: key,
        });
    }
    out
}

/// Renders the single user-facing sentence for a suggestion: what the lane is
/// for, which model would fill it, and how big the download is.
///
/// Deliberately free of model ids, quantization, and repo names. The purpose
/// lookup has no wildcard arm, so a newly added `UseCase` variant will not
/// compile until it is given wording here.
fn suggestion_message(use_case: UseCase, pick: &Recommendation) -> String {
    // From 1024 MB upward the size is shown in GB with one decimal place;
    // below that it stays in whole MB.
    let size = match pick.download_mb {
        mb if mb >= 1024 => format!("{:.1} GB", mb as f64 / 1024.0),
        mb => format!("{mb} MB"),
    };

    let purpose = match use_case {
        UseCase::Assistant => "chat & general help",
        UseCase::Coding => "coding",
        UseCase::Search => "semantic search",
        UseCase::Summarize => "summarizing",
        UseCase::Transcription => "transcription",
        UseCase::Vision => "understanding images",
    };

    format!(
        "You have no model for {purpose}. {} fits your machine ({size}) — install it?",
        pick.display_name
    )
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::hardware::GpuBackend;
    use crate::schema::{
        CostModel, ModelCapability, ModelSchema, ModelSource, PerformanceEnvelope, TrustTier,
    };

    /// Injected clock value shared by every scenario.
    const NOW_SECS: u64 = 100;
    /// Injected throttle window shared by every scenario.
    const THROTTLE_SECS: u64 = 10;

    // A local model schema parameterized by id, install state, download size,
    // and capabilities. `available == installed`. Mirrors recommend.rs's test
    // builder so the recommender ranks it the same way.
    fn model(id: &str, installed: bool, download_mb: u64, caps: &[ModelCapability]) -> ModelSchema {
        ModelSchema {
            id: id.into(),
            name: id.into(),
            provider: "qwen".into(),
            family: "qwen3".into(),
            version: String::new(),
            capabilities: caps.to_vec(),
            context_length: 32768,
            param_count: "4B".into(),
            quantization: Some("Q4_K_M".into()),
            performance: PerformanceEnvelope::default(),
            cost: CostModel {
                size_mb: Some(download_mb),
                ram_mb: Some(download_mb),
                ..Default::default()
            },
            source: ModelSource::Local {
                hf_repo: "x/y".into(),
                hf_filename: "m.gguf".into(),
                tokenizer_repo: "x/y".into(),
            },
            tags: vec![],
            supported_params: vec![],
            public_benchmarks: vec![],
            trust_tier: TrustTier::Curated,
            deprecated: false,
            available: installed,
        }
    }

    fn prefs(policy: UpdatePolicy) -> UpdatePreferences {
        UpdatePreferences {
            policy,
            ..Default::default()
        }
    }

    // 16 GB Apple Silicon (Metal ⇒ Apple tier) so everything here fits.
    fn hw() -> HardwareInfo {
        HardwareInfo {
            os: "test".into(),
            arch: "test".into(),
            cpu_cores: 8,
            total_ram_mb: 16_384,
            gpu_backend: GpuBackend::Metal,
            gpu_memory_mb: None,
            gpu_devices: vec![],
            recommended_model: String::new(),
            recommended_context: 4096,
            max_model_mb: 0,
        }
    }

    /// Registry with a single generate-capable 2000 MB model, `chat-a`, that is
    /// NOT installed — i.e. the Assistant lane is unserved.
    fn chat_a() -> [ModelSchema; 1] {
        [model("chat-a", false, 2000, &[ModelCapability::Generate])]
    }

    /// Runs `decide_concierge` for the Assistant lane at the Balanced tier on
    /// the shared hardware, clock, and throttle window, varying only what the
    /// individual scenarios care about.
    fn decide(
        models: &[ModelSchema],
        policy: UpdatePolicy,
        state: &NudgeState,
        inference_active: bool,
    ) -> Vec<ConciergeSuggestion> {
        let refs: Vec<&ModelSchema> = models.iter().collect();
        decide_concierge(
            &refs,
            &hw(),
            &[UseCase::Assistant],
            QualityTier::Balanced,
            &prefs(policy),
            state,
            NOW_SECS,
            THROTTLE_SECS,
            inference_active,
        )
    }

    #[test]
    fn off_policy_suggests_nothing() {
        let out = decide(&chat_a(), UpdatePolicy::Off, &NudgeState::default(), false);
        assert!(out.is_empty());
    }

    #[test]
    fn active_inference_defers() {
        let out = decide(&chat_a(), UpdatePolicy::Notify, &NudgeState::default(), true);
        assert!(out.is_empty());
    }

    #[test]
    fn suggests_when_lane_unserved() {
        let out = decide(&chat_a(), UpdatePolicy::Notify, &NudgeState::default(), false);
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].use_case, UseCase::Assistant);
        assert_eq!(out[0].model_id, "chat-a");
        assert!(out[0].message.contains("install it?"));
        // The key handed to the client must be the one the dismissal check
        // later looks for.
        assert_eq!(out[0].dismiss_key, dismiss_key(UseCase::Assistant, "chat-a"));
    }

    #[test]
    fn served_lane_suggests_nothing() {
        // An installed model for the lane → no acquisition suggestion.
        let installed = [model("chat-a", true, 2000, &[ModelCapability::Generate])];
        let out = decide(&installed, UpdatePolicy::Notify, &NudgeState::default(), false);
        assert!(out.is_empty());
    }

    #[test]
    fn dismissed_suggestion_not_repeated() {
        let mut state = NudgeState::default();
        state.dismiss(&dismiss_key(UseCase::Assistant, "chat-a"));
        let out = decide(&chat_a(), UpdatePolicy::Notify, &state, false);
        assert!(out.is_empty());
    }

    #[test]
    fn dismissing_a_different_target_does_not_suppress() {
        // The key encodes the target model, so waving away some other model
        // must leave the `chat-a` suggestion free to fire.
        let mut state = NudgeState::default();
        state.dismiss(&dismiss_key(UseCase::Assistant, "chat-b"));
        let out = decide(&chat_a(), UpdatePolicy::Notify, &state, false);
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].model_id, "chat-a");
    }

    #[test]
    fn throttled_within_window() {
        // now=100, last=95, throttle=10 → still throttled.
        let state = NudgeState {
            last_concierge_secs: NOW_SECS - 5,
            ..Default::default()
        };
        let out = decide(&chat_a(), UpdatePolicy::Notify, &state, false);
        assert!(out.is_empty());
    }

    #[test]
    fn throttle_lifts_once_window_has_elapsed() {
        // now=100, last=90, throttle=10 → exactly one full window has passed,
        // so the concierge may fire again (the comparison is strict `<`).
        let state = NudgeState {
            last_concierge_secs: NOW_SECS - THROTTLE_SECS,
            ..Default::default()
        };
        let out = decide(&chat_a(), UpdatePolicy::Notify, &state, false);
        assert_eq!(out.len(), 1);
    }

    #[test]
    fn not_throttled_by_upgrade_nudge_field() {
        // Regression guard for the shared-throttle starvation bug: a recent
        // *upgrade nudge* (last_nudge_secs) must NOT throttle the concierge,
        // which throttles only on its own last_concierge_secs.
        let state = NudgeState {
            last_nudge_secs: NOW_SECS - 1, // upgrade nudge fired one tick ago
            last_concierge_secs: 0,        // concierge never has
            ..Default::default()
        };
        let out = decide(&chat_a(), UpdatePolicy::Notify, &state, false);
        assert_eq!(
            out.len(),
            1,
            "concierge must fire regardless of the upgrade nudge's window"
        );
    }

    #[test]
    fn dismiss_key_is_stable_slug_not_debug() {
        // The persisted key must use the stable slug, not Debug formatting.
        assert_eq!(
            dismiss_key(UseCase::Assistant, "chat-a"),
            "concierge:assistant=>chat-a"
        );
        assert_eq!(
            dismiss_key(UseCase::Coding, "code-x"),
            "concierge:coding=>code-x"
        );
    }
}