// car-inference 0.22.0

//! Upstream-aware upgrade detection.
//!
//! The registry's `available_upgrades()` only fires on hand-authored rules in
//! `model-upgrades.json` — the *curated*, verified tier. This module unifies
//! that with *upstream* discovery: for an installed model it can ask the Hub
//! whether a newer revision exists. Findings are tagged by trust tier and
//! source so the UI (and auto-apply policy) can treat verified curated
//! upgrades differently from unverified upstream ones.
//!
//! Properties required by the design:
//! - **Channel-aware**: upstream probing runs only on the `Latest` channel;
//!   `Stable` is curated-only.
//! - **Offline-safe**: any probe error (no network, Hub down, rate limit)
//!   degrades silently to curated-only — never an error to the caller.
//! - **Cached / rate-limited**: upstream results are cached with a TTL so we
//!   don't hit the Hub on every check.
//!
//! The probe is a trait so the orchestration is unit-testable without a
//! network (inject a fake), and the real Hub implementation stays thin.

use std::future::Future;
use std::path::{Path, PathBuf};

use serde::{Deserialize, Serialize};

use crate::registry::ModelUpgrade;
use crate::schema::{ModelSchema, ModelSource, TrustTier};
use crate::update_prefs::{UpdateChannel, UpdatePreferences};

/// Where an upgrade finding came from.
///
/// Serialized in `snake_case` (`"curated"` / `"upstream"`), so the variant
/// names are part of the on-disk cache format.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum UpgradeSource {
    /// A vetted rule in `model-upgrades.json`.
    Curated,
    /// A newer revision discovered upstream on the Hub. Unverified.
    Upstream,
}

/// A single "something newer is available" result, unifying curated rules and
/// upstream discoveries.
///
/// Findings are what the upgrade cache persists, so the field names are part
/// of the cache file's JSON shape.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct UpgradeFinding {
    /// Id of the model the upgrade applies to.
    pub from_id: String,
    /// Display name of the model the upgrade applies to.
    pub from_name: String,
    /// Id of the upgrade target. For upstream findings this equals `from_id`
    /// (same model, newer revision).
    pub to_id: String,
    /// Display name of the upgrade target.
    pub to_name: String,
    /// Plain-language reason to show the user.
    pub reason: String,
    /// `Curated` (verified) or `Community` (upstream, unverified).
    pub trust_tier: TrustTier,
    /// Which discovery path produced this finding.
    pub source: UpgradeSource,
    /// Whether CAR can pull the target directly (local/MLX) vs needs setup.
    pub target_pullable: bool,
}

impl UpgradeFinding {
    fn from_curated(u: ModelUpgrade) -> Self {
        UpgradeFinding {
            from_id: u.from_id,
            from_name: u.from_name,
            to_id: u.to_id,
            to_name: u.to_name,
            reason: u.reason,
            trust_tier: TrustTier::Curated,
            source: UpgradeSource::Curated,
            target_pullable: u.target_pullable,
        }
    }
}

/// Asks whether a newer upstream revision exists for an installed model.
/// Implementations must be offline-safe: return `None` on any failure rather
/// than erroring.
///
/// Because "no newer revision" and "could not tell" both map to `None`,
/// callers cannot distinguish the two outcomes.
pub trait UpstreamProbe {
    /// `Some(reason)` if a newer revision exists upstream; `None` if not, or
    /// if it can't be determined (offline, uncached, error).
    ///
    /// The returned future must be `Send`.
    fn newer_revision(&self, schema: &ModelSchema) -> impl Future<Output = Option<String>> + Send;
}

/// Cached upstream findings with a freshness timestamp, persisted so repeated
/// checks within the TTL don't hit the Hub.
///
/// Every field is `#[serde(default)]`, so a cache file missing any of them
/// still deserializes (to zero / empty values).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct UpgradeCache {
    /// Unix seconds (as supplied by the caller) of the last upstream probe
    /// pass that was written to the cache. `0` means "never checked". A pass
    /// is recorded even when every probe returned `None`, so this is not
    /// proof that the Hub was actually reachable.
    #[serde(default)]
    pub checked_at_secs: u64,
    /// Fingerprint of the installed-model set the cache was built for. If the
    /// user installs/removes a model the fingerprint changes, invalidating the
    /// cache so the new model gets probed before the TTL expires.
    #[serde(default)]
    pub models_fingerprint: String,
    /// Upstream findings produced by that probe pass (may be empty).
    #[serde(default)]
    pub upstream: Vec<UpgradeFinding>,
}

impl UpgradeCache {
    /// Default on-disk location of the cache: `$HOME/.car/upgrade-cache.json`.
    ///
    /// Falls back to `./.car/upgrade-cache.json` when `HOME` is unset or not
    /// valid Unicode.
    pub fn default_path() -> PathBuf {
        std::env::var("HOME")
            .map(PathBuf::from)
            .unwrap_or_else(|_| PathBuf::from("."))
            .join(".car")
            .join("upgrade-cache.json")
    }

    /// Load the cache from `path`.
    ///
    /// Never fails: a missing, unreadable or unparsable file yields the
    /// default (empty, never-checked) cache.
    pub fn load_from(path: &Path) -> Self {
        std::fs::read_to_string(path)
            .ok()
            .and_then(|s| serde_json::from_str(&s).ok())
            .unwrap_or_default()
    }

    /// Write the cache to `path` as pretty-printed JSON, creating the parent
    /// directory first if needed.
    ///
    /// # Errors
    ///
    /// Returns the stringified I/O or serialization error.
    pub fn save_to(&self, path: &Path) -> Result<(), String> {
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
        }
        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
        std::fs::write(path, json).map_err(|e| e.to_string())
    }

    /// Fresh if the last check happened at or before `now_secs` and less than
    /// `ttl_secs` ago.
    ///
    /// A cache that was never written (`checked_at_secs == 0`) is never
    /// fresh. Neither is one stamped in the future relative to `now_secs`:
    /// that only happens when the clock moved backwards, and trusting such a
    /// stamp would suppress re-probing until the clock caught up with it.
    pub fn is_fresh(&self, now_secs: u64, ttl_secs: u64) -> bool {
        if self.checked_at_secs == 0 {
            return false;
        }
        // `checked_sub` is `None` exactly when the stamp lies in the future.
        now_secs
            .checked_sub(self.checked_at_secs)
            .is_some_and(|age_secs| age_secs < ttl_secs)
    }
}

/// Default cache TTL: re-probe the Hub at most once a day (24 h, expressed in
/// seconds).
pub const DEFAULT_TTL_SECS: u64 = 24 * 60 * 60;

/// Produce the merged list of upgrade findings for the `installed` models.
///
/// Everything that varies (curated rules, probe, cache location, clock) is
/// passed in by the caller, so the function can be exercised without a
/// network or a real clock.
///
/// - Every curated rule is converted to a finding and always included.
/// - Upstream findings are consulted only when `prefs.channel` is `Latest`
///   and `prefs.checks_enabled()` holds. They come from the cache at
///   `cache_path` while it is fresh for `ttl_secs`, otherwise from `probe`.
/// - An upstream finding is dropped when a finding with the same `from_id`
///   has already been kept, so a curated rule always wins.
///
/// The result is ordered by `(from_id, to_id)` and contains at most one
/// finding per such pair.
pub async fn detect_upgrades<P: UpstreamProbe>(
    curated: Vec<ModelUpgrade>,
    installed: &[&ModelSchema],
    prefs: &UpdatePreferences,
    probe: &P,
    cache_path: &Path,
    now_secs: u64,
    ttl_secs: u64,
) -> Vec<UpgradeFinding> {
    let mut merged = Vec::with_capacity(curated.len());
    merged.extend(curated.into_iter().map(UpgradeFinding::from_curated));

    let upstream_allowed = prefs.channel == UpdateChannel::Latest && prefs.checks_enabled();
    if upstream_allowed {
        let discovered =
            upstream_findings(installed, probe, cache_path, now_secs, ttl_secs).await;
        for candidate in discovered {
            // Anything already kept for this model takes precedence.
            let already_covered = merged.iter().any(|kept| kept.from_id == candidate.from_id);
            if !already_covered {
                merged.push(candidate);
            }
        }
    }

    // Stable sort on the (from, to) pair, then collapse adjacent repeats.
    merged.sort_by(|lhs, rhs| (&lhs.from_id, &lhs.to_id).cmp(&(&rhs.from_id, &rhs.to_id)));
    merged.dedup_by(|later, earlier| {
        later.from_id == earlier.from_id && later.to_id == earlier.to_id
    });
    merged
}

/// Return the upstream findings for `installed`, preferring the on-disk cache.
///
/// The cache at `cache_path` is returned as-is when it is fresh for
/// `ttl_secs` and was built for the same installed-model fingerprint.
/// Otherwise each available model that has a Hub repo is probed, one at a
/// time, and the outcome is written back with `now_secs` as its timestamp.
///
/// A probe answering `None` simply contributes no finding, and an empty
/// outcome is cached like any other, which suppresses further probing until
/// the TTL runs out or the installed set changes. Failing to write the cache
/// is ignored.
async fn upstream_findings<P: UpstreamProbe>(
    installed: &[&ModelSchema],
    probe: &P,
    cache_path: &Path,
    now_secs: u64,
    ttl_secs: u64,
) -> Vec<UpgradeFinding> {
    let current_set = installed_fingerprint(installed);
    let cached = UpgradeCache::load_from(cache_path);
    let cache_usable =
        cached.is_fresh(now_secs, ttl_secs) && cached.models_fingerprint == current_set;
    if cache_usable {
        return cached.upstream;
    }

    // Build the replacement cache record in place; probing fills `upstream`.
    let mut refreshed = UpgradeCache {
        checked_at_secs: now_secs,
        models_fingerprint: current_set,
        upstream: Vec::new(),
    };

    // One Hub request at a time, and only on a cache miss.
    for &model in installed {
        // Only locally-installed models with a Hub repo can have an upstream.
        let probeable = model.available && repo_of(model).is_some();
        if !probeable {
            continue;
        }
        let Some(reason) = probe.newer_revision(model).await else {
            continue;
        };

        let target_pullable = matches!(
            model.source,
            ModelSource::Local { .. } | ModelSource::Mlx { .. }
        );
        // Same model line, newer revision: the target id is the source id,
        // and re-pulling it refreshes the local copy.
        refreshed.upstream.push(UpgradeFinding {
            from_id: model.id.clone(),
            from_name: model.name.clone(),
            to_id: model.id.clone(),
            to_name: model.name.clone(),
            reason,
            trust_tier: TrustTier::Community,
            source: UpgradeSource::Upstream,
            target_pullable,
        });
    }

    // Best-effort persist: a write failure must not break detection.
    let _ = refreshed.save_to(cache_path);
    refreshed.upstream
}

/// Stable fingerprint of the installed-model set (sorted ids of available
/// models). Changes when a model is installed or removed.
///
/// The value is persisted in the upgrade cache and compared across runs, so
/// it must not depend on anything that can differ between builds. It is
/// therefore a fixed FNV-1a 64-bit digest rather than the output of
/// `DefaultHasher`, whose algorithm the standard library leaves unspecified
/// across releases.
///
/// Returns 16 lowercase hex digits. Input order does not matter; models with
/// `available == false` are ignored.
fn installed_fingerprint(installed: &[&ModelSchema]) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    // 0xFF never occurs in UTF-8, so it unambiguously terminates each id and
    // ["ab", "c"] cannot collide with ["a", "bc"].
    const ID_TERMINATOR: u8 = 0xff;

    let mut ids: Vec<&str> = installed
        .iter()
        .filter(|m| m.available)
        .map(|m| m.id.as_str())
        .collect();
    ids.sort_unstable();

    let digest = ids
        .iter()
        .flat_map(|id| id.bytes().chain(std::iter::once(ID_TERMINATOR)))
        .fold(FNV_OFFSET_BASIS, |hash, byte| {
            (hash ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        });
    format!("{digest:016x}")
}

/// Hub repository backing `schema`, when its source is one of the kinds that
/// records one (`Local` or `Mlx`); `None` for every other source.
fn repo_of(schema: &ModelSchema) -> Option<&str> {
    if let ModelSource::Local { hf_repo, .. } | ModelSource::Mlx { hf_repo, .. } =
        &schema.source
    {
        Some(hf_repo)
    } else {
        None
    }
}

// --- real Hub probe --------------------------------------------------------

/// Probes the HuggingFace Hub for a newer commit than the one cached locally.
/// Compares the locally cached `refs/main` sha against the repo's current sha
/// from the Hub model-info API. Fully offline-safe: any error → `None`.
pub struct HuggingFaceProbe {
    /// `None` when the HTTP client could not be constructed. Every remote
    /// lookup then answers `None`, the same as being offline.
    client: Option<reqwest::Client>,
}

impl Default for HuggingFaceProbe {
    fn default() -> Self {
        Self::new()
    }
}

impl HuggingFaceProbe {
    /// Upper bound on a single Hub request.
    const REQUEST_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(8);

    /// Create a probe whose requests time out after 8 seconds.
    ///
    /// Never panics. If the HTTP client cannot be built, the probe is still
    /// returned and simply never reports a newer revision. Falling back to
    /// `reqwest::Client::default()` is deliberately avoided: it panics on
    /// the same failure and would carry no timeout.
    pub fn new() -> Self {
        let client = reqwest::Client::builder()
            .timeout(Self::REQUEST_TIMEOUT)
            .build()
            .ok();
        HuggingFaceProbe { client }
    }

    /// Current commit sha of `repo` according to the Hub model-info API.
    ///
    /// Returns `None` when there is no usable client, the request fails or
    /// times out, the status is not a success, the body is not JSON, or the
    /// JSON has no string `sha` field.
    async fn remote_sha(&self, repo: &str) -> Option<String> {
        let client = self.client.as_ref()?;
        let url = format!("https://huggingface.co/api/models/{repo}");
        let resp = client.get(&url).send().await.ok()?;
        if !resp.status().is_success() {
            return None;
        }
        let json: serde_json::Value = resp.json().await.ok()?;
        json.get("sha")?.as_str().map(|s| s.to_string())
    }
}

impl UpstreamProbe for HuggingFaceProbe {
    /// Report a newer revision when the Hub's current sha differs from the
    /// sha recorded in the local cache for the model's repo.
    ///
    /// Answers `None` when the model has no Hub repo, when nothing is cached
    /// locally to compare against, when the Hub cannot be queried, or when
    /// both shas match. The local lookup happens first, so no request is
    /// sent for a repo without a cached ref.
    async fn newer_revision(&self, schema: &ModelSchema) -> Option<String> {
        let repo = repo_of(schema)?;
        let cached = local_main_sha(repo)?;
        let current = self.remote_sha(repo).await?;
        (current != cached)
            .then(|| format!("A newer revision of {repo} is available on Hugging Face."))
    }
}

/// Look up the commit sha that the local Hub cache recorded for `repo`'s
/// `main` ref.
///
/// The file read is `<root>/hub/models--<repo with '/' as "--">/refs/main`,
/// where `<root>` is `$HF_HOME` if set, else `$HOME/.cache/huggingface`
/// (with `.` standing in for an unset `HOME`). Surrounding whitespace is
/// trimmed. Returns `None` when the file cannot be read or is blank.
fn local_main_sha(repo: &str) -> Option<String> {
    let hf_root = match std::env::var("HF_HOME") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => {
            let home = match std::env::var("HOME") {
                Ok(dir) => PathBuf::from(dir),
                Err(_) => PathBuf::from("."),
            };
            home.join(".cache").join("huggingface")
        }
    };

    let mut ref_path = hf_root;
    ref_path.push("hub");
    ref_path.push(format!("models--{}", repo.replace('/', "--")));
    ref_path.push("refs");
    ref_path.push("main");

    let contents = std::fs::read_to_string(ref_path).ok()?;
    let sha = contents.trim();
    if sha.is_empty() {
        None
    } else {
        Some(sha.to_string())
    }
}

// Offline unit tests for the orchestration in `detect_upgrades`. The Hub is
// never contacted: every test injects a fake `UpstreamProbe` and a throwaway
// cache file under the system temp directory.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::{CostModel, ModelCapability, PerformanceEnvelope};

    /// Build a minimal `Local`-sourced schema whose id and name are both `id`
    /// and whose Hub repo is `org/<id>`. `available` controls whether the
    /// model counts as installed.
    fn local_schema(id: &str, available: bool) -> ModelSchema {
        ModelSchema {
            id: id.into(),
            name: id.into(),
            provider: "qwen".into(),
            family: "qwen3".into(),
            version: String::new(),
            capabilities: vec![ModelCapability::Generate],
            context_length: 8192,
            param_count: "4B".into(),
            quantization: None,
            performance: PerformanceEnvelope::default(),
            cost: CostModel::default(),
            source: ModelSource::Local {
                hf_repo: format!("org/{id}"),
                hf_filename: "m.gguf".into(),
                tokenizer_repo: format!("org/{id}"),
            },
            tags: vec![],
            supported_params: vec![],
            public_benchmarks: vec![],
            trust_tier: TrustTier::Curated,
            deprecated: false,
            available,
        }
    }

    /// A probe with a fixed answer: reports a newer revision for every model
    /// when `newer` is true, and nothing otherwise.
    struct FakeProbe {
        newer: bool,
    }
    impl UpstreamProbe for FakeProbe {
        async fn newer_revision(&self, _schema: &ModelSchema) -> Option<String> {
            if self.newer {
                Some("newer upstream".into())
            } else {
                None
            }
        }
    }

    /// A probe that panics if called — proves we didn't hit the network.
    struct NeverProbe;
    impl UpstreamProbe for NeverProbe {
        async fn newer_revision(&self, _schema: &ModelSchema) -> Option<String> {
            panic!("probe must not be called");
        }
    }

    /// Per-test cache path in the temp directory. `tag` keeps tests within
    /// one process apart; the process id keeps separate runs apart.
    fn tmp_cache(tag: &str) -> PathBuf {
        std::env::temp_dir().join(format!("car-upgrade-{tag}-{}.json", std::process::id()))
    }

    // Default preferences must not reach the probe at all.
    #[tokio::test]
    async fn stable_channel_is_curated_only_and_never_probes() {
        let prefs = UpdatePreferences::default(); // Stable
        let installed = local_schema("qwen3-4b", true);
        let cache = tmp_cache("stable");
        let findings = detect_upgrades(
            vec![],
            &[&installed],
            &prefs,
            &NeverProbe, // would panic if probed
            &cache,
            1000,
            DEFAULT_TTL_SECS,
        )
        .await;
        assert!(findings.is_empty());
        let _ = std::fs::remove_file(&cache);
    }

    // On `Latest` with no cache file, a positive probe yields one upstream,
    // community-tier finding.
    #[tokio::test]
    async fn latest_channel_adds_upstream_findings() {
        let prefs = UpdatePreferences {
            channel: UpdateChannel::Latest,
            ..Default::default()
        };
        let installed = local_schema("qwen3-4b", true);
        let cache = tmp_cache("latest");
        // Start from a clean slate so a leftover file cannot be served.
        let _ = std::fs::remove_file(&cache);
        let findings = detect_upgrades(
            vec![],
            &[&installed],
            &prefs,
            &FakeProbe { newer: true },
            &cache,
            1000,
            DEFAULT_TTL_SECS,
        )
        .await;
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].source, UpgradeSource::Upstream);
        assert_eq!(findings[0].trust_tier, TrustTier::Community);
        let _ = std::fs::remove_file(&cache);
    }

    // Models with `available == false` are filtered out before probing.
    #[tokio::test]
    async fn uninstalled_models_are_not_probed() {
        let prefs = UpdatePreferences {
            channel: UpdateChannel::Latest,
            ..Default::default()
        };
        let installed = local_schema("qwen3-4b", false); // not installed
        let cache = tmp_cache("uninstalled");
        let _ = std::fs::remove_file(&cache);
        let findings = detect_upgrades(
            vec![],
            &[&installed],
            &prefs,
            &NeverProbe, // skipped before probe because !available
            &cache,
            1000,
            DEFAULT_TTL_SECS,
        )
        .await;
        assert!(findings.is_empty());
        let _ = std::fs::remove_file(&cache);
    }

    // A cache that is within the TTL and matches the installed set is
    // returned verbatim, without consulting the probe.
    #[tokio::test]
    async fn fresh_cache_is_served_without_probing() {
        let prefs = UpdatePreferences {
            channel: UpdateChannel::Latest,
            ..Default::default()
        };
        let installed = local_schema("qwen3-4b", true);
        let cache = tmp_cache("fresh");
        // Seed a fresh cache with a finding, matching the installed-set
        // fingerprint so it isn't invalidated.
        UpgradeCache {
            checked_at_secs: 1000,
            models_fingerprint: installed_fingerprint(&[&installed]),
            upstream: vec![UpgradeFinding {
                from_id: "qwen3-4b".into(),
                from_name: "qwen3-4b".into(),
                to_id: "qwen3-4b".into(),
                to_name: "qwen3-4b".into(),
                reason: "cached".into(),
                trust_tier: TrustTier::Community,
                source: UpgradeSource::Upstream,
                target_pullable: true,
            }],
        }
        .save_to(&cache)
        .unwrap();
        // now within TTL of checked_at ⇒ NeverProbe must not be called.
        let findings = detect_upgrades(
            vec![],
            &[&installed],
            &prefs,
            &NeverProbe,
            &cache,
            1500,
            DEFAULT_TTL_SECS,
        )
        .await;
        assert_eq!(findings.len(), 1);
        // The reason string proves the finding came from the seeded cache.
        assert_eq!(findings[0].reason, "cached");
        let _ = std::fs::remove_file(&cache);
    }

    #[tokio::test]
    async fn fresh_cache_for_a_different_model_set_is_invalidated() {
        // A fresh cache built for a DIFFERENT installed set must not be served;
        // the newly installed model has to be probed.
        let prefs = UpdatePreferences {
            channel: UpdateChannel::Latest,
            ..Default::default()
        };
        let installed = local_schema("qwen3-8b", true); // different model
        let cache = tmp_cache("fingerprint");
        // Within the TTL, but the fingerprint cannot match any real set.
        UpgradeCache {
            checked_at_secs: 1000,
            models_fingerprint: "stale-different-set".into(),
            upstream: vec![],
        }
        .save_to(&cache)
        .unwrap();
        let findings = detect_upgrades(
            vec![],
            &[&installed],
            &prefs,
            &FakeProbe { newer: true }, // must be called → fingerprint mismatch
            &cache,
            1500,
            DEFAULT_TTL_SECS,
        )
        .await;
        assert_eq!(findings.len(), 1, "stale-fingerprint cache must re-probe");
        let _ = std::fs::remove_file(&cache);
    }

    // When a curated rule and an upstream discovery share a `from_id`, only
    // the curated finding survives.
    #[tokio::test]
    async fn curated_wins_over_upstream_for_same_model() {
        let prefs = UpdatePreferences {
            channel: UpdateChannel::Latest,
            ..Default::default()
        };
        let installed = local_schema("qwen3-4b", true);
        let cache = tmp_cache("dedup");
        let _ = std::fs::remove_file(&cache);
        let curated = vec![ModelUpgrade {
            from_id: "qwen3-4b".into(),
            from_name: "qwen3-4b".into(),
            to_id: "qwen3-8b".into(),
            to_name: "qwen3-8b".into(),
            reason: "curated replacement".into(),
            target_runtime: None,
            target_runtime_requirement: None,
            minimum_runtimes: vec![],
            target_available: true,
            target_pullable: true,
            remove_old_supported: true,
        }];
        let findings = detect_upgrades(
            curated,
            &[&installed],
            &prefs,
            &FakeProbe { newer: true },
            &cache,
            1000,
            DEFAULT_TTL_SECS,
        )
        .await;
        // Only the curated finding for qwen3-4b; upstream for same from_id dropped.
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].source, UpgradeSource::Curated);
        let _ = std::fs::remove_file(&cache);
    }
}