// car-inference 0.24.1
//
// Local model inference for CAR — Candle backend with Qwen3 models
//! Provider model auto-discovery (Phase E2).
//!
//! Keeps the catalog current *without a release*: queries a remote provider's
//! model list (OpenAI `/v1/models`) and registers previously-unknown
//! chat/reasoning models as [`TrustTier::Community`] entries. Each discovered
//! model clones the schema of a curated same-provider model as a template (the
//! caller picks the most-capable one), so routing metadata (capabilities,
//! endpoint, key env, protocol) is inherited and the actual quality is learned
//! over time via outcomes.
//!
//! Community trust means discovered models are *routable* (incl. under the
//! `prefer_quality` workload) but never background-auto-applied — a deliberate
//! curated/discovered split. The HTTP fetch is the only impure part; the
//! filter and schema-mapping are pure and unit-tested below.

use std::path::{Path, PathBuf};

use serde::{Deserialize, Serialize};

use crate::schema::{CostModel, ModelSchema, TrustTier};

/// Cached discovered models, loaded into the registry at startup (alongside the
/// built-in and signed catalogs). Unsigned/best-effort: a corrupt file is
/// ignored, never fatal.
///
/// This is the serde document shape: a single object wrapping the model list.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct DiscoveryDoc {
    /// Discovered model schemas. `#[serde(default)]` lets a document that has
    /// no `models` key deserialize to an empty list instead of failing.
    #[serde(default)]
    pub models: Vec<ModelSchema>,
}

/// Location of the discovery cache file inside `models_dir`.
///
/// The file name is always `discovered_models.json`. This only builds the
/// path; nothing is created or checked on disk.
pub fn cache_path(models_dir: &Path) -> PathBuf {
    let mut cache_file = models_dir.to_path_buf();
    cache_file.push("discovered_models.json");
    cache_file
}

/// Read the discovery cache at `path` and return the models it contains.
///
/// Best-effort by design: an unreadable file (missing, permissions, not UTF-8)
/// or a document that fails to parse as a [`DiscoveryDoc`] yields an empty
/// list rather than an error.
pub fn load_cache(path: &Path) -> Vec<ModelSchema> {
    // Any read failure is treated the same as "no cache yet".
    let Ok(raw) = std::fs::read_to_string(path) else {
        return Vec::new();
    };
    // A corrupt cache is ignored, never fatal.
    match serde_json::from_str::<DiscoveryDoc>(&raw) {
        Ok(doc) => doc.models,
        Err(_) => Vec::new(),
    }
}

/// Write `models` to the discovery cache at `path` as pretty-printed JSON.
///
/// Any existing file is overwritten (discovery is idempotent, so the newest
/// result simply replaces the previous one).
///
/// # Errors
///
/// Returns the stringified serialization or I/O error.
pub fn save_cache(path: &Path, models: &[ModelSchema]) -> Result<(), String> {
    let document = DiscoveryDoc { models: Vec::from(models) };
    let rendered = match serde_json::to_string_pretty(&document) {
        Ok(text) => text,
        Err(err) => return Err(err.to_string()),
    };
    match std::fs::write(path, rendered) {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}

/// Heuristic filter: does this OpenAI model id name a chat/reasoning
/// *generation* model, as opposed to an embedding/audio/image/moderation
/// endpoint that generations are never routed to?
///
/// The id is compared case-insensitively (ASCII). It must belong to one of the
/// generation families — `gpt-*`, `chatgpt-*`, or `o<digit>*` (o3, o4-mini, …)
/// — and must not contain any term that names a non-generation modality.
/// So a new `gpt-5.5` passes, while `text-embedding-3-large`, `gpt-4o-audio`,
/// `dall-e-3`, `whisper-1`, and `omni-moderation-*` are rejected.
pub fn is_chat_model(id: &str) -> bool {
    /// Substrings that mark a model as something other than chat generation.
    const DENY: &[&str] = &[
        "embedding", "audio", "realtime", "transcribe", "tts", "image",
        "moderation", "search", "dall", "whisper", "instruct",
    ];

    let lowered = id.to_ascii_lowercase();

    // `o` immediately followed by a digit: the o-series reasoning models.
    // (`omni-…` starts with `o` too, but fails the digit check.)
    let o_series = lowered
        .strip_prefix('o')
        .and_then(|rest| rest.chars().next())
        .is_some_and(|c| c.is_ascii_digit());

    let generation_family =
        lowered.starts_with("gpt-") || lowered.starts_with("chatgpt-") || o_series;

    generation_family && DENY.iter().copied().all(|term| !lowered.contains(term))
}

/// Derive a provider's `/models` listing URL from a chat/completions endpoint
/// by truncating at the `/v1` API-version segment.
///
/// `/v1` is only accepted as a *whole path segment*: it must be followed by
/// `/`, `?`, `#`, or the end of the string. A plain substring match would
/// anchor inside a host name (`https://v1.example.com/...`) or silently
/// rewrite a different version such as `/v1beta` or `/v10` to `/v1`.
///
/// Returns `None` if there is no recognizable `/v1` segment to anchor on.
pub fn models_url_from_endpoint(endpoint: &str) -> Option<String> {
    const MARKER: &str = "/v1";
    let bytes = endpoint.as_bytes();

    // First occurrence of the marker that ends on a segment boundary.
    let base_end = endpoint
        .match_indices(MARKER)
        .map(|(start, _)| start + MARKER.len())
        .find(|&end| matches!(bytes.get(end).copied(), None | Some(b'/' | b'?' | b'#')))?;

    // `base_end` directly follows the ASCII marker, so it is a char boundary.
    Some(format!("{}/models", &endpoint[..base_end]))
}

/// Build a Community-tier schema for a newly-discovered model by cloning a
/// template (a curated same-provider model the caller selects — see
/// `discover_models`, which picks the most-capable one) and overriding
/// identity + trust. Pricing is cleared (unknown until curated/learned); the
/// router damps Community quality and treats unknown cost as non-free, and
/// observed quality is learned over time.
pub fn discovered_schema(provider: &str, bare_id: &str, template: &ModelSchema) -> ModelSchema {
    let mut s = template.clone();
    s.id = format!("{provider}/{bare_id}:latest");
    s.name = bare_id.to_string();
    s.version = bare_id.to_string();
    s.trust_tier = TrustTier::Community;
    s.cost = CostModel::default();
    s.public_benchmarks = vec![];
    s.deprecated = false;
    s.available = false; // refresh_availability() decides at registration
    s
}

/// Retrieve the model ids a provider advertises on its `/models` endpoint.
///
/// Issues a GET to `models_url` with `api_key` as a bearer token and expects
/// the OpenAI response shape `{"data":[{"id":...}, ...]}`. Entries without a
/// string `id` are skipped; a body with no `data` array yields an empty list.
/// This is the impure (network) part of discovery and is kept deliberately
/// thin.
///
/// # Errors
///
/// - `discovery fetch: …` when the request fails or the status is not 2xx;
/// - `discovery parse: …` when the body is not valid JSON.
pub async fn fetch_model_ids(
    http: &reqwest::Client,
    models_url: &str,
    api_key: &str,
) -> Result<Vec<String>, String> {
    let request = http.get(models_url).bearer_auth(api_key);
    let response = match request.send().await {
        Ok(response) => response,
        Err(e) => return Err(format!("discovery fetch: {e}")),
    };

    let status = response.status();
    if !status.is_success() {
        return Err(format!("discovery fetch: HTTP {status}"));
    }

    let payload = response
        .json::<serde_json::Value>()
        .await
        .map_err(|e| format!("discovery parse: {e}"))?;

    // No `data` array at all is treated as "no models", not as an error.
    let Some(entries) = payload.get("data").and_then(serde_json::Value::as_array) else {
        return Ok(Vec::new());
    };

    let mut ids = Vec::new();
    for entry in entries {
        if let Some(id) = entry.get("id").and_then(serde_json::Value::as_str) {
            ids.push(id.to_owned());
        }
    }
    Ok(ids)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Generation-family ids are accepted; other modalities and legacy
    /// completion models are filtered out.
    #[test]
    fn chat_models_pass_non_generation_rejected() {
        let accepted = ["gpt-5.5", "gpt-5.5-mini", "gpt-4.1", "o3", "o4-mini", "chatgpt-4o-latest"];
        let rejected = [
            "text-embedding-3-large", "gpt-4o-audio-preview", "gpt-realtime",
            "dall-e-3", "whisper-1", "omni-moderation-latest", "gpt-4o-transcribe",
            "tts-1", "gpt-image-1", "babbage-002",
        ];

        for id in accepted {
            assert!(is_chat_model(id), "{id} should be a chat model");
        }
        for id in rejected {
            assert!(!is_chat_model(id), "{id} should be rejected");
        }
    }

    /// Any endpoint under `/v1` maps to the same listing URL; an endpoint
    /// without a version segment has nothing to anchor on.
    #[test]
    fn models_url_derives_from_chat_endpoint() {
        let listing = Some("https://api.openai.com/v1/models");
        let versioned_endpoints = [
            "https://api.openai.com/v1/chat/completions",
            "https://api.openai.com/v1/responses",
        ];
        for endpoint in versioned_endpoints {
            assert_eq!(models_url_from_endpoint(endpoint).as_deref(), listing);
        }

        assert_eq!(models_url_from_endpoint("https://example.com/openai"), None);
    }

    /// Cloning a curated entry keeps its routing metadata while identity,
    /// trust, pricing and availability are overridden.
    #[test]
    fn discovered_schema_inherits_template_and_marks_community() {
        // The real gpt-5.4 entry from the built-in catalog serves as template.
        let builtin: Vec<ModelSchema> =
            serde_json::from_str(include_str!("builtin_catalog.json")).unwrap();
        let template = builtin
            .iter()
            .find(|entry| entry.id == "openai/gpt-5.4:latest")
            .expect("gpt-5.4 present in builtin catalog");

        let discovered = discovered_schema("openai", "gpt-5.5", template);

        // Overridden identity and trust.
        assert_eq!(discovered.id, "openai/gpt-5.5:latest");
        assert_eq!(discovered.name, "gpt-5.5");
        assert_eq!(discovered.trust_tier, TrustTier::Community);

        // Inherited from the template.
        assert_eq!(discovered.provider, "openai");
        assert!(!discovered.capabilities.is_empty());

        // Reset: pricing unknown, availability decided later.
        assert!(discovered.cost.input_per_mtok.is_none());
        assert!(!discovered.available);
    }
}