//! `cortex models list` — discover locally available LLM models.
//!
//! ## Subcommand surface
//!
//! ```text
//! cortex models list [--backend ollama|openai-compat] [--json]
//! ```
//!
//! When `--backend ollama` (the default when Ollama is configured): issues a
//! `GET {endpoint}/api/tags` request to the Ollama HTTP API and renders the
//! returned model list. Fields rendered:
//! `name`, `size`, `parameter_size`, `quantization_level`.
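//!
//! An abbreviated sketch of the `/api/tags` response shape this module
//! deserializes (values are illustrative; only the fields read here are shown,
//! real responses carry additional fields):
//!
//! ```text
//! {
//!   "models": [
//!     {
//!       "name": "llama3.1:8b",
//!       "digest": "<sha256 hex digest>",
//!       "size": 4661000000,
//!       "details": { "parameter_size": "8B", "quantization_level": "Q4_0" }
//!     }
//!   ]
//! }
//! ```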
//!
//! When `--backend openai-compat`: issues a `GET {base_url}/v1/models`
//! request using the OpenAI-compatible models endpoint. Only `id` is available
//! from that API; size and parameter fields are absent.
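//!
//! Sketch of the `/v1/models` response shape consumed here (only `id` and
//! `object` are deserialized; the id is a placeholder):
//!
//! ```text
//! { "data": [ { "id": "<model-id>", "object": "model" } ] }
//! ```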
//!
//! ## Backend auto-selection
//!
//! When `--backend` is omitted the command auto-selects based on the resolved
//! `[llm]` config:
//!
//! - `backend = "ollama"` → Ollama path.
//! - `backend = "openai-compat"` → OpenAI-compat path.
//! - `backend = "claude"` → Claude is a remote API with no local model list;
//!   the command prints a hint to use `--backend ollama` or
//!   `--backend openai-compat` and exits with a usage error.
//! - `backend = "offline"` / no `[llm]` section → prints a guidance message
//!   and exits `0`. No network request is made.
//!
//! ## Configured-for annotation
//!
//! The human-readable table marks which model is currently configured as the
//! reflection backend (`← reflection`). The JSON output expresses this as
//! `"configured_for": ["reflection"]` on the matching model entry. Only the
//! reflection role is tracked at present; no embeddings annotation is emitted.
//!
//! ## No authentication
//!
//! Ollama's `/api/tags` endpoint requires no API key, and local
//! OpenAI-compatible servers typically serve `/v1/models` without one. This
//! command deliberately does not read or transmit credentials.

use clap::{Args, Subcommand, ValueEnum};
use serde::Serialize;
use tracing::debug;

use crate::config::{LlmBackend, DEFAULT_OLLAMA_ENDPOINT};
use crate::exit::Exit;
use crate::output::{self, Envelope};

// ──────────────────────────────────────────────────────────────────────────────
// Public CLI types
// ──────────────────────────────────────────────────────────────────────────────

/// Top-level `cortex models` subcommand dispatcher.
#[derive(Debug, Clone, Subcommand)]
pub enum ModelsSub {
    /// List models available from the configured (or specified) LLM backend.
    List(ListArgs),
}

/// `cortex models list` arguments.
#[derive(Debug, Clone, Args)]
pub struct ListArgs {
    /// Override the LLM backend to query. When omitted the resolved `[llm]`
    /// config section is used. Accepted values: `ollama`, `openai-compat`.
    #[arg(long = "backend", value_name = "BACKEND")]
    pub backend: Option<BackendChoice>,
}

/// Selectable backend values for `--backend`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum BackendChoice {
    /// Query a local Ollama instance via `GET {endpoint}/api/tags`.
    Ollama,
    /// Query an OpenAI-compatible server via `GET {base_url}/v1/models`.
    #[value(name = "openai-compat")]
    OpenAiCompat,
}

// ──────────────────────────────────────────────────────────────────────────────
// Wire format types (deserialized from API responses)
// ──────────────────────────────────────────────────────────────────────────────

#[derive(Debug, serde::Deserialize)]
struct OllamaTagsResponse {
    models: Vec<OllamaModelEntry>,
}

#[derive(Debug, serde::Deserialize)]
struct OllamaModelEntry {
    name: String,
    #[serde(default)]
    digest: String,
    #[serde(default)]
    size: u64,
    #[serde(default)]
    details: Option<OllamaModelDetails>,
}

#[derive(Debug, serde::Deserialize)]
struct OllamaModelDetails {
    #[serde(default)]
    parameter_size: String,
    #[serde(default)]
    quantization_level: String,
}

#[derive(Debug, serde::Deserialize)]
struct OpenAiModelsResponse {
    data: Vec<OpenAiModelEntry>,
}

#[derive(Debug, serde::Deserialize)]
struct OpenAiModelEntry {
    id: String,
    /// Wire-format field; present in the response but unused beyond deserialization.
    #[serde(default)]
    #[allow(dead_code)]
    object: String,
}

// ──────────────────────────────────────────────────────────────────────────────
// JSON output types (serialized to the operator)
// ──────────────────────────────────────────────────────────────────────────────

/// JSON report emitted by `cortex models list --json`.
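///
/// A sketch of the serialized payload (field values are illustrative; under
/// `--json` this report is wrapped in the `cortex.models.list` envelope
/// produced by `crate::output`):
///
/// ```text
/// {
///   "backend": "ollama",
///   "endpoint": "http://localhost:11434",
///   "models": [
///     {
///       "name": "llama3.1:8b",
///       "digest": "<sha256 hex digest>",
///       "size_bytes": 4661000000,
///       "parameter_size": "8B",
///       "quantization": "Q4_0",
///       "configured_for": ["reflection"]
///     }
///   ]
/// }
/// ```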
#[derive(Debug, Serialize)]
pub struct ModelsListReport {
    /// Which backend was queried.
    pub backend: String,
    /// Effective endpoint / base URL that was contacted.
    pub endpoint: String,
    /// Discovered models.
    pub models: Vec<ModelEntry>,
}

/// A single model entry in the JSON report.
#[derive(Debug, Serialize)]
pub struct ModelEntry {
    /// Model name as returned by the backend.
    pub name: String,
    /// Content-digest of the model blob, empty string when unavailable.
    pub digest: String,
    /// Raw size in bytes; `0` when unavailable (OpenAI-compat).
    pub size_bytes: u64,
    /// Human-readable parameter count string (e.g. `"8B"`), empty when
    /// unavailable.
    pub parameter_size: String,
    /// Quantization label (e.g. `"Q4_0"`), empty when unavailable.
    pub quantization: String,
    /// Which roles this model is currently configured for in `cortex.toml`.
    /// Currently the only tracked role is `"reflection"`.
    pub configured_for: Vec<String>,
}

// ──────────────────────────────────────────────────────────────────────────────
// Entry point
// ──────────────────────────────────────────────────────────────────────────────

/// Run `cortex models <sub>`.
pub fn run(sub: ModelsSub) -> Exit {
    match sub {
        ModelsSub::List(args) => run_list(args),
    }
}

fn run_list(args: ListArgs) -> Exit {
    // Resolve the effective backend. When `--backend` is given explicitly it
    // overrides config; otherwise we inspect the resolved `[llm]` config.
    let llm_backend = LlmBackend::resolve();

    match args.backend {
        Some(BackendChoice::Ollama) => {
            // Explicit --backend ollama: use resolved endpoint or the default.
            let endpoint = ollama_endpoint_from_backend(&llm_backend);
            let reflection_model = reflection_model_from_backend(&llm_backend);
            run_ollama_list(&endpoint, reflection_model.as_deref())
        }
        Some(BackendChoice::OpenAiCompat) => {
            let base_url = openai_compat_base_url_from_backend(&llm_backend);
            let reflection_model = reflection_model_from_backend(&llm_backend);
            run_openai_compat_list(&base_url, reflection_model.as_deref())
        }
        None => {
            // Auto-select from config.
            match &llm_backend {
                LlmBackend::Ollama { endpoint, .. } => {
                    let endpoint = endpoint.clone();
                    let reflection_model = reflection_model_from_backend(&llm_backend);
                    run_ollama_list(&endpoint, reflection_model.as_deref())
                }
                LlmBackend::OpenAiCompat { base_url, .. } => {
                    let base_url = base_url.clone();
                    let reflection_model = reflection_model_from_backend(&llm_backend);
                    run_openai_compat_list(&base_url, reflection_model.as_deref())
                }
                LlmBackend::Claude { .. } => {
                    // Claude is a remote API — this command is for local model
                    // discovery only.
                    eprintln!(
                        "cortex models list: backend 'claude' does not expose a local model list."
                    );
                    eprintln!("  Use --backend ollama or --backend openai-compat to query a local server.");
                    Exit::Usage
                }
                LlmBackend::Offline => {
                    print_no_backend_guidance();
                    Exit::Ok
                }
            }
        }
    }
}

// ──────────────────────────────────────────────────────────────────────────────
// Ollama path
// ──────────────────────────────────────────────────────────────────────────────

fn run_ollama_list(endpoint: &str, reflection_model: Option<&str>) -> Exit {
    let url = format!("{endpoint}/api/tags");
    debug!(url, "querying Ollama tags endpoint");

    let response = match ureq::get(&url).call() {
        Ok(r) => r,
        Err(ureq::Error::Status(code, resp)) => {
            let body = resp.into_string().unwrap_or_default();
            eprintln!("cortex models list: Ollama returned HTTP {code}: {body}");
            return Exit::Internal;
        }
        Err(ureq::Error::Transport(t)) => {
            eprintln!("cortex models list: could not reach Ollama at {endpoint}: {t}");
            eprintln!("  Is Ollama running? Try: ollama serve");
            return Exit::PreconditionUnmet;
        }
    };

    let tags: OllamaTagsResponse = match response.into_json() {
        Ok(t) => t,
        Err(err) => {
            eprintln!("cortex models list: failed to parse Ollama response: {err}");
            return Exit::Internal;
        }
    };

    let models: Vec<ModelEntry> = tags
        .models
        .into_iter()
        .map(|m| {
            let (parameter_size, quantization) = m
                .details
                .as_ref()
                .map(|d| (d.parameter_size.clone(), d.quantization_level.clone()))
                .unwrap_or_default();

            let configured_for = configured_for_roles(&m.name, reflection_model);

            ModelEntry {
                name: m.name,
                digest: m.digest,
                size_bytes: m.size,
                parameter_size,
                quantization,
                configured_for,
            }
        })
        .collect();

    let report = ModelsListReport {
        backend: "ollama".to_string(),
        endpoint: endpoint.to_string(),
        models,
    };

    if output::json_enabled() {
        let envelope = Envelope::new("cortex.models.list", Exit::Ok, &report);
        return output::emit(&envelope, Exit::Ok);
    }

    print_ollama_table(&report, reflection_model);
    Exit::Ok
}

// ──────────────────────────────────────────────────────────────────────────────
// OpenAI-compat path
// ──────────────────────────────────────────────────────────────────────────────

fn run_openai_compat_list(base_url: &str, reflection_model: Option<&str>) -> Exit {
    let url = format!("{base_url}/v1/models");
    debug!(url, "querying OpenAI-compat models endpoint");

    let response = match ureq::get(&url).call() {
        Ok(r) => r,
        Err(ureq::Error::Status(code, resp)) => {
            let body = resp.into_string().unwrap_or_default();
            eprintln!("cortex models list: OpenAI-compat server returned HTTP {code}: {body}");
            return Exit::Internal;
        }
        Err(ureq::Error::Transport(t)) => {
            eprintln!(
                "cortex models list: could not reach OpenAI-compat server at {base_url}: {t}"
            );
            eprintln!("  Is the server running?");
            return Exit::PreconditionUnmet;
        }
    };

    let api_resp: OpenAiModelsResponse = match response.into_json() {
        Ok(r) => r,
        Err(err) => {
            eprintln!("cortex models list: failed to parse OpenAI-compat response: {err}");
            return Exit::Internal;
        }
    };

    let models: Vec<ModelEntry> = api_resp
        .data
        .into_iter()
        .map(|m| {
            let configured_for = configured_for_roles(&m.id, reflection_model);
            ModelEntry {
                name: m.id,
                digest: String::new(),
                size_bytes: 0,
                parameter_size: String::new(),
                quantization: String::new(),
                configured_for,
            }
        })
        .collect();

    let report = ModelsListReport {
        backend: "openai-compat".to_string(),
        endpoint: base_url.to_string(),
        models,
    };

    if output::json_enabled() {
        let envelope = Envelope::new("cortex.models.list", Exit::Ok, &report);
        return output::emit(&envelope, Exit::Ok);
    }

    print_openai_compat_table(&report, reflection_model);
    Exit::Ok
}

// ──────────────────────────────────────────────────────────────────────────────
// Human-readable rendering
// ──────────────────────────────────────────────────────────────────────────────

fn print_ollama_table(report: &ModelsListReport, reflection_model: Option<&str>) {
    println!("Available models (ollama @ {})\n", report.endpoint);

    if report.models.is_empty() {
        println!("  (no models found — run `ollama pull <model>` to download one)");
        println!();
        print_ollama_tip(&report.endpoint);
        return;
    }

    // Column widths: NAME is padded to min 30.
    let name_width = report
        .models
        .iter()
        .map(|m| m.name.len())
        .max()
        .unwrap_or(0)
        .max(30);

    println!(
        "  {:<name_width$}  {:<8}  {:<8}  {:<14}  CONFIGURED",
        "NAME",
        "SIZE",
        "PARAMS",
        "QUANTIZATION",
        name_width = name_width,
    );

    for m in &report.models {
        let size_str = if m.size_bytes == 0 {
            "-".to_string()
        } else {
            format_bytes(m.size_bytes)
        };
        let params = if m.parameter_size.is_empty() {
            "-"
        } else {
            &m.parameter_size
        };
        let quant = if m.quantization.is_empty() {
            "-"
        } else {
            &m.quantization
        };
        let configured = format_configured(&m.configured_for);

        println!(
            "  {:<name_width$}  {:<8}  {:<8}  {:<14}  {}",
            m.name,
            size_str,
            params,
            quant,
            configured,
            name_width = name_width,
        );
    }

    println!();
    print_ollama_tip(&report.endpoint);

    if let Some(model) = reflection_model {
        // Strip the @sha256:<digest> suffix for display.
        let display = model
            .split_once("@sha256:")
            .map(|(name, _)| name)
            .unwrap_or(model);
        println!("\nCurrently configured reflection model: {display}");
    }
}

fn print_openai_compat_table(report: &ModelsListReport, reflection_model: Option<&str>) {
    println!("Available models (openai-compat @ {})\n", report.endpoint);

    if report.models.is_empty() {
        println!("  (no models found)");
        return;
    }

    let name_width = report
        .models
        .iter()
        .map(|m| m.name.len())
        .max()
        .unwrap_or(0)
        .max(30);

    println!(
        "  {:<name_width$}  CONFIGURED",
        "NAME",
        name_width = name_width,
    );

    for m in &report.models {
        let configured = format_configured(&m.configured_for);
        println!(
            "  {:<name_width$}  {}",
            m.name,
            configured,
            name_width = name_width,
        );
    }

    if let Some(model) = reflection_model {
        println!("\nCurrently configured reflection model: {model}");
    }
}

fn print_ollama_tip(endpoint: &str) {
    let _ = endpoint; // endpoint is shown in the header already
    println!("Tip: set as default in cortex.toml:");
    println!("  [llm.ollama]");
    println!("  model = \"<name>@sha256:<digest>\"");
}

fn print_no_backend_guidance() {
    println!("No LLM backend configured. Add to cortex.toml:\n");
    println!("  [llm]");
    println!("  backend = \"ollama\"\n");
    println!("  [llm.ollama]");
    println!("  endpoint = \"http://localhost:11434\"");
    println!("  model    = \"llama3.1:8b@sha256:<64-hex-digit-digest>\"");
}

// ──────────────────────────────────────────────────────────────────────────────
// Helpers
// ──────────────────────────────────────────────────────────────────────────────

/// Produce the list of roles a model is configured for.
///
/// At present only `"reflection"` is tracked (the `[llm.ollama].model`
/// value). The name match is done against the full configured string as well
/// as the name-only prefix before `@sha256:` so that
/// `llama3.1:8b@sha256:<digest>` matches the Ollama-returned tag `llama3.1:8b`.
fn configured_for_roles(model_name: &str, reflection_model: Option<&str>) -> Vec<String> {
    let mut roles = Vec::new();
    if let Some(ref_model) = reflection_model {
        let ref_name = ref_model
            .split_once("@sha256:")
            .map(|(n, _)| n)
            .unwrap_or(ref_model);
        if model_name == ref_model || model_name == ref_name {
            roles.push("reflection".to_string());
        }
    }
    roles
}

fn format_configured(roles: &[String]) -> String {
    if roles.is_empty() {
        String::new()
    } else {
        format!("\u{2190} {}", roles.join(", "))
    }
}

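/// Render a raw byte count using decimal (SI) units (1 KB = 1000 B): one
/// decimal place for gigabytes, whole numbers below that.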
fn format_bytes(bytes: u64) -> String {
    const GB: u64 = 1_000_000_000;
    const MB: u64 = 1_000_000;
    const KB: u64 = 1_000;
    if bytes >= GB {
        format!("{:.1}GB", bytes as f64 / GB as f64)
    } else if bytes >= MB {
        format!("{:.0}MB", bytes as f64 / MB as f64)
    } else if bytes >= KB {
        format!("{:.0}KB", bytes as f64 / KB as f64)
    } else {
        format!("{bytes}B")
    }
}

/// Extract the Ollama endpoint from the resolved `LlmBackend`, falling back
/// to the loopback default.
fn ollama_endpoint_from_backend(backend: &LlmBackend) -> String {
    match backend {
        LlmBackend::Ollama { endpoint, .. } => endpoint.clone(),
        _ => DEFAULT_OLLAMA_ENDPOINT.to_string(),
    }
}

/// Extract the OpenAI-compat base URL from the resolved backend.
///
/// When `OpenAiCompat` is configured, returns the configured `base_url`.
/// When `Ollama` is configured, reuses the Ollama endpoint (Ollama exposes an
/// OpenAI-compatible shim at the same address). Falls back to `CORTEX_LLM_ENDPOINT`
/// or `http://localhost:1234` (LM Studio default) for all other cases.
fn openai_compat_base_url_from_backend(backend: &LlmBackend) -> String {
    match backend {
        LlmBackend::OpenAiCompat { base_url, .. } => base_url.clone(),
        LlmBackend::Ollama { endpoint, .. } => endpoint.clone(),
        _ => std::env::var("CORTEX_LLM_ENDPOINT")
            .ok()
            .filter(|s| !s.is_empty())
            .unwrap_or_else(|| "http://localhost:1234".to_string()),
    }
}

/// Extract the model name configured for reflection from the resolved backend.
fn reflection_model_from_backend(backend: &LlmBackend) -> Option<String> {
    match backend {
        LlmBackend::Ollama { model, .. } if !model.is_empty() => Some(model.clone()),
        LlmBackend::Claude { model, .. } if !model.is_empty() => Some(model.clone()),
        LlmBackend::OpenAiCompat { model, .. } if !model.is_empty() => Some(model.clone()),
        _ => None,
    }
}

// ──────────────────────────────────────────────────────────────────────────────
// Unit tests
// ──────────────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn format_bytes_produces_human_readable_strings() {
        assert_eq!(format_bytes(4_661_000_000), "4.7GB");
        assert_eq!(format_bytes(274_000_000), "274MB");
        assert_eq!(format_bytes(1_500), "2KB");
        assert_eq!(format_bytes(500), "500B");
    }

    #[test]
    fn configured_for_roles_matches_digest_pinned_ref() {
        let roles = configured_for_roles(
            "llama3.1:8b",
            Some("llama3.1:8b@sha256:0000000000000000000000000000000000000000000000000000000000000000"),
        );
        assert_eq!(roles, vec!["reflection".to_string()]);
    }

    #[test]
    fn configured_for_roles_returns_empty_when_no_match() {
        let roles = configured_for_roles("nomic-embed-text", Some("llama3.1:8b"));
        assert!(roles.is_empty());
    }

    #[test]
    fn configured_for_roles_returns_empty_when_no_reflection_model() {
        let roles = configured_for_roles("llama3.1:8b", None);
        assert!(roles.is_empty());
    }

    #[test]
    fn format_configured_empty_returns_empty_string() {
        assert_eq!(format_configured(&[]), "");
    }

    #[test]
    fn format_configured_single_role() {
        let s = format_configured(&["reflection".to_string()]);
        assert!(s.contains("reflection"));
    }
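
    // Sketch of additional coverage: the exact configured string (digest
    // suffix included) should match via the direct-equality branch.
    #[test]
    fn configured_for_roles_matches_exact_configured_string() {
        let roles = configured_for_roles("llama3.1:8b@sha256:abc", Some("llama3.1:8b@sha256:abc"));
        assert_eq!(roles, vec!["reflection".to_string()]);
    }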
}