use clap::{Args, Subcommand, ValueEnum};
use serde::{Deserialize, Serialize};
use tracing::debug;
use crate::config::{LlmBackend, DEFAULT_OLLAMA_ENDPOINT};
use crate::exit::Exit;
use crate::output::{self, Envelope};
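
/// Subcommands of `cortex models`.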
#[derive(Debug, Clone, Subcommand)]
pub enum ModelsSub {
List(ListArgs),
}
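
/// Arguments for `cortex models list`.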
#[derive(Debug, Clone, Args)]
pub struct ListArgs {
#[arg(long = "backend", value_name = "BACKEND")]
pub backend: Option<BackendChoice>,
}
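
/// Backend to query, selectable with `--backend`; when omitted, the configured
/// backend decides.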
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum BackendChoice {
Ollama,
#[value(name = "openai-compat")]
OpenAiCompat,
}
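
/// Subset of Ollama's `GET /api/tags` response that this command deserializes.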
#[derive(Debug, Deserialize)]
struct OllamaTagsResponse {
models: Vec<OllamaModelEntry>,
}
#[derive(Debug, Deserialize)]
struct OllamaModelEntry {
name: String,
#[serde(default)]
digest: String,
#[serde(default)]
size: u64,
#[serde(default)]
details: Option<OllamaModelDetails>,
}
#[derive(Debug, Deserialize)]
struct OllamaModelDetails {
#[serde(default)]
parameter_size: String,
#[serde(default)]
quantization_level: String,
}
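
/// Subset of an OpenAI-compatible server's `GET /v1/models` response.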
#[derive(Debug, Deserialize)]
struct OpenAiModelsResponse {
data: Vec<OpenAiModelEntry>,
}
#[derive(Debug, Deserialize)]
struct OpenAiModelEntry {
id: String,
#[serde(default)]
#[allow(dead_code)]
object: String,
}
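
/// Payload emitted inside the `cortex.models.list` JSON envelope.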
#[derive(Debug, Serialize)]
pub struct ModelsListReport {
pub backend: String,
pub endpoint: String,
pub models: Vec<ModelEntry>,
}
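
/// A single listed model. OpenAI-compatible servers only report an id, so
/// `digest`, `size_bytes`, `parameter_size`, and `quantization` stay empty
/// for that backend.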
#[derive(Debug, Serialize)]
pub struct ModelEntry {
pub name: String,
pub digest: String,
pub size_bytes: u64,
pub parameter_size: String,
pub quantization: String,
pub configured_for: Vec<String>,
}
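
/// Entry point for `cortex models <SUBCOMMAND>`, e.g.:
///
/// ```text
/// cortex models list --backend ollama
/// ```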
pub fn run(sub: ModelsSub) -> Exit {
match sub {
ModelsSub::List(args) => run_list(args),
}
}
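
/// Lists models from the selected backend. An explicit `--backend` flag wins;
/// otherwise the configured backend is used. The `claude` backend exposes no
/// local model list, and with no backend configured setup guidance is printed.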
fn run_list(args: ListArgs) -> Exit {
let llm_backend = LlmBackend::resolve();
match args.backend {
Some(BackendChoice::Ollama) => {
let endpoint = ollama_endpoint_from_backend(&llm_backend);
let reflection_model = reflection_model_from_backend(&llm_backend);
run_ollama_list(&endpoint, reflection_model.as_deref())
}
Some(BackendChoice::OpenAiCompat) => {
let base_url = openai_compat_base_url_from_backend(&llm_backend);
let reflection_model = reflection_model_from_backend(&llm_backend);
run_openai_compat_list(&base_url, reflection_model.as_deref())
}
None => {
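            // No --backend override: follow the configured backend.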
match &llm_backend {
LlmBackend::Ollama { endpoint, .. } => {
let endpoint = endpoint.clone();
let reflection_model = reflection_model_from_backend(&llm_backend);
run_ollama_list(&endpoint, reflection_model.as_deref())
}
LlmBackend::OpenAiCompat { base_url, .. } => {
let base_url = base_url.clone();
let reflection_model = reflection_model_from_backend(&llm_backend);
run_openai_compat_list(&base_url, reflection_model.as_deref())
}
LlmBackend::Claude { .. } => {
eprintln!(
"cortex models list: backend 'claude' does not expose a local model list."
);
eprintln!(" Use --backend ollama or --backend openai-compat to query a local server.");
Exit::Usage
}
LlmBackend::Offline => {
print_no_backend_guidance();
Exit::Ok
}
}
}
}
}
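
/// Queries `{endpoint}/api/tags` and prints a table, or emits the JSON
/// envelope when JSON output is enabled.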
fn run_ollama_list(endpoint: &str, reflection_model: Option<&str>) -> Exit {
let url = format!("{endpoint}/api/tags");
debug!(url, "querying Ollama tags endpoint");
let response = match ureq::get(&url).call() {
Ok(r) => r,
Err(ureq::Error::Status(code, resp)) => {
let body = resp.into_string().unwrap_or_default();
eprintln!("cortex models list: Ollama returned HTTP {code}: {body}");
return Exit::Internal;
}
Err(ureq::Error::Transport(t)) => {
eprintln!("cortex models list: could not reach Ollama at {endpoint}: {t}");
eprintln!(" Is Ollama running? Try: ollama serve");
return Exit::PreconditionUnmet;
}
};
let tags: OllamaTagsResponse = match response.into_json() {
Ok(t) => t,
Err(err) => {
eprintln!("cortex models list: failed to parse Ollama response: {err}");
return Exit::Internal;
}
};
let models: Vec<ModelEntry> = tags
.models
.into_iter()
.map(|m| {
let (parameter_size, quantization) = m
.details
.as_ref()
.map(|d| (d.parameter_size.clone(), d.quantization_level.clone()))
.unwrap_or_default();
let configured_for = configured_for_roles(&m.name, reflection_model);
ModelEntry {
name: m.name,
digest: m.digest,
size_bytes: m.size,
parameter_size,
quantization,
configured_for,
}
})
.collect();
let report = ModelsListReport {
backend: "ollama".to_string(),
endpoint: endpoint.to_string(),
models,
};
if output::json_enabled() {
let envelope = Envelope::new("cortex.models.list", Exit::Ok, &report);
return output::emit(&envelope, Exit::Ok);
}
print_ollama_table(&report, reflection_model);
Exit::Ok
}
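
/// Queries `{base_url}/v1/models` and prints a table, or emits the JSON
/// envelope when JSON output is enabled.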
fn run_openai_compat_list(base_url: &str, reflection_model: Option<&str>) -> Exit {
let url = format!("{base_url}/v1/models");
debug!(url, "querying OpenAI-compat models endpoint");
let response = match ureq::get(&url).call() {
Ok(r) => r,
Err(ureq::Error::Status(code, resp)) => {
let body = resp.into_string().unwrap_or_default();
eprintln!("cortex models list: OpenAI-compat server returned HTTP {code}: {body}");
return Exit::Internal;
}
Err(ureq::Error::Transport(t)) => {
eprintln!(
"cortex models list: could not reach OpenAI-compat server at {base_url}: {t}"
);
eprintln!(" Is the server running?");
return Exit::PreconditionUnmet;
}
};
let api_resp: OpenAiModelsResponse = match response.into_json() {
Ok(r) => r,
Err(err) => {
eprintln!("cortex models list: failed to parse OpenAI-compat response: {err}");
return Exit::Internal;
}
};
let models: Vec<ModelEntry> = api_resp
.data
.into_iter()
.map(|m| {
let configured_for = configured_for_roles(&m.id, reflection_model);
ModelEntry {
name: m.id,
digest: String::new(),
size_bytes: 0,
parameter_size: String::new(),
quantization: String::new(),
configured_for,
}
})
.collect();
let report = ModelsListReport {
backend: "openai-compat".to_string(),
endpoint: base_url.to_string(),
models,
};
if output::json_enabled() {
let envelope = Envelope::new("cortex.models.list", Exit::Ok, &report);
return output::emit(&envelope, Exit::Ok);
}
print_openai_compat_table(&report, reflection_model);
Exit::Ok
}
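
/// Renders the human-readable table for an Ollama listing.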
fn print_ollama_table(report: &ModelsListReport, reflection_model: Option<&str>) {
println!("Available models (ollama @ {})\n", report.endpoint);
if report.models.is_empty() {
println!(" (no models found — run `ollama pull <model>` to download one)");
println!();
print_ollama_tip(&report.endpoint);
return;
}
let name_width = report
.models
.iter()
.map(|m| m.name.len())
.max()
.unwrap_or(0)
.max(30);
println!(
" {:<name_width$} {:<8} {:<8} {:<14} CONFIGURED",
"NAME",
"SIZE",
"PARAMS",
"QUANTIZATION",
name_width = name_width,
);
for m in &report.models {
let size_str = if m.size_bytes == 0 {
"-".to_string()
} else {
format_bytes(m.size_bytes)
};
let params = if m.parameter_size.is_empty() {
"-"
} else {
&m.parameter_size
};
let quant = if m.quantization.is_empty() {
"-"
} else {
&m.quantization
};
let configured = format_configured(&m.configured_for);
println!(
" {:<name_width$} {:<8} {:<8} {:<14} {}",
m.name,
size_str,
params,
quant,
configured,
name_width = name_width,
);
}
println!();
print_ollama_tip(&report.endpoint);
if let Some(model) = reflection_model {
let display = model
.split_once("@sha256:")
.map(|(name, _)| name)
.unwrap_or(model);
println!("\nCurrently configured reflection model: {display}");
}
}
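
/// Renders the human-readable table for an OpenAI-compatible listing.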
fn print_openai_compat_table(report: &ModelsListReport, reflection_model: Option<&str>) {
println!("Available models (openai-compat @ {})\n", report.endpoint);
if report.models.is_empty() {
println!(" (no models found)");
return;
}
let name_width = report
.models
.iter()
.map(|m| m.name.len())
.max()
.unwrap_or(0)
.max(30);
println!(
" {:<name_width$} CONFIGURED",
"NAME",
name_width = name_width,
);
for m in &report.models {
let configured = format_configured(&m.configured_for);
println!(
" {:<name_width$} {}",
m.name,
configured,
name_width = name_width,
);
}
if let Some(model) = reflection_model {
println!("\nCurrently configured reflection model: {model}");
}
}
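
/// Prints the cortex.toml snippet for pinning a default model. The endpoint
/// argument is currently unused.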
fn print_ollama_tip(_endpoint: &str) {
    println!("Tip: set as default in cortex.toml:");
println!(" [llm.ollama]");
println!(" model = \"<name>@sha256:<digest>\"");
}
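
/// Printed when no LLM backend is configured at all.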
fn print_no_backend_guidance() {
println!("No LLM backend configured. Add to cortex.toml:\n");
println!(" [llm]");
println!(" backend = \"ollama\"\n");
println!(" [llm.ollama]");
println!(" endpoint = \"http://localhost:11434\"");
println!(" model = \"llama3.1:8b@sha256:<64-hex-digit-digest>\"");
}
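
/// Returns the roles (currently only `reflection`) that `model_name` is
/// configured for, matching both the bare name and a digest-pinned
/// `name@sha256:<digest>` reference.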
fn configured_for_roles(model_name: &str, reflection_model: Option<&str>) -> Vec<String> {
let mut roles = Vec::new();
if let Some(ref_model) = reflection_model {
let ref_name = ref_model
.split_once("@sha256:")
.map(|(n, _)| n)
.unwrap_or(ref_model);
if model_name == ref_model || model_name == ref_name {
roles.push("reflection".to_string());
}
}
roles
}
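
/// Formats role annotations as `← role1, role2`, or an empty string when none.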
fn format_configured(roles: &[String]) -> String {
if roles.is_empty() {
String::new()
} else {
format!("\u{2190} {}", roles.join(", "))
}
}
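
/// Human-readable size using decimal (SI) units.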
fn format_bytes(bytes: u64) -> String {
const GB: u64 = 1_000_000_000;
const MB: u64 = 1_000_000;
const KB: u64 = 1_000;
if bytes >= GB {
format!("{:.1}GB", bytes as f64 / GB as f64)
} else if bytes >= MB {
format!("{:.0}MB", bytes as f64 / MB as f64)
} else if bytes >= KB {
format!("{:.0}KB", bytes as f64 / KB as f64)
} else {
format!("{bytes}B")
}
}
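
/// Ollama endpoint to use for an explicit `--backend ollama` override; falls
/// back to the default endpoint when the configured backend is not Ollama.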
fn ollama_endpoint_from_backend(backend: &LlmBackend) -> String {
match backend {
LlmBackend::Ollama { endpoint, .. } => endpoint.clone(),
_ => DEFAULT_OLLAMA_ENDPOINT.to_string(),
}
}
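
/// Base URL for an explicit `--backend openai-compat` override: the configured
/// OpenAI-compat URL, else the configured Ollama endpoint, else
/// `CORTEX_LLM_ENDPOINT`, else `http://localhost:1234`.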
fn openai_compat_base_url_from_backend(backend: &LlmBackend) -> String {
match backend {
LlmBackend::OpenAiCompat { base_url, .. } => base_url.clone(),
LlmBackend::Ollama { endpoint, .. } => endpoint.clone(),
_ => std::env::var("CORTEX_LLM_ENDPOINT")
.ok()
.filter(|s| !s.is_empty())
.unwrap_or_else(|| "http://localhost:1234".to_string()),
}
}
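
/// The model configured for reflection on the active backend, if any.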
fn reflection_model_from_backend(backend: &LlmBackend) -> Option<String> {
match backend {
LlmBackend::Ollama { model, .. } if !model.is_empty() => Some(model.clone()),
LlmBackend::Claude { model, .. } if !model.is_empty() => Some(model.clone()),
LlmBackend::OpenAiCompat { model, .. } if !model.is_empty() => Some(model.clone()),
_ => None,
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn format_bytes_produces_human_readable_strings() {
assert_eq!(format_bytes(4_661_000_000), "4.7GB");
assert_eq!(format_bytes(274_000_000), "274MB");
assert_eq!(format_bytes(1_500), "2KB");
assert_eq!(format_bytes(500), "500B");
}
#[test]
fn configured_for_roles_matches_digest_pinned_ref() {
let roles = configured_for_roles(
"llama3.1:8b",
Some("llama3.1:8b@sha256:0000000000000000000000000000000000000000000000000000000000000000"),
);
assert_eq!(roles, vec!["reflection".to_string()]);
}
#[test]
fn configured_for_roles_returns_empty_when_no_match() {
let roles = configured_for_roles("nomic-embed-text", Some("llama3.1:8b"));
assert!(roles.is_empty());
}
#[test]
fn configured_for_roles_returns_empty_when_no_reflection_model() {
let roles = configured_for_roles("llama3.1:8b", None);
assert!(roles.is_empty());
}
#[test]
fn format_configured_empty_returns_empty_string() {
assert_eq!(format_configured(&[]), "");
}
#[test]
fn format_configured_single_role() {
let s = format_configured(&["reflection".to_string()]);
assert!(s.contains("reflection"));
}
}