use super::*;
/// Prints the configured primary/fallback models and the routing settings.
///
/// Fetches `{base_url}/api/config`. When `json` is true the whole config
/// document is pretty-printed verbatim and nothing else is shown.
///
/// Missing routing keys are displayed with the fallbacks `rule`, `0.9` and
/// `true`; a missing primary model is displayed as `not set`.
///
/// # Errors
///
/// Returns an error if the HTTP client cannot be created, the request fails,
/// the server answers with a non-success status, or the body is not valid JSON.
pub async fn cmd_models_list(base_url: &str, json: bool) -> Result<(), Box<dyn std::error::Error>> {
    let (dim, bold, _accent, green, yellow, _red, _cyan, reset, _mono) = colors();

    let config: serde_json::Value = super::http_client()?
        .get(format!("{base_url}/api/config"))
        .send()
        .await?
        // Surface HTTP failures instead of rendering an error body as "not set".
        .error_for_status()?
        .json()
        .await?;

    if json {
        println!("{}", serde_json::to_string_pretty(&config)?);
        return Ok(());
    }

    println!("\n {bold}Configured Models{reset}\n");

    let primary = config
        .pointer("/models/primary")
        .and_then(|v| v.as_str())
        .unwrap_or("not set");
    // Pad the plain label first and colour it afterwards: padding a string that
    // already contains escape sequences counts the invisible bytes towards the
    // width, which breaks column alignment whenever colours are enabled.
    println!(" {green}{:<12}{reset} {}", "primary", primary);

    if let Some(fallbacks) = config
        .pointer("/models/fallbacks")
        .and_then(|v| v.as_array())
    {
        for (i, fb) in fallbacks.iter().enumerate() {
            let label = format!("fallback {}", i + 1);
            let name = fb.as_str().unwrap_or("?");
            println!(" {yellow}{label:<12}{reset} {name}");
        }
    }

    let mode = config
        .pointer("/models/routing/mode")
        .and_then(|v| v.as_str())
        .unwrap_or("rule");
    let threshold = config
        .pointer("/models/routing/confidence_threshold")
        .and_then(|v| v.as_f64())
        .unwrap_or(0.9);
    let local_first = config
        .pointer("/models/routing/local_first")
        .and_then(|v| v.as_bool())
        .unwrap_or(true);

    println!();
    println!(
        " {dim}Routing: mode={mode}, threshold={threshold}, local_first={local_first}{reset}"
    );
    println!();
    Ok(())
}
/// Probes each configured provider and lists the models it advertises.
///
/// The provider table is read from `{base_url}/api/config`. For every provider
/// (or only the one whose name equals `provider`, when given) the model-listing
/// endpoint is queried with a 10 second timeout:
///
/// * `{url}/api/tags` when the provider name contains `ollama` or the URL
///   contains `11434`;
/// * `{url}/v1/models` otherwise.
///
/// Model names are taken from a `models` array (`name` or `model` field) or a
/// `data` array (`id` field). A body that cannot be parsed is reported as
/// "no models found".
///
/// # Errors
///
/// Returns an error if the config request fails, returns a non-success status,
/// or is not JSON, or if an HTTP client cannot be built. Failures of individual
/// provider probes are printed and do not abort the scan.
pub async fn cmd_models_scan(
    base_url: &str,
    provider: Option<&str>,
) -> Result<(), Box<dyn std::error::Error>> {
    let (_dim, bold, _accent, _green, yellow, red, cyan, reset, _mono) = colors();

    println!("\n {bold}Scanning for available models...{reset}\n");

    let config: serde_json::Value = super::http_client()?
        .get(format!("{base_url}/api/config"))
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;

    // Borrow the provider table straight out of the config; no copy is needed.
    let Some(providers) = config
        .get("providers")
        .and_then(|v| v.as_object())
        .filter(|table| !table.is_empty())
    else {
        println!(" No providers configured.");
        println!();
        return Ok(());
    };

    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(10))
        .build()?;

    let mut matched_any = false;
    for (name, prov_config) in providers {
        if provider.is_some_and(|wanted| name.as_str() != wanted) {
            continue;
        }
        matched_any = true;

        let url = prov_config
            .get("url")
            .and_then(|v| v.as_str())
            .unwrap_or("");
        if url.is_empty() {
            println!(" {yellow}{name}{reset}: no URL configured");
            continue;
        }

        let ollama_like =
            name.to_lowercase().contains("ollama") || url.to_lowercase().contains("11434");
        // Strip trailing slashes so a configured "http://host/" does not turn
        // into "http://host//v1/models".
        let endpoint_base = url.trim_end_matches('/');
        let models_url = if ollama_like {
            format!("{endpoint_base}/api/tags")
        } else {
            format!("{endpoint_base}/v1/models")
        };

        let scan_result =
            super::spin_while(&format!("Probing {name}"), client.get(&models_url).send()).await;
        print!(" {cyan}{name}{reset} ({url}): ");

        match scan_result {
            Ok(resp) if resp.status().is_success() => {
                // Best effort: an unparsable body is treated like an empty listing.
                let body: serde_json::Value = resp.json().await.unwrap_or_default();
                let models: Vec<&str> =
                    if let Some(arr) = body.get("models").and_then(|v| v.as_array()) {
                        arr.iter()
                            .filter_map(|m| {
                                m.get("name")
                                    .or_else(|| m.get("model"))
                                    .and_then(|v| v.as_str())
                            })
                            .collect()
                    } else if let Some(arr) = body.get("data").and_then(|v| v.as_array()) {
                        arr.iter()
                            .filter_map(|m| m.get("id").and_then(|v| v.as_str()))
                            .collect()
                    } else {
                        Vec::new()
                    };

                if models.is_empty() {
                    println!("no models found");
                } else {
                    println!("{} model(s)", models.len());
                    for model in &models {
                        println!(" - {model}");
                    }
                }
            }
            Ok(resp) => {
                println!("{red}error: {}{reset}", resp.status());
            }
            Err(e) => {
                println!("{red}unreachable: {e}{reset}");
            }
        }
    }

    // A filter that matches nothing used to produce no output at all.
    if let Some(wanted) = provider
        && !matched_any
    {
        println!(" {yellow}No configured provider is named \"{wanted}\".{reset}");
    }

    println!();
    Ok(())
}
/// Runs the exercise matrix against a single model and prints a summary.
///
/// The matrix (`roboticus_llm::exercise::EXERCISE_MATRIX`) is run `iterations`
/// times via `exercise_single_model_iterations`; afterwards the pass/fail
/// totals are printed.
///
/// Always returns `Ok(())`; failed prompts are only reflected in the printed
/// counts.
pub async fn cmd_models_exercise(
    base_url: &str,
    model: &str,
    iterations: usize,
) -> Result<(), Box<dyn std::error::Error>> {
    let (dim, bold, _accent, green, _yellow, red, cyan, reset, _mono) = colors();
    let (ok, _action, warn, _detail, _err) = icons();

    let prompt_count = roboticus_llm::exercise::EXERCISE_MATRIX.len() * iterations;
    println!(
        "\n {bold}Exercising model: {cyan}{model}{reset} ({iterations} iteration(s), {prompt_count} prompts)\n"
    );

    let (passed, failed) = exercise_single_model_iterations(base_url, model, iterations).await;
    println!();

    // Failures are highlighted only when there are any.
    let failed_tint = match failed {
        0 => dim,
        _ => red,
    };
    println!(
        " {bold}Results:{reset} {green}{passed} passed{reset}, {failed_tint}{failed} failed{reset}",
    );

    // NOTE(review): the factor 5 is presumably the number of matrix prompts per
    // intent class — confirm against EXERCISE_MATRIX.
    let per_class = iterations * 5;
    println!(" Observations per intent class: {per_class}");

    match failed {
        0 => println!(" {ok} Quality observations recorded for all {passed} prompts."),
        _ => println!(" {warn} Some prompts failed — partial observations recorded."),
    }
    println!();
    Ok(())
}
/// Discovers models from every configured provider and prints a suggested
/// primary/fallback chain plus a matching TOML snippet.
///
/// Providers are read from `{base_url}/api/config`. Each provider with a URL is
/// probed (10 second timeout) at `{url}/api/tags` when its name contains
/// `ollama` or its URL contains `11434`, otherwise at `{url}/v1/models`.
/// Providers that are unreachable or answer with a non-success status are
/// skipped silently.
///
/// Discovered models are named `{provider}/{model}` and ranked local-first,
/// then by ascending combined input+output token cost. A provider counts as
/// local when its `is_local` flag says so or, if the flag is absent, when its
/// name contains `ollama`, `local` or `lmstudio`. The first six entries are
/// shown: one primary and up to five fallbacks.
///
/// # Errors
///
/// Returns an error if the config request fails, returns a non-success status,
/// or is not JSON, or if an HTTP client cannot be built.
pub async fn cmd_models_suggest(base_url: &str) -> Result<(), Box<dyn std::error::Error>> {
    let (dim, bold, _accent, green, _yellow, _red, cyan, reset, _mono) = colors();
    let (_ok, _action, warn, _detail, _err) = icons();

    println!("\n {bold}Scanning for available models...{reset}\n");

    let config: serde_json::Value = super::http_client()?
        .get(format!("{base_url}/api/config"))
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;

    // Borrow the provider table straight out of the config; no copy is needed.
    let Some(providers) = config
        .get("providers")
        .and_then(|v| v.as_object())
        .filter(|table| !table.is_empty())
    else {
        println!(" {warn} No providers configured. Nothing to suggest.");
        println!();
        return Ok(());
    };

    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(10))
        .build()?;

    // (qualified model name, is_local, combined per-token cost)
    let mut available: Vec<(String, bool, f64)> = Vec::new();

    for (name, prov_config) in providers {
        let url = prov_config
            .get("url")
            .and_then(|v| v.as_str())
            .unwrap_or("");
        if url.is_empty() {
            continue;
        }

        let name_l = name.to_lowercase();
        let is_local = prov_config
            .get("is_local")
            .and_then(|v| v.as_bool())
            .unwrap_or_else(|| {
                name_l.contains("ollama") || name_l.contains("local") || name_l.contains("lmstudio")
            });

        let cost_of = |key: &str| {
            prov_config
                .get(key)
                .and_then(|v| v.as_f64())
                .unwrap_or(0.0)
        };
        let cost = cost_of("cost_per_input_token") + cost_of("cost_per_output_token");

        let ollama_like = name_l.contains("ollama") || url.to_lowercase().contains("11434");
        // Strip trailing slashes so a configured "http://host/" does not turn
        // into "http://host//v1/models".
        let endpoint_base = url.trim_end_matches('/');
        let models_url = if ollama_like {
            format!("{endpoint_base}/api/tags")
        } else {
            format!("{endpoint_base}/v1/models")
        };

        // Unreachable or failing providers simply contribute no candidates.
        let Ok(resp) = client.get(&models_url).send().await else {
            continue;
        };
        if !resp.status().is_success() {
            continue;
        }

        // Best effort: an unparsable body is treated like an empty listing.
        let body: serde_json::Value = resp.json().await.unwrap_or_default();
        if let Some(arr) = body.get("models").and_then(|v| v.as_array()) {
            available.extend(
                arr.iter()
                    .filter_map(|m| {
                        m.get("name")
                            .or_else(|| m.get("model"))
                            .and_then(|v| v.as_str())
                    })
                    .map(|m| (format!("{name}/{m}"), is_local, cost)),
            );
        } else if let Some(arr) = body.get("data").and_then(|v| v.as_array()) {
            available.extend(
                arr.iter()
                    .filter_map(|m| m.get("id").and_then(|v| v.as_str()))
                    .map(|m| (format!("{name}/{m}"), is_local, cost)),
            );
        }
    }

    if available.is_empty() {
        println!(" {warn} No models discovered from any provider.");
        println!();
        return Ok(());
    }

    // Local models first (true sorts before false here), then cheapest first.
    // `total_cmp` gives a genuine total order, so no fallback ordering is needed.
    available.sort_by(|a, b| b.1.cmp(&a.1).then(a.2.total_cmp(&b.2)));

    println!(" {bold}Suggested fallback chain:{reset}\n");
    for (i, (model, is_local, _cost)) in available.iter().take(6).enumerate() {
        let role = if i == 0 {
            "primary ".to_string()
        } else {
            format!("fallback{i}")
        };
        let locality = if *is_local {
            format!("{green}local{reset}")
        } else {
            format!("{cyan}cloud{reset}")
        };
        println!(" {role:<10} {model} ({locality})");
    }

    println!("\n {dim}TOML:{reset}\n");
    if let Some((primary, _, _)) = available.first() {
        println!(" [models]");
        println!(" primary = \"{primary}\"");
        let fallbacks: Vec<&str> = available
            .iter()
            .skip(1)
            .take(5)
            .map(|(m, _, _)| m.as_str())
            .collect();
        if !fallbacks.is_empty() {
            // Debug formatting of a string slice list yields `["a", "b"]`.
            println!(" fallbacks = {fallbacks:?}");
        }
    }
    println!();
    Ok(())
}
/// Asks the server to reset recorded model data via `POST /api/models/reset`.
///
/// When `model` is given it is sent as the `model` query parameter; otherwise
/// the request carries no parameter. On success the server's `message` field
/// is printed (or `done` when absent), followed by a hint on how to
/// re-benchmark.
///
/// # Errors
///
/// Returns an error if the HTTP client cannot be created, the request fails,
/// the server answers with a non-success status, or the success body is not
/// valid JSON.
pub async fn cmd_models_reset(
    base_url: &str,
    model: Option<&str>,
) -> Result<(), Box<dyn std::error::Error>> {
    let (_dim, bold, _accent, green, _yellow, _red, _cyan, reset, _mono) = colors();
    let (ok, _action, _warn, _detail, _err) = icons();

    let client = super::http_client()?;
    let mut req = client.post(format!("{base_url}/api/models/reset"));
    if let Some(m) = model {
        req = req.query(&[("model", m)]);
    }

    let resp = req.send().await?;
    let status = resp.status();
    if !status.is_success() {
        // Do not print a success banner for a failed reset; pass on whatever
        // explanation the server put in the body.
        let detail = resp.text().await.unwrap_or_default();
        return Err(format!("model reset failed ({status}): {}", detail.trim()).into());
    }

    let data: serde_json::Value = resp.json().await?;
    let msg = data["message"].as_str().unwrap_or("done");
    println!("\n {bold}{ok}{reset} {green}{msg}{reset}\n");

    if let Some(m) = model {
        println!(" Run {bold}roboticus models exercise {m}{reset} to re-benchmark.");
    } else {
        println!(" Run {bold}roboticus models exercise <model>{reset} per model to re-benchmark.");
    }
    println!();
    Ok(())
}
pub async fn cmd_models_baseline(base_url: &str) -> Result<(), Box<dyn std::error::Error>> {
let (_dim, bold, _accent, green, yellow, red, cyan, reset, _mono) = colors();
let (ok, _action, warn, _detail, err) = icons();
println!("\n {bold}Step 1: Discovering available models...{reset}\n");
let resp = super::http_client()?
.get(format!("{base_url}/api/config"))
.send()
.await?;
let config: serde_json::Value = resp.json().await?;
let mut configured: Vec<String> = Vec::new();
if let Some(primary) = config.pointer("/models/primary").and_then(|v| v.as_str()) {
configured.push(primary.to_string());
}
if let Some(fbs) = config
.pointer("/models/fallbacks")
.and_then(|v| v.as_array())
{
for fb in fbs {
if let Some(name) = fb.as_str()
&& !name.is_empty()
&& !configured.contains(&name.to_string())
{
configured.push(name.to_string());
}
}
}
if configured.is_empty() {
println!(" {warn} No models configured. Nothing to baseline.");
return Ok(());
}
println!(
" Found {bold}{}{reset} configured model(s):\n",
configured.len()
);
for (i, model) in configured.iter().enumerate() {
let role = if i == 0 { "primary" } else { "fallback" };
println!(" {cyan}{role:<10}{reset} {model}");
}
println!();
print!(
" This will flush all quality scores and exercise each model \
across 20 prompts.\n Proceed? [Y/n] "
);
use std::io::Write;
std::io::stdout().flush().ok();
let mut input = String::new();
std::io::stdin().read_line(&mut input).ok();
let answer = input.trim().to_lowercase();
if !answer.is_empty() && !matches!(answer.as_str(), "y" | "yes") {
println!(" Cancelled.");
return Ok(());
}
println!("\n {bold}Step 2: Flushing all quality scores...{reset}");
let resp = super::http_client()?
.post(format!("{base_url}/api/models/reset"))
.send()
.await?;
let data: serde_json::Value = resp.json().await?;
let cleared = data["cleared"].as_u64().unwrap_or(0);
println!(" {ok} Cleared {cleared} observation entries.\n");
println!(" {bold}Step 3: Exercising models...{reset}\n");
let mut results: Vec<(String, usize, usize)> = Vec::new();
for model in &configured {
println!(" {cyan}--- {model} ---{reset}");
let (pass, fail) = exercise_single_model_iterations(base_url, model, 20).await;
results.push((model.clone(), pass, fail));
println!();
}
println!(" {bold}Baseline Results:{reset}\n");
for (model, pass, fail) in &results {
let status = if *fail == 0 {
format!("{green}{ok}{reset}")
} else {
format!("{yellow}{warn}{reset}")
};
println!(
" {status} {model}: {green}{pass} passed{reset}, {}{fail} failed{reset}",
if *fail > 0 { red } else { _dim }
);
}
println!();
Ok(())
}
/// Computes `(mean, p50, p95)` in seconds from millisecond samples that are
/// already sorted ascending. Returns `None` for an empty slice.
///
/// `p50` is the element at index `len / 2`; `p95` is the element at index
/// `floor(len * 0.95)`, clamped to the last element.
fn exercise_latency_stats(sorted_ms: &[u64]) -> Option<(f64, f64, f64)> {
    let last = sorted_ms.len().checked_sub(1)?;
    let mean = sorted_ms.iter().sum::<u64>() as f64 / sorted_ms.len() as f64 / 1000.0;
    let p50 = sorted_ms[sorted_ms.len() / 2] as f64 / 1000.0;
    let p95_idx = ((sorted_ms.len() as f64 * 0.95) as usize).min(last);
    let p95 = sorted_ms[p95_idx] as f64 / 1000.0;
    Some((mean, p50, p95))
}

/// Prints the per-intent latency table (plus an "ALL" row) to stderr.
///
/// Cell text is padded with width specifiers so that the header, data and
/// summary rows all match the fixed border widths.
fn print_exercise_latency_table(mut latencies: std::collections::BTreeMap<String, Vec<u64>>) {
    let (dim, bold, _accent, _green, _yellow, _red, _cyan, reset, _mono) = colors();

    eprintln!();
    eprintln!(" {dim}┌──────────────────┬────────┬────────┬────────┐{reset}");
    eprintln!(
        " {dim}│ {:<16} │ {:<6} │ {:<6} │ {:<6} │{reset}",
        "Intent Class", "Avg", "P50", "P95"
    );
    eprintln!(" {dim}├──────────────────┼────────┼────────┼────────┤{reset}");

    let mut overall: Vec<u64> = Vec::new();
    // A BTreeMap iterates in key order, so rows come out sorted by intent name.
    for (intent, samples) in &mut latencies {
        samples.sort_unstable();
        overall.extend_from_slice(samples.as_slice());
        let Some((avg, p50, p95)) = exercise_latency_stats(samples.as_slice()) else {
            continue;
        };
        eprintln!(
            " {dim}│{reset} {intent:<16} {dim}│{reset} {avg:5.1}s {dim}│{reset} {p50:5.1}s {dim}│{reset} {p95:5.1}s {dim}│{reset}"
        );
    }

    overall.sort_unstable();
    if let Some((avg, p50, p95)) = exercise_latency_stats(&overall) {
        eprintln!(" {dim}├──────────────────┼────────┼────────┼────────┤{reset}");
        eprintln!(
            " {dim}│{reset} {bold}{:<16}{reset} {dim}│{reset} {avg:5.1}s {dim}│{reset} {p50:5.1}s {dim}│{reset} {p95:5.1}s {dim}│{reset}",
            "ALL"
        );
    }
    eprintln!(" {dim}└──────────────────┴────────┴────────┴────────┘{reset}");
}

/// Sends every prompt of the exercise matrix to `model`, `iterations` times,
/// and returns `(passed, failed)` counts.
///
/// A session is requested from `POST {base_url}/api/sessions` first; if that
/// fails or yields no `session_id`/`id`, prompts are sent without a session.
/// Each prompt is posted to `{base_url}/api/agent/message` with a
/// `model_override`. A prompt passes when the response status is a success;
/// HTTP errors, transport errors and timeouts count as failures. Progress and
/// a latency table for the passed prompts are written to stderr.
///
/// If the HTTP client cannot be built, the reason is printed and every prompt
/// is reported as failed.
async fn exercise_single_model_iterations(
    base_url: &str,
    model: &str,
    iterations: usize,
) -> (usize, usize) {
    let (dim, _bold, _accent, green, _yellow, red, _cyan, reset, _mono) = colors();
    let (ok, _action, _warn, _detail, err) = icons();

    let matrix = roboticus_llm::exercise::EXERCISE_MATRIX;
    let total = matrix.len() * iterations;

    let client = match reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(180))
        .build()
    {
        Ok(client) => client,
        Err(e) => {
            // Say why nothing ran instead of silently reporting all-failed.
            eprintln!(" {red}{err} could not build HTTP client: {e}{reset}");
            return (0, total);
        }
    };

    // Best effort: without a session id the prompts are sent session-less.
    let session_id: String = match client
        .post(format!("{base_url}/api/sessions"))
        .json(&serde_json::json!({}))
        .send()
        .await
    {
        Ok(resp) => resp
            .json::<serde_json::Value>()
            .await
            .ok()
            .and_then(|v| {
                v.get("session_id")
                    .or_else(|| v.get("id"))
                    .and_then(|s| s.as_str())
                    .map(String::from)
            })
            .unwrap_or_default(),
        Err(_) => String::new(),
    };

    let mut pass = 0usize;
    let mut fail = 0usize;
    // Latencies (ms) of passed prompts, grouped by intent class.
    let mut latencies: std::collections::BTreeMap<String, Vec<u64>> =
        std::collections::BTreeMap::new();

    for iter in 0..iterations {
        for (i, prompt) in matrix.iter().enumerate() {
            let n = iter * matrix.len() + i + 1;
            let label = format!(
                "[{n}/{total}] {}:{}",
                prompt.complexity, prompt.intent_class
            );
            eprint!(" {dim}{label}{reset} ... ");

            let mut body = serde_json::json!({
                "content": prompt.prompt,
                "model_override": model,
            });
            if !session_id.is_empty() {
                body["session_id"] = serde_json::Value::String(session_id.clone());
            }

            let started = std::time::Instant::now();
            // NOTE(review): the client above already has a 180s timeout, so this
            // 600s guard looks unlikely to fire first — confirm which limit is
            // intended.
            let result = tokio::time::timeout(
                std::time::Duration::from_secs(600),
                client
                    .post(format!("{base_url}/api/agent/message"))
                    .json(&body)
                    .send(),
            )
            .await;
            let elapsed_ms = u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX);

            match result {
                Ok(Ok(resp)) if resp.status().is_success() => {
                    pass += 1;
                    latencies
                        .entry(prompt.intent_class.to_string())
                        .or_default()
                        .push(elapsed_ms);
                    let secs = elapsed_ms as f64 / 1000.0;
                    eprintln!("{green}{ok}{reset} {dim}{secs:.1}s{reset}");
                }
                Ok(Ok(resp)) => {
                    fail += 1;
                    let status = resp.status();
                    eprintln!("{red}{err} {status}{reset}");
                }
                Ok(Err(e)) => {
                    fail += 1;
                    eprintln!("{red}{err} {e}{reset}");
                }
                Err(_) => {
                    fail += 1;
                    eprintln!("{red}{err} timeout (>600s){reset}");
                }
            }
        }
    }

    if !latencies.is_empty() {
        print_exercise_latency_table(latencies);
    }
    (pass, fail)
}