use std::path::Path;
use std::time::Duration;
use harn_vm::llm::readiness::probe_provider_readiness;
use harn_vm::llm::{
normalize_ollama_keep_alive, ollama_readiness, warm_ollama_model_with_settings,
OllamaReadinessOptions, OllamaRuntimeSettings,
};
use harn_vm::llm_config;
use serde::Serialize;
use crate::cli::LocalSwitchArgs;
use crate::commands::hardware::collect_hardware_snapshot;
use super::profile::defaults_for;
use super::runtime::{
local_provider_ids, ollama_unload_model, resolve_provider_def, snapshot_provider, terminate_pid,
};
use super::state::{clear_pid_record, read_pid_record, write_selection, LocalSelection};
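
/// Final outcome of a switch, rendered as pretty JSON when JSON output is requested.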
#[derive(Debug, Serialize)]
struct SwitchResult {
provider: String,
model: String,
alias: Option<String>,
base_url: String,
ctx: u64,
keep_alive: String,
evicted: Vec<EvictionRecord>,
readiness: serde_json::Value,
rechecked: bool,
}
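
/// A sibling model or managed process that was evicted during the switch.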
#[derive(Debug, Serialize)]
struct EvictionRecord {
provider: String,
target: String,
outcome: String,
}
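
/// Switch the active local model: evict siblings (unless disabled), warm the
/// target, record readiness, and persist the selection under `base_dir`.
///
/// Hypothetical invocation, assuming the subcommand is wired up as
/// `local switch` with kebab-case flags:
/// `harn local switch <model> --ctx 8192 --keep-alive 10m --json`.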
pub(crate) async fn run(args: LocalSwitchArgs, base_dir: &Path) -> Result<(), String> {
let resolved = llm_config::resolve_model_info(&args.model);
let provider = args
.provider
.as_deref()
.map(str::trim)
.filter(|provider| !provider.is_empty())
.map(str::to_string)
.unwrap_or_else(|| resolved.provider.clone());
if !local_provider_ids(None).contains(&provider) {
return Err(format!(
"'{provider}' is not a local provider Harn manages (expected one of: {})",
local_provider_ids(None).join(", ")
));
}
let def = resolve_provider_def(&provider)?;
let base_url = llm_config::resolve_base_url(&def);
let hardware = collect_hardware_snapshot();
let defaults = defaults_for(&hardware);
let ctx = args.ctx.unwrap_or(defaults.ctx);
let keep_alive = args
.keep_alive
.clone()
.unwrap_or_else(|| defaults.keep_alive.to_string());
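    // Free up sibling models/processes before warming the target, unless the
    // caller opted out via `no_evict`.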
let evicted = if args.no_evict {
Vec::new()
} else {
evict_siblings(&provider, &resolved.id, base_dir).await
};
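    // Warm-up differs per provider: Ollama gets an explicit pull/warm cycle,
    // everything else a generic readiness double-probe.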
let (readiness, rechecked) = match provider.as_str() {
"ollama" => warm_ollama(&resolved.id, &base_url, ctx, &keep_alive, args.no_pull).await,
_ => warm_openai_compatible(&provider, &resolved.id, &base_url).await,
}?;
let selection = LocalSelection::now(
provider.clone(),
resolved.id.clone(),
resolved.alias.clone(),
base_url.clone(),
Some(ctx),
Some(keep_alive.clone()),
);
write_selection(base_dir, &selection)?;
let result = SwitchResult {
provider,
model: resolved.id,
alias: resolved.alias,
base_url,
ctx,
keep_alive,
evicted,
readiness,
rechecked,
};
if args.json {
println!(
"{}",
serde_json::to_string_pretty(&result)
.map_err(|error| format!("failed to render switch JSON: {error}"))?
);
} else {
println!(
"Activated {} via {} at {}",
result.model, result.provider, result.base_url
);
        println!(
            " ctx={} keep_alive={} (machine-profile defaults unless overridden)",
            result.ctx, result.keep_alive
        );
for record in &result.evicted {
println!(
" evicted {}::{} -> {}",
record.provider, record.target, record.outcome
);
}
if result.rechecked {
println!(" readiness re-checked after warm");
}
}
Ok(())
}
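
/// Drain or stop every managed local provider so only the newly active model
/// stays resident, returning a record of each eviction attempt.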
async fn evict_siblings(
active_provider: &str,
active_model: &str,
base_dir: &Path,
) -> Vec<EvictionRecord> {
let mut evicted = Vec::new();
for provider in local_provider_ids(None) {
let Ok(snapshot) = snapshot_provider(&provider, base_dir).await else {
continue;
};
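        // Only the provider being activated may keep a model loaded; for every
        // other provider `keep_model` is `None` and everything drains.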
let keep_model = (provider == active_provider).then_some(active_model);
if provider == "ollama" {
drain_ollama(&snapshot, keep_model, &mut evicted).await;
}
if provider == active_provider {
continue;
}
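        // Stop any process we previously launched for a non-active provider,
        // clearing its pid record even if the kill fails.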
if let Ok(Some(record)) = read_pid_record(base_dir, &provider) {
let outcome = match terminate_pid(record.pid) {
Ok(()) => "stopped".to_string(),
Err(error) => format!("error: {error}"),
};
let _ = clear_pid_record(base_dir, &provider);
evicted.push(EvictionRecord {
provider: provider.clone(),
target: format!("pid {}", record.pid),
outcome,
});
}
}
evicted
}
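
/// Unload every model resident in an Ollama instance except an optional
/// `keep` name, matched loosely via `ollama_name_matches`.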
async fn drain_ollama(
snapshot: &super::runtime::LocalProviderSnapshot,
keep: Option<&str>,
evicted: &mut Vec<EvictionRecord>,
) {
for loaded in &snapshot.loaded_models {
if keep.is_some_and(|name| ollama_name_matches(&loaded.name, name)) {
continue;
}
let outcome = match ollama_unload_model(&snapshot.base_url, &loaded.name).await {
Ok(()) => "unloaded".to_string(),
Err(error) => format!("error: {error}"),
};
evicted.push(EvictionRecord {
provider: snapshot.provider.clone(),
target: loaded.name.clone(),
outcome,
});
}
}
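
/// Loose match between a loaded Ollama model name and the requested one:
/// exact, `:latest`-tagged, or a longer variant that extends the request.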
fn ollama_name_matches(loaded_name: &str, requested: &str) -> bool {
loaded_name == requested
|| loaded_name.strip_suffix(":latest") == Some(requested)
|| loaded_name.starts_with(requested)
}
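
/// Pull (best effort), probe, warm with runtime settings, then re-probe.
/// Returns the readiness report and whether the post-warm recheck ran.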
async fn warm_ollama(
model: &str,
base_url: &str,
ctx: u64,
keep_alive: &str,
no_pull: bool,
) -> Result<(serde_json::Value, bool), String> {
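    // Pulling is best effort: a missing CLI or a failed pull is downgraded to
    // a warning so warm-up can still try an already-present model.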
if !no_pull {
if let Err(error) = ensure_ollama_model_pulled(model).await {
eprintln!("warning: ollama pull skipped: {error}");
}
}
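    // First pass probes without warming so an unreachable or invalid setup is
    // reported immediately (`rechecked` stays false).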
let mut probe = OllamaReadinessOptions::new(model);
probe.base_url = Some(base_url.to_string());
probe.warm = false;
let first = ollama_readiness(probe.clone()).await;
if !first.valid {
return Ok((
serde_json::to_value(&first).map_err(|error| error.to_string())?,
false,
));
}
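    // Warm with the requested context size; keep_alive falls back to the raw
    // string as a JSON value when it fails to normalize.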
let settings = OllamaRuntimeSettings {
num_ctx: ctx,
keep_alive: normalize_ollama_keep_alive(keep_alive)
.unwrap_or_else(|| serde_json::json!(keep_alive)),
};
if let Err(error) = warm_ollama_model_with_settings(model, Some(base_url), &settings).await {
return Ok((
serde_json::json!({
"valid": false,
"status": "warmup_failed",
"message": error,
}),
false,
));
}
let second = ollama_readiness(probe).await;
Ok((
serde_json::to_value(&second).map_err(|error| error.to_string())?,
true,
))
}
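
/// Double-probe a generic OpenAI-compatible endpoint: once up front, and once
/// after a short delay to confirm the first reading, mirroring the Ollama
/// warm-then-recheck flow.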
async fn warm_openai_compatible(
provider: &str,
model: &str,
base_url: &str,
) -> Result<(serde_json::Value, bool), String> {
let first = probe_provider_readiness(provider, Some(model), Some(base_url)).await;
if !first.ok {
return Ok((
serde_json::to_value(&first).map_err(|error| error.to_string())?,
false,
));
}
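    // Short settle delay before the confirming re-probe.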
tokio::time::sleep(Duration::from_millis(250)).await;
let second = probe_provider_readiness(provider, Some(model), Some(base_url)).await;
Ok((
serde_json::to_value(&second).map_err(|error| error.to_string())?,
true,
))
}
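
/// Shell out to `ollama pull <model>`, failing fast when the CLI is not on
/// PATH.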
async fn ensure_ollama_model_pulled(model: &str) -> Result<(), String> {
if which::which("ollama").is_err() {
return Err("ollama CLI not on PATH".to_string());
}
let status = tokio::process::Command::new("ollama")
.arg("pull")
.arg(model)
.status()
.await
.map_err(|error| format!("failed to spawn ollama pull: {error}"))?;
if status.success() {
Ok(())
} else {
Err(format!("ollama pull exited {status}"))
}
}
#[cfg(test)]
mod tests {
use super::ollama_name_matches;
#[test]
    fn ollama_name_matches_accepts_exact_latest_suffix_and_prefix() {
assert!(ollama_name_matches("qwen3:30b", "qwen3:30b"));
assert!(ollama_name_matches("qwen3:30b:latest", "qwen3:30b"));
assert!(ollama_name_matches("qwen3:30b-a3b-instruct", "qwen3:30b"));
assert!(!ollama_name_matches("llama3.2", "qwen3:30b"));
}
}