use std::collections::HashMap;
use std::sync::Arc;
use serde::Serialize;
use serde_json::{Value, json};
use tracing::{debug, info, warn};
use crate::backend::{
Backend, LlamaCppBackend, LmStudioBackend, OllamaBackend, OpenRouterBackend, VeniceBackend,
VllmBackend,
};
use crate::config::{Backend as CfgBackend, Config};
/// Version string of this crate, captured from `CARGO_PKG_VERSION` at compile
/// time and reported as `metadata.agent_version` in the capabilities message.
const AGENT_VERSION: &str = env!("CARGO_PKG_VERSION");
/// Ranks a backend kind for deduplication: the lower the value, the more
/// strongly that kind is preferred when several backends offer the same model.
///
/// Kinds that are not in the table all share the lowest preference, `100`.
fn kind_priority(kind: &str) -> u8 {
    // Ordered from most to least preferred.
    const RANKS: [(&str, u8); 6] = [
        ("vllm", 0),
        ("llamacpp", 1),
        ("lmstudio", 2),
        ("ollama", 3),
        ("venice", 4),
        ("openrouter", 5),
    ];
    RANKS
        .iter()
        .find(|(name, _)| *name == kind)
        .map_or(100, |&(_, rank)| rank)
}
/// A model offered by a backend, combined with the pricing and concurrency
/// values resolved from the agent configuration. Serialized into the
/// `models` array of the capabilities message.
#[derive(Debug, Clone, Serialize)]
pub struct ResolvedModel {
/// Model identifier as returned by the backend's model listing.
pub model_id: String,
/// Input price from the pricing config (model-specific entry, else the
/// default). NOTE(review): presumably per 1M input tokens; unit/currency
/// is not visible here — confirm.
pub input_per_1m: u64,
/// Output price from the pricing config (model-specific entry, else the
/// default). NOTE(review): presumably per 1M output tokens — confirm.
pub output_per_1m: u64,
/// Concurrency limit advertised for this model; discovery fills it from
/// `cfg.limits.max_concurrent`.
pub max_concurrent: u32,
/// Kind string of the backend that serves this model (`Backend::kind()`).
pub backend: String,
/// Context window reported by the backend, if any; omitted from the
/// serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub context_window: Option<u32>,
}
/// A backend that passed the health check and model listing during discovery,
/// together with the models that were kept for it.
pub struct DiscoveredBackend {
/// Shared handle to the live backend.
pub backend: Arc<dyn Backend>,
/// Models of this backend that survived the allow-list and pricing filters
/// (may be empty).
pub models: Vec<ResolvedModel>,
}
/// Outcome of a discovery pass over all configured backends.
pub struct DiscoveryResult {
/// Every backend that was successfully discovered, in config order.
pub backends: Vec<DiscoveredBackend>,
/// Models across all backends with duplicate model ids collapsed by backend
/// priority and sorted by model id; this is the list that gets advertised.
pub capability_models: Vec<ResolvedModel>,
}
impl DiscoveryResult {
    /// Renders this discovery result as the `capabilities` JSON message.
    ///
    /// The payload contains the `capability_models` list, the global limits
    /// read from `cfg.limits`, and the agent version under `metadata`.
    pub fn to_capabilities(&self, cfg: &Config) -> Value {
        let metadata = json!({
            "agent_version": AGENT_VERSION,
        });
        json!({
            "type": "capabilities",
            "models": self.capability_models,
            "limits": {
                "max_concurrent_total": cfg.limits.max_concurrent,
                "max_tokens_per_minute": cfg.limits.max_tokens_per_minute,
            },
            "metadata": metadata
        })
    }
}
/// Builds the runtime backend described by one configuration entry.
///
/// Local servers (`vllm`, `llamacpp`, `lmstudio`, `ollama`) are constructed
/// from `cfg.url`. Hosted services (`openrouter`, `venice`) are constructed by
/// passing `cfg.api_key_env` to the backend's `from_env`.
///
/// Returns `None` when the entry cannot be turned into a backend: the kind is
/// unknown, the setting the kind requires (`url` or `api_key_env`) is absent,
/// or `from_env` fails. Every one of these cases logs a warning so that a
/// misconfigured entry never disappears silently.
pub fn build_backend(cfg: &CfgBackend) -> Option<Arc<dyn Backend>> {
    let kind = cfg.kind.as_str();

    // Accessors for the per-kind required setting. They are closures so the
    // "missing setting" warning is emitted only for the kind that needs it.
    let url = || {
        let url = cfg.url.as_deref();
        if url.is_none() {
            warn!(kind = kind, "backend has no url configured; skipping");
        }
        url
    };
    let api_key_env = || {
        let name = cfg.api_key_env.as_deref();
        if name.is_none() {
            warn!(kind = kind, "backend has no api_key_env configured; skipping");
        }
        name
    };

    match kind {
        "vllm" => url().map(|u| Arc::new(VllmBackend::new(u)) as Arc<dyn Backend>),
        "llamacpp" => url().map(|u| Arc::new(LlamaCppBackend::new(u)) as Arc<dyn Backend>),
        "lmstudio" => url().map(|u| Arc::new(LmStudioBackend::new(u)) as Arc<dyn Backend>),
        "ollama" => url().map(|u| Arc::new(OllamaBackend::new(u)) as Arc<dyn Backend>),
        "openrouter" => match OpenRouterBackend::from_env(api_key_env()?) {
            Ok(b) => Some(Arc::new(b) as Arc<dyn Backend>),
            Err(e) => {
                warn!(?e, "skipping openrouter backend (no api key)");
                None
            }
        },
        "venice" => match VeniceBackend::from_env(api_key_env()?) {
            Ok(b) => Some(Arc::new(b) as Arc<dyn Backend>),
            Err(e) => {
                warn!(?e, "skipping venice backend (no api key)");
                None
            }
        },
        other => {
            warn!(kind = other, "unknown backend kind in config; skipping");
            None
        }
    }
}
/// Runs one discovery pass over every backend listed in `cfg.backends`.
///
/// For each entry, in config order:
/// 1. build the backend (entries that cannot be built are skipped);
/// 2. run its health check and skip it when the check errors or reports the
///    backend as unreachable;
/// 3. list its models and keep those that are on the entry's allow-list (when
///    one is configured) and have non-zero input and output pricing.
///
/// A backend that passes steps 1–3 is recorded even if no model survives the
/// filters. The returned result also carries the priority-deduplicated model
/// list used for the capabilities message.
pub async fn run(cfg: &Config) -> DiscoveryResult {
    let mut backends: Vec<DiscoveredBackend> = Vec::new();

    for entry in &cfg.backends {
        let backend = match build_backend(entry) {
            Some(b) => b,
            None => continue,
        };

        // Health gate: a failed or negative check drops the backend for this pass.
        match backend.health().await {
            Err(e) => {
                warn!(backend = backend.id(), ?e, "health check failed");
                continue;
            }
            Ok(h) if !h.reachable => {
                warn!(
                    backend = backend.id(),
                    error = ?h.last_error,
                    "backend unreachable; skipping"
                );
                continue;
            }
            Ok(h) => {
                debug!(
                    backend = backend.id(),
                    latency_ms = ?h.latency_ms,
                    "backend healthy"
                );
            }
        }

        let listed = match backend.list_models().await {
            Err(e) => {
                warn!(backend = backend.id(), ?e, "list_models failed");
                continue;
            }
            Ok(listed) => listed,
        };

        // `None` means "no allow-list": every listed model is eligible.
        let allowlist: Option<&Vec<String>> = entry.models.as_ref();
        let mut kept: Vec<ResolvedModel> = Vec::new();
        for m in listed {
            if let Some(allowed) = allowlist {
                if !allowed.contains(&m.model_id) {
                    continue;
                }
            }

            // Model-specific pricing takes precedence over the configured defaults.
            let (input_per_1m, output_per_1m) = cfg
                .pricing
                .models
                .get(&m.model_id)
                .map(|p| (p.input_per_1m, p.output_per_1m))
                .unwrap_or((
                    cfg.pricing.default_input_per_1m,
                    cfg.pricing.default_output_per_1m,
                ));

            // A zero on either side is treated as "no pricing configured".
            if input_per_1m == 0 || output_per_1m == 0 {
                warn!(model = %m.model_id, "no pricing; dropping model");
                continue;
            }

            kept.push(ResolvedModel {
                model_id: m.model_id,
                input_per_1m,
                output_per_1m,
                // Every model is advertised with the global concurrency limit.
                max_concurrent: cfg.limits.max_concurrent,
                backend: backend.kind().to_string(),
                context_window: m.context_window,
            });
        }

        info!(
            backend = backend.id(),
            kind = backend.kind(),
            models = kept.len(),
            "backend discovered"
        );
        backends.push(DiscoveredBackend {
            backend,
            models: kept,
        });
    }

    let capability_models = dedupe_by_priority(&backends);
    DiscoveryResult {
        backends,
        capability_models,
    }
}
/// Collapses the models of all discovered backends into one entry per model id.
///
/// When several backends offer the same model id, the entry whose backend kind
/// has the lowest `kind_priority` value is kept; on a tie the entry seen first
/// (in `discovered` order) stays. The result is sorted by model id.
fn dedupe_by_priority(discovered: &[DiscoveredBackend]) -> Vec<ResolvedModel> {
    let mut winners: HashMap<String, ResolvedModel> = HashMap::new();

    for candidate in discovered.iter().flat_map(|db| db.models.iter()) {
        // A candidate replaces the held entry only when it is strictly better;
        // a model id that has not been seen yet always wins.
        let wins = winners.get(&candidate.model_id).map_or(true, |held| {
            kind_priority(&candidate.backend) < kind_priority(&held.backend)
        });
        if wins {
            winners.insert(candidate.model_id.clone(), candidate.clone());
        }
    }

    // HashMap iteration order is unspecified, so sort for a stable output.
    let mut unique: Vec<ResolvedModel> = winners.into_values().collect();
    unique.sort_by(|lhs, rhs| lhs.model_id.cmp(&rhs.model_id));
    unique
}