mod config;
mod cost;
mod hooks;
mod middleware;
pub mod policy;
mod proxy;
mod switcher;
mod telemetry;
pub(crate) mod types;
pub use config::{Config, ModelConfig, PolicyConfig};
pub use cost::SwitchCostTracker;
pub use hooks::HookRunner;
pub use middleware::{ModelSwitcherLayer, ModelSwitcherService};
pub use policy::{FifoPolicy, PolicyContext, PolicyDecision, ScheduleContext, SwitchPolicy};
pub use proxy::{ProxyState, proxy_handler};
pub use switcher::{InFlightGuard, ModelSwitcher};
pub use types::{SwitchError, SwitcherState};
use anyhow::Result;
use axum::routing::get;
use axum::{Json, Router};
use std::sync::Arc;
use tracing::info;
/// Assemble the llmux HTTP application from a loaded [`Config`].
///
/// The steps, in order:
/// 1. Log how many models are configured.
/// 2. Create a [`HookRunner`] from a clone of `config.models`, build the
///    policy via `config.policy.build_policy()`, and combine both into a
///    [`ModelSwitcher`].
/// 3. Spawn the switcher's scheduler on a clone of the switcher.
/// 4. Create the [`ProxyState`] and call `telemetry::install()`.
/// 5. Precompute the `/v1/models` JSON body: one entry per configured model
///    id, ordered by id.
/// 6. Register `/v1/models`, plus `/metrics` when `telemetry::install()`
///    returned a handle, send everything else to [`proxy_handler`], and wrap
///    the router in a [`ModelSwitcherLayer`].
///
/// Returns the finished [`Router`] together with the [`ModelSwitcher`] that
/// the layer was built from.
///
/// # Errors
///
/// No fallible step is visible in this body; it always reaches `Ok`. The
/// `Result` return type is part of the existing signature.
pub async fn build_app(config: Config) -> Result<(Router, ModelSwitcher)> {
    info!("Building llmux with {} models", config.models.len());

    let hook_runner = Arc::new(HookRunner::new(config.models.clone()));
    let switch_policy = config.policy.build_policy();
    let switcher = ModelSwitcher::new(hook_runner, switch_policy);

    // Bound to a named (underscore-prefixed) local so the handle is dropped at
    // the end of this function rather than immediately.
    let _scheduler = switcher.clone().spawn_scheduler();

    let proxy_state = ProxyState::new();
    let metrics_handle = telemetry::install();

    // The model list is computed once here; each request only clones it.
    let models_response = {
        let mut entries: Vec<serde_json::Value> = Vec::with_capacity(config.models.len());
        for id in config.models.keys() {
            entries.push(serde_json::json!({
                "id": id,
                "object": "model",
                "created": 0,
                "owned_by": "llmux"
            }));
        }
        // Order entries by their "id" field so the listing is deterministic.
        entries.sort_by(|lhs, rhs| lhs["id"].as_str().cmp(&rhs["id"].as_str()));
        serde_json::json!({
            "object": "list",
            "data": entries
        })
    };

    let list_models = move || {
        let body = models_response.clone();
        async move { Json(body) }
    };
    let mut app = Router::new().route("/v1/models", get(list_models));

    // The metrics endpoint is only registered when a handle is available.
    if let Some(handle) = metrics_handle {
        let render_metrics = move || {
            // Rendered when the handler is invoked, before the future is created.
            let output = handle.render();
            async move {
                (
                    [(
                        axum::http::header::CONTENT_TYPE,
                        "text/plain; version=0.0.4; charset=utf-8",
                    )],
                    output,
                )
            }
        };
        app = app.route("/metrics", get(render_metrics));
    }

    // Any path without an explicit route falls through to the proxy handler.
    let router = app
        .fallback(proxy_handler)
        .with_state(proxy_state)
        .layer(ModelSwitcherLayer::new(switcher.clone()));

    Ok((router, switcher))
}