mod config;
pub mod control;
mod middleware;
pub mod object_store;
mod orchestrator;
mod policy;
mod switcher;
pub mod validate;
pub use config::{CheckpointConfig, Config, ModelConfig, ObjectStoreConfig, PolicyConfig};
pub use middleware::{ModelSwitcherLayer, ModelSwitcherService};
pub use orchestrator::{Orchestrator, OrchestratorError, ProcessState};
pub use policy::{
CostAwarePolicy, FifoPolicy, PolicyContext, PolicyDecision, ScheduleContext, SwitchPolicy,
TimeSlicePolicy,
};
pub use switcher::{
EvictionPolicy, ModelSwitcher, ProcessStrategy, SwitchError, SwitchMode, SwitcherState,
WeightStrategy,
};
use anyhow::Result;
use std::sync::Arc;
use tracing::info;
pub async fn build_app(
config: Config,
) -> Result<(axum::Router, Option<axum::Router>, axum::Router, ModelSwitcher)> {
info!("Building llmux with {} models", config.models.len());
let orchestrator = Arc::new(Orchestrator::with_options(
config.models.clone(),
config.vllm_command.clone(),
config.checkpoint.clone(),
));
let model_names: Vec<String> = config.models.keys().cloned().collect();
let policy = config.policy.build_policy(&model_names);
let switcher = ModelSwitcher::new(orchestrator.clone(), policy);
let _scheduler_handle = switcher.clone().spawn_scheduler();
let control = control::control_router(switcher.clone());
let targets = config.build_onwards_targets()?;
let onwards_state = onwards::AppState::new(targets);
let onwards_router = onwards::build_router(onwards_state);
let mut app = onwards_router.layer(ModelSwitcherLayer::new(switcher.clone()));
let metrics_router = if config.metrics_port > 0 {
let (prometheus_layer, handle) = onwards::build_metrics_layer_and_handle("llmux");
app = app.layer(prometheus_layer);
Some(onwards::build_metrics_router(handle))
} else {
None
};
Ok((app, metrics_router, control, switcher))
}
pub async fn run_warmup(switcher: &ModelSwitcher) -> Result<()> {
let orchestrator = switcher.orchestrator();
let models = orchestrator.registered_models();
info!(count = models.len(), "Starting warmup phase");
for model in &models {
let eviction = orchestrator
.eviction_policy_for(model)
.expect("model registered but no eviction policy");
let port = switcher
.orchestrator()
.model_port(model)
.expect("model registered but no port");
let model_path = switcher
.orchestrator()
.model_path(model)
.expect("model registered but no model_path");
info!(model = %model, "Warmup: starting");
orchestrator
.ensure_running(model)
.await
.map_err(|e| anyhow::anyhow!("warmup: failed to start {}: {}", model, e))?;
info!(model = %model, "Warmup: running inference");
validate::run_warmup_inference(port, &model_path).await?;
info!(model = %model, ?eviction, "Warmup: sleeping");
orchestrator
.sleep_model(model, eviction)
.await
.map_err(|e| anyhow::anyhow!("warmup: failed to sleep {}: {}", model, e))?;
info!(model = %model, "Warmup: complete");
}
info!("Warmup phase complete — all models warmed and sleeping");
Ok(())
}