use anyhow::{Context, Result};
use clap::Parser;
use llmux::Config;
use std::path::PathBuf;
use tokio::net::TcpListener;
use tracing::info;
use tracing_subscriber::EnvFilter;
// Command-line arguments for the `llmux` binary.
//
// This is a plain comment on purpose: the help summary is set by the explicit
// `about` attribute below. The `///` comments on the fields are picked up by
// clap's derive macro and shown as per-option help text in `--help`.
#[derive(Parser, Debug)]
#[command(name = "llmux")]
#[command(about = "Zero-reload model switching for vLLM")]
struct Args {
    /// Path to the configuration file
    #[arg(short, long, default_value = "config.json")]
    config: PathBuf,

    /// Port to listen on (overrides the port from the configuration file)
    #[arg(short, long)]
    port: Option<u16>,

    /// Enable debug-level logging
    #[arg(short, long)]
    verbose: bool,

    /// Run validation for the given model and exit instead of starting the server
    #[arg(long, value_name = "MODEL")]
    validate: Option<String>,

    /// Comma-separated list of levels to pass to the validation run (requires --validate)
    #[arg(
        long,
        value_name = "LEVELS",
        value_delimiter = ',',
        requires = "validate"
    )]
    levels: Vec<u8>,
}
/// Program entry point.
///
/// Parses the command line, installs the tracing subscriber, loads the
/// configuration file and then does one of two things:
///
/// * with `--validate MODEL`, runs `llmux::validate::run_validation` and exits
///   the process with status 0 on success or 1 on failure;
/// * otherwise, builds the application and serves it with axum on
///   `0.0.0.0:<port>`.
///
/// # Errors
///
/// Returns an error if the configuration cannot be loaded, the validation run
/// itself fails, the application cannot be built, the listener cannot bind,
/// or the server terminates with an error.
#[tokio::main]
async fn main() -> Result<()> {
    let args = Args::parse();

    // `--verbose` installs a fixed debug filter and does not consult the
    // environment; otherwise the environment filter is used, falling back to
    // "info" when it is unset or invalid.
    let filter = if args.verbose {
        EnvFilter::new("llmux=debug,onwards=debug,tower_http=debug,vllm=debug")
    } else {
        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"))
    };
    tracing_subscriber::fmt()
        .with_env_filter(filter)
        .with_target(true)
        .init();

    info!("Starting llmux");

    let mut config = Config::from_file(&args.config)
        .await
        .with_context(|| format!("Failed to load config from {}", args.config.display()))?;

    // A port given on the command line takes precedence over the config file.
    if let Some(port) = args.port {
        config.port = port;
    }

    info!(
        models = ?config.models.keys().collect::<Vec<_>>(),
        port = config.port,
        "Configuration loaded"
    );

    // Validation mode: run the check and terminate without starting the server.
    if let Some(model_name) = args.validate {
        // An empty `--levels` list is passed on as `None`.
        let levels = (!args.levels.is_empty()).then_some(args.levels.as_slice());
        let success = llmux::validate::run_validation(&config, &model_name, levels).await?;
        std::process::exit(if success { 0 } else { 1 });
    }

    // Only the port is needed after the app is built, so copy it out and move
    // the configuration into `build_app` instead of cloning the whole value.
    let port = config.port;
    let app = llmux::build_app(config)
        .await
        .context("Failed to build application")?;

    let addr = format!("0.0.0.0:{}", port);
    let listener = TcpListener::bind(&addr)
        .await
        .with_context(|| format!("Failed to bind to {}", addr))?;
    info!(addr = %addr, "Listening for requests");

    axum::serve(listener, app).await.context("Server error")?;
    Ok(())
}