llmsim 0.4.0

LLM Traffic Simulator - A lightweight, high-performance LLM API simulator
Documentation
//! CLI module for LLMSim server functionality.
//!
//! This module provides the `llmsim serve` command implementation.

mod config;
mod handlers;
mod state;
mod ws_handler;

pub use config::{Config, ConfigError};
pub use state::AppState;
pub use ws_handler::ws_responses;

use crate::script::Script;
use crate::stats::{new_shared_stats, SharedStats};
use axum::{
    routing::{get, post},
    Router,
};
use std::{net::SocketAddr, sync::Arc};
use tokio::signal;
use tower_http::{cors::CorsLayer, trace::TraceLayer};

/// Assemble the complete Axum [`Router`] served by LLMSim.
///
/// Registers the health and stats endpoints, the OpenAI-compatible routes
/// and the OpenResponses route, then wraps everything in the HTTP trace and
/// permissive CORS layers before attaching the shared application state.
/// Public so integration tests can build the router without starting a server.
pub fn build_router(state: Arc<AppState>) -> Router {
    // `/openai/v1/responses` answers POST with the regular handler and GET
    // with the WebSocket handler.
    let openai_responses = post(handlers::create_response).get(ws_handler::ws_responses);

    let routes = Router::new()
        .route("/health", get(handlers::health))
        .route("/llmsim/stats", get(handlers::get_stats))
        // OpenAI API routes
        .route(
            "/openai/v1/chat/completions",
            post(handlers::chat_completions),
        )
        .route("/openai/v1/models", get(handlers::list_models))
        .route("/openai/v1/models/{model_id}", get(handlers::get_model))
        .route("/openai/v1/responses", openai_responses)
        // OpenResponses API routes (https://www.openresponses.org)
        .route(
            "/openresponses/v1/responses",
            post(handlers::create_openresponses_response),
        );

    // Middleware is added after every route so it applies to all of them.
    let layered = routes
        .layer(TraceLayer::new_for_http())
        .layer(CorsLayer::permissive());

    layered.with_state(state)
}

/// Start the LLMSim server for `config`, using a freshly created stats handle.
///
/// Delegates to [`run_server_with_stats`] and returns whatever it returns.
pub async fn run_server(config: Config) -> Result<(), Box<dyn std::error::Error>> {
    let stats = new_shared_stats();
    run_server_with_stats(config, stats).await
}

/// Run the LLMSim server with the given configuration and shared stats.
///
/// Logs the startup configuration, loads a response script when
/// `config.response.script_path` is set, binds a TCP listener on
/// `config.server.host:config.server.port` and serves the router produced by
/// [`build_router`] until [`shutdown_signal`] completes.
///
/// # Errors
///
/// Returns an error when:
/// - the configured host/port pair does not parse as a socket address,
/// - the script file cannot be loaded,
/// - the listener cannot be bound, or
/// - the server itself fails while running.
pub async fn run_server_with_stats(
    config: Config,
    stats: SharedStats,
) -> Result<(), Box<dyn std::error::Error>> {
    // The host and port come from user-supplied configuration, so a malformed
    // value is reported as an error to the caller instead of panicking.
    let addr = format!("{}:{}", config.server.host, config.server.port)
        .parse::<SocketAddr>()
        .map_err(|e| -> Box<dyn std::error::Error> {
            Box::new(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!(
                    "Invalid address {}:{}: {}",
                    config.server.host, config.server.port, e
                ),
            ))
        })?;

    tracing::info!("Starting LLMSim server on {}", addr);
    tracing::info!(
        "Configuration: latency={:?}, generator={}, target_tokens={}",
        config.latency.profile.as_deref().unwrap_or("auto"),
        config.response.generator,
        config.response.target_tokens
    );
    tracing::info!("OpenAI endpoints: /openai/v1/...");
    tracing::info!(
        "WebSocket mode: ws://{}:{}/openai/v1/responses",
        config.server.host,
        config.server.port
    );
    tracing::info!(
        "OpenResponses endpoint: /openresponses/v1/responses (https://www.openresponses.org)"
    );
    tracing::info!("Stats endpoint: /llmsim/stats");

    let mut state = AppState::new(config, stats);
    // The path is cloned so `state` can be reassigned below while the path is
    // still used for loading and logging.
    if let Some(script_path) = state.config.response.script_path.clone() {
        let script =
            Script::from_file(&script_path).map_err(|e| -> Box<dyn std::error::Error> {
                Box::new(std::io::Error::other(format!(
                    "Failed to load script from {}: {}",
                    script_path, e
                )))
            })?;
        tracing::info!(
            "Scripted mode enabled: {} turns from {} (on_exhausted={:?})",
            script.len(),
            script_path,
            script.on_exhausted()
        );
        state = state.with_script(Arc::new(script));
    }
    let app = build_router(Arc::new(state));

    let listener = tokio::net::TcpListener::bind(addr).await?;

    // Serve until Ctrl+C or (on Unix) a terminate signal is received.
    axum::serve(listener, app)
        .with_graceful_shutdown(shutdown_signal())
        .await?;

    tracing::info!("Server shutdown complete");
    Ok(())
}

/// Resolve once the process has been asked to stop.
///
/// Completes when Ctrl+C is received or, on Unix targets, when the terminate
/// signal is delivered. On other targets only Ctrl+C can complete it.
///
/// # Panics
///
/// Panics if either signal handler cannot be installed.
async fn shutdown_signal() {
    let interrupt = async {
        let outcome = signal::ctrl_c().await;
        outcome.expect("Failed to install Ctrl+C handler");
    };

    #[cfg(unix)]
    let terminate_request = async {
        let mut stream = signal::unix::signal(signal::unix::SignalKind::terminate())
            .expect("Failed to install signal handler");
        stream.recv().await;
    };

    // Without Unix signals there is nothing to wait for on this branch, so it
    // is a future that never completes.
    #[cfg(not(unix))]
    let terminate_request = std::future::pending::<()>();

    // Whichever request arrives first ends the wait.
    tokio::select! {
        _ = interrupt => {},
        _ = terminate_request => {},
    }

    tracing::info!("Shutdown signal received");
}