Skip to main content

nexus/api/
mod.rs

1//! # Core API Gateway
2//!
3//! OpenAI-compatible HTTP endpoints for the Nexus LLM orchestrator.
4//!
5//! This module implements the HTTP API server that provides OpenAI-compatible
6//! endpoints for chat completions, model listing, and health checks.
7//!
8//! ## Endpoints
9//!
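//! - `POST /v1/chat/completions` - Chat completion (non-streaming)
//! - `POST /v1/embeddings` - Embeddings
//! - `GET /v1/models` - List available models from healthy backends
//! - `GET /v1/history` - Request history for the dashboard
//! - `GET /v1/stats` - Statistics from the metrics subsystem
//! - `GET /health` - System health status with backend counts
//! - `GET /metrics` - Metrics export
//! - `GET /`, `GET /assets/*path`, `GET /ws` - Dashboard page, static assets, and WebSocket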
13//!
14//! ## Example
15//!
16//! ```no_run
17//! use nexus::api::{AppState, create_router};
18//! use nexus::config::NexusConfig;
19//! use nexus::registry::Registry;
20//! use std::sync::Arc;
21//!
22//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
23//! // Create registry and config
24//! let registry = Arc::new(Registry::new());
25//! let config = Arc::new(NexusConfig::default());
26//!
27//! // Create application state
28//! let state = Arc::new(AppState::new(registry, config));
29//!
30//! // Create router with all endpoints
31//! let app = create_router(state);
32//!
33//! // Start server
34//! let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await?;
35//! axum::serve(listener, app).await?;
36//! # Ok(())
37//! # }
38//! ```
39//!
40//! ## Architecture
41//!
42//! The API Gateway follows a simple request flow:
43//! 1. Request received and parsed into OpenAI-compatible types
44//! 2. Registry queried for backends supporting the requested model
45//! 3. Request proxied to first healthy backend
46//! 4. On failure, retry with next healthy backend (up to max_retries)
47//! 5. Response returned in OpenAI format or error returned
48//!
49//! ## Error Handling
50//!
51//! All errors are returned in OpenAI-compatible format:
52//! ```json
53//! {
54//!   "error": {
55//!     "message": "Model 'gpt-4' not found",
56//!     "type": "invalid_request_error",
57//!     "param": "model",
58//!     "code": "model_not_found"
59//!   }
60//! }
61//! ```
62
63mod completions;
64pub mod embeddings;
65pub mod error;
66pub mod headers;
67mod health;
68pub mod models;
69pub mod types;
70
71pub use types::*;
72
73use crate::config::NexusConfig;
74use crate::dashboard::history::RequestHistory;
75use crate::dashboard::types::WebSocketUpdate;
76use crate::metrics::MetricsCollector;
77use crate::registry::Registry;
78use crate::routing;
79use axum::{
80    routing::{get, post},
81    Router,
82};
83use std::sync::Arc;
84use tokio::sync::broadcast;
85use tower_http::limit::RequestBodyLimitLayer;
86
87use std::time::Instant;
88
/// Upper limit on the size of an incoming request body: 10 MiB (10 * 1024 * 1024 bytes).
const MAX_BODY_SIZE: usize = 10 << 20;
91
92/// Shared application state accessible to all handlers.
93pub struct AppState {
94    pub registry: Arc<Registry>,
95    pub config: Arc<NexusConfig>,
96    pub http_client: reqwest::Client,
97    pub router: Arc<routing::Router>,
98    /// Server startup time for uptime tracking
99    pub start_time: Instant,
100    /// Metrics collector for observability
101    pub metrics_collector: Arc<MetricsCollector>,
102    /// Request history ring buffer for dashboard
103    pub request_history: Arc<RequestHistory>,
104    /// WebSocket broadcast channel for dashboard real-time updates
105    pub ws_broadcast: broadcast::Sender<WebSocketUpdate>,
106    /// Pricing table for cloud cost estimation
107    pub pricing: Arc<crate::agent::pricing::PricingTable>,
108    /// Optional request queue for burst traffic (T030)
109    pub queue: Option<Arc<crate::queue::RequestQueue>>,
110}
111
112impl AppState {
113    /// Create new application state with the given registry and configuration.
114    pub fn new(registry: Arc<Registry>, config: Arc<NexusConfig>) -> Self {
115        let timeout_secs = config.server.request_timeout_seconds;
116
117        let http_client = reqwest::Client::builder()
118            .timeout(std::time::Duration::from_secs(timeout_secs))
119            .pool_max_idle_per_host(10)
120            .build()
121            .expect("Failed to create HTTP client");
122
123        let start_time = Instant::now();
124
125        // Create router from config, compiling traffic policies for privacy enforcement
126        let policy_matcher = crate::config::PolicyMatcher::compile(config.routing.policies.clone())
127            .unwrap_or_else(|e| {
128                tracing::warn!("Failed to compile traffic policies, using defaults: {}", e);
129                crate::config::PolicyMatcher::default()
130            });
131
132        let router = Arc::new(routing::Router::with_aliases_fallbacks_and_policies(
133            Arc::clone(&registry),
134            config.routing.strategy.into(),
135            config.routing.weights.clone().into(),
136            config.routing.aliases.clone(),
137            config.routing.fallbacks.clone(),
138            policy_matcher,
139            config.quality.clone(),
140        ));
141
142        // Initialize metrics (safe to call multiple times - will reuse existing if already set)
143        let prometheus_handle = crate::metrics::setup_metrics().unwrap_or_else(|e| {
144            // If metrics are already initialized (e.g., in tests), create a new handle
145            // by building a recorder without installing it globally
146            tracing::debug!("Metrics already initialized, creating new handle: {}", e);
147            crate::metrics::PrometheusBuilder::new()
148                .build_recorder()
149                .handle()
150        });
151
152        let metrics_collector = Arc::new(MetricsCollector::new(
153            Arc::clone(&registry),
154            start_time,
155            prometheus_handle,
156        ));
157
158        // Create request history ring buffer for dashboard
159        let request_history = Arc::new(RequestHistory::new());
160
161        // Create WebSocket broadcast channel for dashboard real-time updates
162        let (ws_broadcast, _) = broadcast::channel(1000);
163
164        // Create pricing table for cloud cost estimation
165        let pricing = Arc::new(crate::agent::pricing::PricingTable::new());
166
167        Self {
168            registry,
169            config,
170            http_client,
171            router,
172            start_time,
173            metrics_collector,
174            request_history,
175            ws_broadcast,
176            pricing,
177            queue: None,
178        }
179    }
180}
181
182/// Create the main API router with all endpoints configured.
183pub fn create_router(state: Arc<AppState>) -> Router {
184    Router::new()
185        // Dashboard routes
186        .route("/", get(crate::dashboard::dashboard_handler))
187        .route("/assets/*path", get(crate::dashboard::assets_handler))
188        .route("/ws", get(crate::dashboard::websocket_handler))
189        // API routes
190        .route("/v1/chat/completions", post(completions::handle))
191        .route("/v1/embeddings", post(embeddings::handle))
192        .route("/v1/models", get(models::handle))
193        .route("/v1/history", get(crate::dashboard::history_handler))
194        .route("/health", get(health::handle))
195        .route("/metrics", get(crate::metrics::handler::metrics_handler))
196        .route("/v1/stats", get(crate::metrics::handler::stats_handler))
197        .layer(RequestBodyLimitLayer::new(MAX_BODY_SIZE))
198        .with_state(state)
199}