nexus/api/mod.rs
1//! # Core API Gateway
2//!
3//! OpenAI-compatible HTTP endpoints for the Nexus LLM orchestrator.
4//!
5//! This module implements the HTTP API server that provides OpenAI-compatible
6//! endpoints for chat completions, model listing, and health checks.
7//!
8//! ## Endpoints
9//!
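//! - `POST /v1/chat/completions` - Chat completion (non-streaming)
//! - `POST /v1/embeddings` - Embeddings (handled by `embeddings::handle`)
//! - `GET /v1/models` - List available models from healthy backends
//! - `GET /v1/history` - Request history (served by the dashboard module)
//! - `GET /v1/stats` - Statistics (served by the metrics module)
//! - `GET /metrics` - Metrics endpoint (served by the metrics module)
//! - `GET /health` - System health status with backend counts
//! - `GET /`, `GET /assets/*path`, `GET /ws` - Dashboard page, static assets, and WebSocket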
13//!
14//! ## Example
15//!
16//! ```no_run
17//! use nexus::api::{AppState, create_router};
18//! use nexus::config::NexusConfig;
19//! use nexus::registry::Registry;
20//! use std::sync::Arc;
21//!
22//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
23//! // Create registry and config
24//! let registry = Arc::new(Registry::new());
25//! let config = Arc::new(NexusConfig::default());
26//!
27//! // Create application state
28//! let state = Arc::new(AppState::new(registry, config));
29//!
30//! // Create router with all endpoints
31//! let app = create_router(state);
32//!
33//! // Start server
34//! let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await?;
35//! axum::serve(listener, app).await?;
36//! # Ok(())
37//! # }
38//! ```
39//!
40//! ## Architecture
41//!
42//! The API Gateway follows a simple request flow:
43//! 1. Request received and parsed into OpenAI-compatible types
44//! 2. Registry queried for backends supporting the requested model
45//! 3. Request proxied to first healthy backend
46//! 4. On failure, retry with next healthy backend (up to max_retries)
47//! 5. Response returned in OpenAI format or error returned
48//!
49//! ## Error Handling
50//!
51//! All errors are returned in OpenAI-compatible format:
//! ```json
//! {
//!   "error": {
//!     "message": "Model 'gpt-4' not found",
//!     "type": "invalid_request_error",
//!     "param": "model",
//!     "code": "model_not_found"
//!   }
//! }
//! ```
62
63mod completions;
64pub mod embeddings;
65pub mod error;
66pub mod headers;
67mod health;
68pub mod models;
69pub mod types;
70
71pub use types::*;
72
73use crate::config::NexusConfig;
74use crate::dashboard::history::RequestHistory;
75use crate::dashboard::types::WebSocketUpdate;
76use crate::metrics::MetricsCollector;
77use crate::registry::Registry;
78use crate::routing;
79use axum::{
80 routing::{get, post},
81 Router,
82};
83use std::sync::Arc;
84use tokio::sync::broadcast;
85use tower_http::limit::RequestBodyLimitLayer;
86
87use std::time::Instant;
88
/// Upper bound on the size of an accepted request body, in bytes.
///
/// Equal to 10 * 1024 * 1024 (10 MiB); applied to the router via
/// `RequestBodyLimitLayer`.
const MAX_BODY_SIZE: usize = 10 << 20;
91
92/// Shared application state accessible to all handlers.
93pub struct AppState {
94 pub registry: Arc<Registry>,
95 pub config: Arc<NexusConfig>,
96 pub http_client: reqwest::Client,
97 pub router: Arc<routing::Router>,
98 /// Server startup time for uptime tracking
99 pub start_time: Instant,
100 /// Metrics collector for observability
101 pub metrics_collector: Arc<MetricsCollector>,
102 /// Request history ring buffer for dashboard
103 pub request_history: Arc<RequestHistory>,
104 /// WebSocket broadcast channel for dashboard real-time updates
105 pub ws_broadcast: broadcast::Sender<WebSocketUpdate>,
106 /// Pricing table for cloud cost estimation
107 pub pricing: Arc<crate::agent::pricing::PricingTable>,
108 /// Optional request queue for burst traffic (T030)
109 pub queue: Option<Arc<crate::queue::RequestQueue>>,
110}
111
112impl AppState {
113 /// Create new application state with the given registry and configuration.
114 pub fn new(registry: Arc<Registry>, config: Arc<NexusConfig>) -> Self {
115 let timeout_secs = config.server.request_timeout_seconds;
116
117 let http_client = reqwest::Client::builder()
118 .timeout(std::time::Duration::from_secs(timeout_secs))
119 .pool_max_idle_per_host(10)
120 .build()
121 .expect("Failed to create HTTP client");
122
123 let start_time = Instant::now();
124
125 // Create router from config, compiling traffic policies for privacy enforcement
126 let policy_matcher = crate::config::PolicyMatcher::compile(config.routing.policies.clone())
127 .unwrap_or_else(|e| {
128 tracing::warn!("Failed to compile traffic policies, using defaults: {}", e);
129 crate::config::PolicyMatcher::default()
130 });
131
132 let router = Arc::new(routing::Router::with_aliases_fallbacks_and_policies(
133 Arc::clone(®istry),
134 config.routing.strategy.into(),
135 config.routing.weights.clone().into(),
136 config.routing.aliases.clone(),
137 config.routing.fallbacks.clone(),
138 policy_matcher,
139 config.quality.clone(),
140 ));
141
142 // Initialize metrics (safe to call multiple times - will reuse existing if already set)
143 let prometheus_handle = crate::metrics::setup_metrics().unwrap_or_else(|e| {
144 // If metrics are already initialized (e.g., in tests), create a new handle
145 // by building a recorder without installing it globally
146 tracing::debug!("Metrics already initialized, creating new handle: {}", e);
147 crate::metrics::PrometheusBuilder::new()
148 .build_recorder()
149 .handle()
150 });
151
152 let metrics_collector = Arc::new(MetricsCollector::new(
153 Arc::clone(®istry),
154 start_time,
155 prometheus_handle,
156 ));
157
158 // Create request history ring buffer for dashboard
159 let request_history = Arc::new(RequestHistory::new());
160
161 // Create WebSocket broadcast channel for dashboard real-time updates
162 let (ws_broadcast, _) = broadcast::channel(1000);
163
164 // Create pricing table for cloud cost estimation
165 let pricing = Arc::new(crate::agent::pricing::PricingTable::new());
166
167 Self {
168 registry,
169 config,
170 http_client,
171 router,
172 start_time,
173 metrics_collector,
174 request_history,
175 ws_broadcast,
176 pricing,
177 queue: None,
178 }
179 }
180}
181
182/// Create the main API router with all endpoints configured.
183pub fn create_router(state: Arc<AppState>) -> Router {
184 Router::new()
185 // Dashboard routes
186 .route("/", get(crate::dashboard::dashboard_handler))
187 .route("/assets/*path", get(crate::dashboard::assets_handler))
188 .route("/ws", get(crate::dashboard::websocket_handler))
189 // API routes
190 .route("/v1/chat/completions", post(completions::handle))
191 .route("/v1/embeddings", post(embeddings::handle))
192 .route("/v1/models", get(models::handle))
193 .route("/v1/history", get(crate::dashboard::history_handler))
194 .route("/health", get(health::handle))
195 .route("/metrics", get(crate::metrics::handler::metrics_handler))
196 .route("/v1/stats", get(crate::metrics::handler::stats_handler))
197 .layer(RequestBodyLimitLayer::new(MAX_BODY_SIZE))
198 .with_state(state)
199}