oxillama_server/config.rs
1//! Server configuration.
2
3use std::collections::HashMap;
4
5use serde::{Deserialize, Serialize};
6
7/// Configuration for the OxiLLaMa API server.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct ServerConfig {
10 // ── JWT authentication ────────────────────────────────────────────────
11 /// JWT verifier configuration (not serialized — set programmatically
12 /// at startup from file paths / environment variables).
13 ///
14 /// When `Some`, JWT verification is enabled and takes priority over
15 /// `api_keys` bearer-token auth. When `None`, the existing bearer-key
16 /// path is used.
17 #[serde(skip)]
18 #[cfg(feature = "jwt")]
19 pub jwt: Option<crate::jwt_auth::JwtConfig>,
20 /// Host address to bind to.
21 pub host: String,
22 /// Port number.
23 pub port: u16,
24 /// Maximum concurrent requests.
25 pub max_concurrent: usize,
26 /// Request timeout in seconds.
27 pub timeout_secs: u64,
28 /// Enable CORS headers.
29 pub cors_enabled: bool,
30 /// API keys for authentication (empty = no auth).
31 pub api_keys: Vec<String>,
32 /// Rate limit: maximum burst capacity (0.0 = no limit).
33 pub rate_limit_capacity: f64,
34 /// Rate limit: tokens per second refill rate.
35 pub rate_limit_rate: f64,
36 /// Maximum request body size in bytes (0 = no limit).
37 pub body_limit_bytes: usize,
38 /// Enable the /metrics Prometheus endpoint.
39 pub metrics_enabled: bool,
40 /// Enable structured request tracing middleware.
41 pub structured_tracing: bool,
42
43 // ── Router (multi-model pool) ─────────────────────────────────────────
44 /// Maximum number of concurrently loaded models (0 = 1, single-model mode).
45 pub router_capacity: usize,
46 /// Memory budget for the model pool in MiB (0 = unlimited).
47 pub router_mem_budget_mb: usize,
48 /// Model IDs to pre-load at startup.
49 pub router_preload: Vec<String>,
50
51 // ── Admin API ─────────────────────────────────────────────────────────
52 /// Bearer token required for all `/admin/*` routes.
53 ///
54 /// `None` = token-less mode (admin only accessible from loopback).
55 pub admin_bearer_token: Option<String>,
56 /// Address the admin interface is expected to listen on.
57 /// Used for the startup safety check: non-loopback + no token → fatal error.
58 pub admin_listen: String,
59
60 // ── Batch disk spool ──────────────────────────────────────────────────
61 /// Directory for disk-spooled batch jobs.
62 /// Defaults to `$TMPDIR/oxillama_batch_spool`.
63 pub batch_spool_dir: Option<String>,
64 /// Maximum pending bytes across all queued batch jobs.
65 pub batch_max_pending_bytes: usize,
66
67 // ── Per-API-key rate limiting ─────────────────────────────────────────
68 /// Per-key override map: `api_key → (capacity, rate_per_second)`.
69 ///
70 /// When a request carries an API key that appears in this map, the
71 /// override `(capacity, rate)` pair is used instead of the server
72 /// defaults. Keys absent from this map use `rate_limit_capacity` and
73 /// `rate_limit_rate` as their bucket parameters.
74 ///
75 /// `None` (the default) disables per-key rate limiting entirely.
76 pub per_key_rate_limits: Option<HashMap<String, (f64, f64)>>,
77}
78
79impl Default for ServerConfig {
80 fn default() -> Self {
81 Self {
82 #[cfg(feature = "jwt")]
83 jwt: None,
84
85 host: "127.0.0.1".to_string(),
86 port: 8080,
87 max_concurrent: 64,
88 timeout_secs: 300,
89 cors_enabled: true,
90 api_keys: Vec::new(),
91 rate_limit_capacity: 0.0,
92 rate_limit_rate: 10.0,
93 body_limit_bytes: 10 * 1024 * 1024,
94 metrics_enabled: true,
95 structured_tracing: true,
96
97 router_capacity: 1,
98 router_mem_budget_mb: 0,
99 router_preload: Vec::new(),
100
101 admin_bearer_token: None,
102 admin_listen: "127.0.0.1:8081".to_string(),
103
104 batch_spool_dir: None,
105 batch_max_pending_bytes: 1024 * 1024 * 1024, // 1 GiB
106
107 per_key_rate_limits: None,
108 }
109 }
110}