Skip to main content

oxillama_server/
config.rs

1//! Server configuration.
2
3use std::collections::HashMap;
4
5use serde::{Deserialize, Serialize};
6
7/// Configuration for the OxiLLaMa API server.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct ServerConfig {
10    // ── JWT authentication ────────────────────────────────────────────────
11    /// JWT verifier configuration (not serialized — set programmatically
12    /// at startup from file paths / environment variables).
13    ///
14    /// When `Some`, JWT verification is enabled and takes priority over
15    /// `api_keys` bearer-token auth.  When `None`, the existing bearer-key
16    /// path is used.
17    #[serde(skip)]
18    #[cfg(feature = "jwt")]
19    pub jwt: Option<crate::jwt_auth::JwtConfig>,
20    /// Host address to bind to.
21    pub host: String,
22    /// Port number.
23    pub port: u16,
24    /// Maximum concurrent requests.
25    pub max_concurrent: usize,
26    /// Request timeout in seconds.
27    pub timeout_secs: u64,
28    /// Enable CORS headers.
29    pub cors_enabled: bool,
30    /// API keys for authentication (empty = no auth).
31    pub api_keys: Vec<String>,
32    /// Rate limit: maximum burst capacity (0.0 = no limit).
33    pub rate_limit_capacity: f64,
34    /// Rate limit: tokens per second refill rate.
35    pub rate_limit_rate: f64,
36    /// Maximum request body size in bytes (0 = no limit).
37    pub body_limit_bytes: usize,
38    /// Enable the /metrics Prometheus endpoint.
39    pub metrics_enabled: bool,
40    /// Enable structured request tracing middleware.
41    pub structured_tracing: bool,
42
43    // ── Router (multi-model pool) ─────────────────────────────────────────
44    /// Maximum number of concurrently loaded models (0 = 1, single-model mode).
45    pub router_capacity: usize,
46    /// Memory budget for the model pool in MiB (0 = unlimited).
47    pub router_mem_budget_mb: usize,
48    /// Model IDs to pre-load at startup.
49    pub router_preload: Vec<String>,
50
51    // ── Admin API ─────────────────────────────────────────────────────────
52    /// Bearer token required for all `/admin/*` routes.
53    ///
54    /// `None` = token-less mode (admin only accessible from loopback).
55    pub admin_bearer_token: Option<String>,
56    /// Address the admin interface is expected to listen on.
57    /// Used for the startup safety check: non-loopback + no token → fatal error.
58    pub admin_listen: String,
59
60    // ── Batch disk spool ──────────────────────────────────────────────────
61    /// Directory for disk-spooled batch jobs.
62    /// Defaults to `$TMPDIR/oxillama_batch_spool`.
63    pub batch_spool_dir: Option<String>,
64    /// Maximum pending bytes across all queued batch jobs.
65    pub batch_max_pending_bytes: usize,
66
67    // ── Per-API-key rate limiting ─────────────────────────────────────────
68    /// Per-key override map: `api_key → (capacity, rate_per_second)`.
69    ///
70    /// When a request carries an API key that appears in this map, the
71    /// override `(capacity, rate)` pair is used instead of the server
72    /// defaults.  Keys absent from this map use `rate_limit_capacity` and
73    /// `rate_limit_rate` as their bucket parameters.
74    ///
75    /// `None` (the default) disables per-key rate limiting entirely.
76    pub per_key_rate_limits: Option<HashMap<String, (f64, f64)>>,
77}
78
79impl Default for ServerConfig {
80    fn default() -> Self {
81        Self {
82            #[cfg(feature = "jwt")]
83            jwt: None,
84
85            host: "127.0.0.1".to_string(),
86            port: 8080,
87            max_concurrent: 64,
88            timeout_secs: 300,
89            cors_enabled: true,
90            api_keys: Vec::new(),
91            rate_limit_capacity: 0.0,
92            rate_limit_rate: 10.0,
93            body_limit_bytes: 10 * 1024 * 1024,
94            metrics_enabled: true,
95            structured_tracing: true,
96
97            router_capacity: 1,
98            router_mem_budget_mb: 0,
99            router_preload: Vec::new(),
100
101            admin_bearer_token: None,
102            admin_listen: "127.0.0.1:8081".to_string(),
103
104            batch_spool_dir: None,
105            batch_max_pending_bytes: 1024 * 1024 * 1024, // 1 GiB
106
107            per_key_rate_limits: None,
108        }
109    }
110}