Skip to main content

modelexpress_server/
backend_config.rs

1// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Shared backend selection for distributed stores (Redis and Kubernetes CRDs).
5//!
6//! A single `BackendConfig` type and a single env var (`MX_METADATA_BACKEND`) drive both
7//! the P2P metadata backend and the model registry backend. Deployments that need one
8//! always need the other, so decoupling them would just be surface area without a use
9//! case — the server crashes at startup if either can't connect.
10//!
11//! The trait implementations themselves live in `p2p::backend` and `registry::backend`.
12
/// Configuration for a distributed backend (Redis or Kubernetes CRDs).
///
/// Each variant carries only the connection detail that backend needs. The
/// `Display` impl prints just the backend name, while the derived `Debug`
/// prints the fields as well.
///
/// `PartialEq`/`Eq` are derived so configs can be compared directly
/// (e.g. with `assert_eq!`) instead of through `matches!` guards.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BackendConfig {
    /// Redis backend — persistent, horizontally scalable
    Redis {
        /// Redis connection URL, e.g. `redis://host:port`.
        url: String,
    },
    /// Kubernetes CRD backend — native K8s integration
    Kubernetes {
        /// Kubernetes namespace used by this backend.
        namespace: String,
    },
}
21
22impl std::fmt::Display for BackendConfig {
23    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
24        match self {
25            Self::Redis { .. } => write!(f, "redis"),
26            Self::Kubernetes { .. } => write!(f, "kubernetes"),
27        }
28    }
29}
30
31impl BackendConfig {
32    /// Create backend config from `MX_METADATA_BACKEND`. Used by both the P2P state
33    /// manager and the registry manager so they share a single env-var contract.
34    ///
35    /// Valid values: `redis`, `kubernetes` | `k8s` | `crd`.
36    ///
37    /// Errors if `MX_METADATA_BACKEND` is unset/invalid, or if the connection env for
38    /// the selected backend is missing (no silent fallback to `localhost:6379` /
39    /// `default` namespace, since those mask misconfig in production).
40    pub fn from_env() -> Result<Self, String> {
41        let backend_type = std::env::var("MX_METADATA_BACKEND").unwrap_or_default();
42        match backend_type.to_lowercase().as_str() {
43            "redis" => Ok(Self::Redis {
44                url: Self::redis_url_from_env()?,
45            }),
46            "kubernetes" | "k8s" | "crd" => Ok(Self::Kubernetes {
47                namespace: Self::k8s_namespace_from_env()?,
48            }),
49            other => Err(format!(
50                "MX_METADATA_BACKEND='{other}' is not valid. Use 'redis' or 'kubernetes'."
51            )),
52        }
53    }
54
55    /// Parse a backend type string into a config. Testable without env vars — callers
56    /// supply the connection strings directly.
57    ///
58    /// `env_name` appears in the error message so the caller knows which variable was bad.
59    pub fn from_type_str(
60        env_name: &str,
61        backend_type: &str,
62        redis_url: &str,
63        k8s_namespace: &str,
64    ) -> Result<Self, String> {
65        match backend_type.to_lowercase().as_str() {
66            "redis" => Ok(Self::Redis {
67                url: redis_url.to_string(),
68            }),
69            "kubernetes" | "k8s" | "crd" => Ok(Self::Kubernetes {
70                namespace: k8s_namespace.to_string(),
71            }),
72            other => Err(format!(
73                "{env_name}='{other}' is not valid. Use 'redis' or 'kubernetes'."
74            )),
75        }
76    }
77
78    /// Return the Redis connection URL from env. Accepts either `REDIS_URL` directly, or
79    /// `MX_REDIS_HOST` + `MX_REDIS_PORT` (with `REDIS_HOST` / `REDIS_PORT` fallbacks for
80    /// compatibility with charts that predate the `MX_` prefix). Errors when neither the
81    /// URL nor both host-and-port pieces are provided.
82    pub fn redis_url_from_env() -> Result<String, String> {
83        if let Ok(url) = std::env::var("REDIS_URL") {
84            return Ok(url);
85        }
86        let host = std::env::var("MX_REDIS_HOST")
87            .or_else(|_| std::env::var("REDIS_HOST"))
88            .map_err(|_| {
89                "MX_METADATA_BACKEND=redis requires REDIS_URL or MX_REDIS_HOST (alias \
90                 REDIS_HOST) to be set."
91                    .to_string()
92            })?;
93        let port = std::env::var("MX_REDIS_PORT")
94            .or_else(|_| std::env::var("REDIS_PORT"))
95            .map_err(|_| {
96                "MX_METADATA_BACKEND=redis requires REDIS_URL or MX_REDIS_PORT (alias \
97                 REDIS_PORT) to be set."
98                    .to_string()
99            })?;
100        Ok(format!("redis://{host}:{port}"))
101    }
102
103    /// Return the Kubernetes namespace from env. The downward API exposes
104    /// `POD_NAMESPACE` to in-cluster pods; `MX_METADATA_NAMESPACE` overrides it for
105    /// out-of-cluster operators. Errors when neither is set so a typo in the chart
106    /// can't silently land ModelCacheEntry CRs in the `default` namespace.
107    fn k8s_namespace_from_env() -> Result<String, String> {
108        std::env::var("MX_METADATA_NAMESPACE")
109            .or_else(|_| std::env::var("POD_NAMESPACE"))
110            .map_err(|_| {
111                "MX_METADATA_BACKEND=kubernetes requires MX_METADATA_NAMESPACE or \
112                 POD_NAMESPACE to be set."
113                    .to_string()
114            })
115    }
116}
117
#[cfg(test)]
// `expect` is used on purpose here: a failed expectation should panic with context.
#[allow(clippy::expect_used)]
mod tests {
    use super::*;
    // The `from_env` / `redis_url_from_env` / `k8s_namespace_from_env` helpers read
    // process-global env vars. Tests that mutate env have to acquire a mutex to stay
    // serialized, since `cargo test` runs with multiple threads by default.
    // NOTE(review): the guards presumably restore the previous value on drop —
    // confirm against `modelexpress_common::test_support`.
    use modelexpress_common::test_support::{EnvVarGuard, acquire_env_mutex};

    // ---- Pure parsing: no env access, no lock needed ----

    #[test]
    fn parses_redis_and_kubernetes_aliases() {
        let cfg = BackendConfig::from_type_str("X", "redis", "redis://h:1", "ns").expect("redis");
        assert!(matches!(cfg, BackendConfig::Redis { url } if url == "redis://h:1"));

        for alias in ["kubernetes", "k8s", "crd", "K8S", "Kubernetes"] {
            let cfg =
                BackendConfig::from_type_str("X", alias, "redis://h:1", "prod").expect("k8s alias");
            assert!(matches!(cfg, BackendConfig::Kubernetes { namespace } if namespace == "prod"));
        }
    }

    #[test]
    fn rejects_unknown_and_includes_env_name() {
        let err = BackendConfig::from_type_str("MX_WHATEVER", "memory", "", "")
            .expect_err("should reject");
        assert!(
            err.contains("MX_WHATEVER"),
            "error should name the env var: {err}"
        );
        assert!(
            err.contains("'memory'"),
            "error should echo bad value: {err}"
        );
    }

    #[test]
    fn rejects_empty_backend_type() {
        let err = BackendConfig::from_type_str("MX_METADATA_BACKEND", "", "", "")
            .expect_err("empty should reject");
        assert!(err.contains("''"), "error should echo empty value: {err}");
    }

    #[test]
    fn display_renders_backend_name() {
        let redis = BackendConfig::Redis {
            url: "redis://host:6379".to_string(),
        };
        assert_eq!(redis.to_string(), "redis");
        let k8s = BackendConfig::Kubernetes {
            namespace: "prod".to_string(),
        };
        assert_eq!(k8s.to_string(), "kubernetes");
    }

    // ---- Env-driven tests: each one holds the env mutex for its whole body ----
    // Guards are bound to named `_g*` locals (not `_`) so they stay alive until
    // the end of the scope they are declared in.

    #[test]
    fn from_env_reads_mx_metadata_backend() {
        let lock = acquire_env_mutex();
        let _g1 = EnvVarGuard::set(&lock, "MX_METADATA_BACKEND", "redis");
        let _g2 = EnvVarGuard::set(&lock, "REDIS_URL", "redis://myhost:7777");
        let cfg = BackendConfig::from_env().expect("from_env redis");
        assert!(matches!(cfg, BackendConfig::Redis { url } if url == "redis://myhost:7777"));
    }

    #[test]
    fn from_env_accepts_kubernetes_aliases() {
        let lock = acquire_env_mutex();
        // `MX_METADATA_NAMESPACE` outranks `POD_NAMESPACE`, so an ambient value in
        // the developer/CI environment would otherwise leak into this assertion.
        let _g1 = EnvVarGuard::remove(&lock, "MX_METADATA_NAMESPACE");
        let _g2 = EnvVarGuard::set(&lock, "POD_NAMESPACE", "test-ns");
        for alias in ["kubernetes", "k8s", "crd"] {
            let _g3 = EnvVarGuard::set(&lock, "MX_METADATA_BACKEND", alias);
            let cfg = BackendConfig::from_env().expect("from_env k8s alias");
            assert!(
                matches!(cfg, BackendConfig::Kubernetes { namespace } if namespace == "test-ns"),
                "alias {alias} should select the Kubernetes backend"
            );
        }
    }

    #[test]
    fn from_env_errors_when_backend_unset() {
        let lock = acquire_env_mutex();
        let _g = EnvVarGuard::remove(&lock, "MX_METADATA_BACKEND");
        let err = BackendConfig::from_env().expect_err("should reject missing backend");
        assert!(err.contains("MX_METADATA_BACKEND"));
    }

    #[test]
    fn redis_url_from_env_honors_explicit_url_over_host_port() {
        let lock = acquire_env_mutex();
        let _g1 = EnvVarGuard::set(&lock, "REDIS_URL", "redis://explicit:1234");
        let _g2 = EnvVarGuard::set(&lock, "MX_REDIS_HOST", "other");
        let _g3 = EnvVarGuard::set(&lock, "MX_REDIS_PORT", "9999");
        assert_eq!(
            BackendConfig::redis_url_from_env().expect("REDIS_URL wins"),
            "redis://explicit:1234"
        );
    }

    #[test]
    fn redis_url_from_env_builds_from_host_port_when_url_missing() {
        let lock = acquire_env_mutex();
        let _g1 = EnvVarGuard::remove(&lock, "REDIS_URL");
        let _g2 = EnvVarGuard::set(&lock, "MX_REDIS_HOST", "myhost");
        let _g3 = EnvVarGuard::set(&lock, "MX_REDIS_PORT", "6380");
        assert_eq!(
            BackendConfig::redis_url_from_env().expect("host+port build"),
            "redis://myhost:6380"
        );
    }

    #[test]
    fn redis_url_from_env_errors_when_host_and_port_missing() {
        let lock = acquire_env_mutex();
        let _g1 = EnvVarGuard::remove(&lock, "REDIS_URL");
        let _g2 = EnvVarGuard::remove(&lock, "MX_REDIS_HOST");
        let _g3 = EnvVarGuard::remove(&lock, "REDIS_HOST");
        let _g4 = EnvVarGuard::remove(&lock, "MX_REDIS_PORT");
        let _g5 = EnvVarGuard::remove(&lock, "REDIS_PORT");
        let err =
            BackendConfig::redis_url_from_env().expect_err("should error on missing Redis env");
        assert!(
            err.contains("REDIS_URL") && err.contains("MX_REDIS_HOST"),
            "error should name the required env vars: {err}"
        );
    }

    #[test]
    fn redis_url_from_env_errors_when_port_missing() {
        let lock = acquire_env_mutex();
        let _g1 = EnvVarGuard::remove(&lock, "REDIS_URL");
        let _g2 = EnvVarGuard::set(&lock, "MX_REDIS_HOST", "myhost");
        let _g3 = EnvVarGuard::remove(&lock, "MX_REDIS_PORT");
        let _g4 = EnvVarGuard::remove(&lock, "REDIS_PORT");
        let err = BackendConfig::redis_url_from_env()
            .expect_err("should error when port is missing even with host set");
        assert!(
            err.contains("MX_REDIS_PORT"),
            "error should name the missing port env var: {err}"
        );
    }

    #[test]
    fn from_env_kubernetes_errors_when_namespace_unset() {
        let lock = acquire_env_mutex();
        let _g1 = EnvVarGuard::set(&lock, "MX_METADATA_BACKEND", "kubernetes");
        let _g2 = EnvVarGuard::remove(&lock, "MX_METADATA_NAMESPACE");
        let _g3 = EnvVarGuard::remove(&lock, "POD_NAMESPACE");
        let err = BackendConfig::from_env()
            .expect_err("kubernetes backend without namespace should reject");
        assert!(
            err.contains("MX_METADATA_NAMESPACE") || err.contains("POD_NAMESPACE"),
            "error should name the namespace env vars: {err}"
        );
    }
}