use std::net::IpAddr;
use std::path::PathBuf;
use serde::Deserialize;
/// Kind of model an engine instance serves.
///
/// Deserialized from the lowercase variant name (`"chat"` or `"embed"`);
/// any other value is a deserialization error.
#[derive(Debug, Clone, PartialEq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ModelKind {
    /// Written `"chat"` in configuration; maps to the `engine-curator`
    /// provider alias.
    Chat,
    /// Written `"embed"` in configuration; maps to the `engine-embed`
    /// provider alias.
    Embed,
}
/// Inference runtime selected for an engine instance.
///
/// Deserialized from the lowercase variant name, without separators
/// (`"llamaserver"` or `"onnx"`).
#[derive(Debug, Clone, PartialEq, Default, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RuntimeKind {
    /// Written `"llamaserver"`; the value used when `runtime` is omitted.
    #[default]
    LlamaServer,
    /// Written `"onnx"`.
    Onnx,
}
/// Settings of one engine instance, read from the `[engine]` table.
///
/// Only `model_path`, `model_kind` and `port` are mandatory; every other
/// field falls back to the default noted on it. Deserialization checks types
/// only; call [`EngineConfig::validate`] for the semantic checks.
#[derive(Debug, Clone, Deserialize)]
pub struct EngineConfig {
    /// Path of the model file (required). `validate` requires its canonical
    /// form to live under `/opt/gradatum/models/`; `model_alias` is derived
    /// from its file stem.
    pub model_path: String,
    /// Kind of model served (required); selects the provider alias.
    pub model_kind: ModelKind,
    /// Inference runtime; defaults to [`RuntimeKind::LlamaServer`].
    #[serde(default)]
    pub runtime: RuntimeKind,
    /// Warm-up mode, kept as free text; defaults to `"eager"`.
    // NOTE(review): the set of accepted values is not visible in this file.
    #[serde(default = "default_warmup")]
    pub warm_up: String,
    /// Defaults to 0.
    // NOTE(review): presumably the number of layers offloaded to the GPU — confirm.
    #[serde(default)]
    pub gpu_layers: u32,
    /// Defaults to 8.
    // NOTE(review): presumably the inference thread count — confirm.
    #[serde(default = "default_threads")]
    pub n_threads: u32,
    /// Defaults to 32 768.
    // NOTE(review): presumably the context window in tokens — confirm.
    #[serde(default = "default_ctx")]
    pub context_len: u32,
    /// Port of the instance (required). The default metrics port is derived
    /// from it by `resolved_metrics_port`.
    pub port: u16,
    /// Optional bind address. When absent, `resolved_bind_addr` yields
    /// `127.0.0.1`. `validate` rejects unspecified, broadcast and multicast
    /// addresses.
    #[serde(default)]
    pub bind_addr: Option<IpAddr>,
    /// Optional metrics port. When absent, `resolved_metrics_port` yields
    /// `port + 1` (saturating).
    #[serde(default)]
    pub metrics_port: Option<u16>,
    /// Timeout in seconds; defaults to 120.
    #[serde(default = "default_timeout")]
    pub timeout_secs: u64,
    /// Defaults to 512.
    // NOTE(review): presumably a cap on generated tokens — confirm.
    #[serde(default = "default_max_tokens")]
    pub max_tokens: u32,
    /// Defaults to `http://127.0.0.1:19090`.
    #[serde(default = "default_gradatum_url")]
    pub gradatum_url: String,
    /// Path of the `llama-server` executable; defaults to
    /// `/usr/local/bin/llama-server`.
    #[serde(default = "default_llama_server_bin")]
    pub llama_server_bin: PathBuf,
    /// Defaults to 11436.
    // NOTE(review): presumably the port of the spawned child server — confirm.
    #[serde(default = "default_child_port")]
    pub child_port: u16,
    /// Defaults to 4.
    // NOTE(review): presumably the number of parallel slots of the child — confirm.
    #[serde(default = "default_parallel")]
    pub parallel: u32,
    /// Defaults to an empty list.
    // NOTE(review): presumably extra command-line arguments for the child — confirm.
    #[serde(default)]
    pub extra_args: Vec<String>,
    /// Optional projector file. When present, `validate` applies the same
    /// `/opt/gradatum/models/` prefix rule as for `model_path`.
    #[serde(default)]
    pub mmproj_path: Option<PathBuf>,
    /// Startup timeout in seconds; defaults to 60.
    #[serde(default = "default_startup_timeout")]
    pub startup_timeout_secs: u64,
    /// Defaults to 3.
    // NOTE(review): presumably the maximum number of child restarts — confirm.
    #[serde(default = "default_restart_max")]
    pub child_restart_max: u32,
    /// Minimum stable uptime in seconds; defaults to 30.
    #[serde(default = "default_min_stable_uptime")]
    pub min_stable_uptime_secs: u64,
    /// Body size limit in bytes; defaults to 32 MiB. `validate` rejects
    /// values above 256 MiB.
    #[serde(default = "default_body_limit")]
    pub body_limit_bytes: usize,
}
/// Serde default for `warm_up`.
fn default_warmup() -> String {
    String::from("eager")
}
/// Serde default for `n_threads`.
fn default_threads() -> u32 {
    const THREADS: u32 = 8;
    THREADS
}
/// Serde default for `context_len` (32 Ki = 32 768).
fn default_ctx() -> u32 {
    32 * 1024
}
/// Serde default for `timeout_secs`.
fn default_timeout() -> u64 {
    const TIMEOUT_SECS: u64 = 120;
    TIMEOUT_SECS
}
/// Serde default for `max_tokens`.
fn default_max_tokens() -> u32 {
    const MAX_TOKENS: u32 = 512;
    MAX_TOKENS
}
/// Serde default for `gradatum_url`.
fn default_gradatum_url() -> String {
    String::from("http://127.0.0.1:19090")
}
/// Serde default for `llama_server_bin`.
fn default_llama_server_bin() -> PathBuf {
    "/usr/local/bin/llama-server".into()
}
/// Serde default for `child_port`.
fn default_child_port() -> u16 {
    const CHILD_PORT: u16 = 11436;
    CHILD_PORT
}
/// Serde default for `parallel`.
fn default_parallel() -> u32 {
    const PARALLEL: u32 = 4;
    PARALLEL
}
/// Serde default for `startup_timeout_secs`.
fn default_startup_timeout() -> u64 {
    const STARTUP_TIMEOUT_SECS: u64 = 60;
    STARTUP_TIMEOUT_SECS
}
/// Serde default for `child_restart_max`.
fn default_restart_max() -> u32 {
    const RESTART_MAX: u32 = 3;
    RESTART_MAX
}
/// Serde default for `min_stable_uptime_secs`.
fn default_min_stable_uptime() -> u64 {
    const MIN_STABLE_UPTIME_SECS: u64 = 30;
    MIN_STABLE_UPTIME_SECS
}
/// Serde default for `body_limit_bytes`: 32 MiB.
fn default_body_limit() -> usize {
    const MIB: usize = 1024 * 1024;
    32 * MIB
}
/// Shape of the whole configuration document: the engine settings live in
/// its `engine` table, and only that table is read.
#[derive(Deserialize)]
struct Wrapper {
    /// Contents of the `[engine]` table.
    engine: EngineConfig,
}
/// Checks that `addr` is acceptable as a listening address.
///
/// IPv4 addresses, and IPv6 addresses that embed an IPv4 one
/// (`::ffff:a.b.c.d`), are delegated to `check_v4_addr`. Any other IPv6
/// address is rejected when it is the unspecified address `::` or a multicast
/// address.
///
/// # Errors
///
/// Returns an error describing the rejected address category.
fn validate_bind_addr(addr: IpAddr) -> Result<(), anyhow::Error> {
    let v6 = match addr {
        IpAddr::V4(v4) => return check_v4_addr(v4, &addr.to_string()),
        IpAddr::V6(v6) => v6,
    };

    // An IPv4-mapped address is judged by the IPv4 rules, so the wildcard
    // cannot slip through spelled as `::ffff:0.0.0.0`.
    if let Some(embedded) = v6.to_ipv4_mapped() {
        return check_v4_addr(embedded, &addr.to_string());
    }

    if v6.is_unspecified() {
        anyhow::bail!(
            "bind_addr '::' interdit (C1 fail-closed) — \
             bind wildcard IPv6 expose l'engine sur toutes les interfaces. \
             Utiliser ::1 (loopback) ou l'IP unicast LAN spécifique."
        );
    }
    if v6.is_multicast() {
        anyhow::bail!("bind_addr '{addr}' interdit — adresse multicast IPv6 non autorisée.");
    }
    Ok(())
}
/// Checks that `path` resolves to a location under `/opt/gradatum/models/`.
///
/// The path is canonicalized first, so symbolic links and `..` segments are
/// resolved before the prefix comparison; as a consequence the path must
/// exist.
///
/// # Errors
///
/// Returns an error when canonicalization fails or when the canonical path
/// is outside the allowed directory.
fn validate_model_prefix(path: &std::path::Path) -> Result<(), anyhow::Error> {
    const ALLOWED_ROOT: &str = "/opt/gradatum/models/";

    let resolved = match path.canonicalize() {
        Ok(real) => real,
        Err(e) => anyhow::bail!("canonicalize échoué pour {} : {e}", path.display()),
    };

    // `Path::starts_with` compares whole components, so a sibling such as
    // `/opt/gradatum/models-evil` does not match.
    if resolved.starts_with(ALLOWED_ROOT) {
        return Ok(());
    }
    Err(anyhow::anyhow!(
        "chemin doit être sous /opt/gradatum/models/ (P1-6) : {}",
        resolved.display()
    ))
}
/// Applies the IPv4 bind-address rules to `v4`.
///
/// `display` is the textual form of the address as configured; it is only
/// used in the error messages.
///
/// # Errors
///
/// Returns an error when `v4` is the unspecified address (`0.0.0.0`), the
/// broadcast address (`255.255.255.255`) or a multicast address.
fn check_v4_addr(v4: std::net::Ipv4Addr, display: &str) -> Result<(), anyhow::Error> {
    match v4 {
        ip if ip.is_unspecified() => Err(anyhow::anyhow!(
            "bind_addr '{display}' interdit (C1 fail-closed) — \
             bind wildcard expose l'engine sur toutes les interfaces. \
             Utiliser 127.0.0.1 (loopback) ou l'IP unicast LAN spécifique."
        )),
        ip if ip.is_broadcast() => Err(anyhow::anyhow!(
            "bind_addr '{display}' interdit — adresse broadcast non autorisée."
        )),
        ip if ip.is_multicast() => Err(anyhow::anyhow!(
            "bind_addr '{display}' interdit — adresse multicast non autorisée."
        )),
        _ => Ok(()),
    }
}
impl EngineConfig {
    /// Parses an engine configuration from TOML text.
    ///
    /// The settings are read from the `[engine]` table of the document.
    /// No semantic check is performed; see [`EngineConfig::validate`].
    ///
    /// # Errors
    ///
    /// Returns the `toml` deserialization error (invalid TOML, missing
    /// `[engine]` table or required field, wrong type, unknown enum name).
    pub fn from_toml(s: &str) -> Result<Self, toml::de::Error> {
        Ok(toml::from_str::<Wrapper>(s)?.engine)
    }

    /// Runs the semantic checks that deserialization cannot express.
    ///
    /// Checks, in order:
    /// 1. `body_limit_bytes` does not exceed 256 MiB;
    /// 2. `model_path` canonicalizes to a path under `/opt/gradatum/models/`;
    /// 3. `mmproj_path`, when set, satisfies the same rule;
    /// 4. `bind_addr`, when set, is neither unspecified, broadcast nor
    ///    multicast.
    ///
    /// # Errors
    ///
    /// Returns the first failed check. Because the paths are canonicalized,
    /// a file that does not exist is also an error.
    pub fn validate(&self) -> Result<(), anyhow::Error> {
        const MAX_BODY_LIMIT_BYTES: usize = 256 * 1024 * 1024;
        if self.body_limit_bytes > MAX_BODY_LIMIT_BYTES {
            anyhow::bail!(
                "body_limit_bytes {} dépasse le plafond {} (256 MiB)",
                self.body_limit_bytes,
                MAX_BODY_LIMIT_BYTES
            );
        }
        validate_model_prefix(std::path::Path::new(&self.model_path))
            .map_err(|e| anyhow::anyhow!("model_path : {e}"))?;
        if let Some(mmproj) = &self.mmproj_path {
            validate_model_prefix(mmproj).map_err(|e| anyhow::anyhow!("mmproj_path : {e}"))?;
        }
        if let Some(addr) = self.bind_addr {
            validate_bind_addr(addr)?;
        }
        Ok(())
    }

    /// Returns the configured bind address, or `127.0.0.1` when none is set.
    pub fn resolved_bind_addr(&self) -> IpAddr {
        self.bind_addr
            .unwrap_or(IpAddr::V4(std::net::Ipv4Addr::LOCALHOST))
    }

    /// Returns the configured metrics port, or `port + 1` when none is set.
    ///
    /// The addition saturates: with `port == 65535` the result is 65535 as
    /// well, i.e. the same value as `port`.
    pub fn resolved_metrics_port(&self) -> u16 {
        self.metrics_port
            .unwrap_or_else(|| self.port.saturating_add(1))
    }

    /// Loads the configuration from the TOML file at `path`, then applies
    /// overrides from environment variables prefixed with `GRADATUM_ENGINE_`.
    ///
    /// A variable named `GRADATUM_ENGINE_<FIELD>` overrides the field
    /// `<field>` of the `[engine]` table, e.g. `GRADATUM_ENGINE_PORT=8080`
    /// sets `engine.port`.
    ///
    /// # Errors
    ///
    /// Returns the boxed `figment` error when the merged data cannot be
    /// extracted into an engine configuration.
    pub fn load_local(path: &std::path::Path) -> Result<Self, Box<figment::Error>> {
        use figment::{
            providers::{Env, Format, Toml},
            Figment,
        };
        // Figment places a prefix-stripped variable name at the root of the
        // document, whereas every setting is read from the `engine` table:
        // without re-rooting, `GRADATUM_ENGINE_PORT` would become a root key
        // `port` and be silently ignored. Keys that already address the table
        // (`ENGINE`, `ENGINE.<field>`) are left alone so that spellings that
        // worked before keep working.
        let env_overrides = Env::prefixed("GRADATUM_ENGINE_").map(|key| {
            let name = key.as_str().to_ascii_lowercase();
            let rooted = if name == "engine" || name.starts_with("engine.") {
                name
            } else {
                format!("engine.{name}")
            };
            rooted.into()
        });
        let w: Wrapper = Figment::new()
            .merge(Toml::file(path))
            .merge(env_overrides)
            .extract()
            .map_err(Box::new)?;
        Ok(w.engine)
    }

    /// Returns the file stem of `model_path` (file name without its last
    /// extension), or `"engine"` when the path has no file name or the stem
    /// is not valid UTF-8.
    pub fn model_alias(&self) -> String {
        std::path::Path::new(&self.model_path)
            .file_stem()
            .and_then(|s| s.to_str())
            .unwrap_or("engine")
            .to_string()
    }

    /// Returns the provider alias matching `model_kind`:
    /// `"engine-curator"` for chat, `"engine-embed"` for embed.
    pub fn provider_alias(&self) -> String {
        match self.model_kind {
            ModelKind::Chat => "engine-curator".into(),
            ModelKind::Embed => "engine-embed".into(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a small scratch file in the system temporary directory and
    /// returns its path.
    ///
    /// The name embeds the process id so that concurrent test runs do not
    /// share a file. A failed write aborts the test: the callers need a path
    /// that exists, otherwise the rejection they observe would come from
    /// canonicalization and not from the prefix rule under test.
    fn scratch_file(tag: &str) -> std::path::PathBuf {
        let path = std::env::temp_dir().join(format!(
            "gradatum-engine-{tag}-{}.gguf",
            std::process::id()
        ));
        std::fs::write(&path, b"fake-gguf").expect("écriture du fichier temporaire impossible");
        path
    }

    #[test]
    fn parses_chat_instance() {
        let toml = r#"
[engine]
model_path = "/opt/models/qwen3-4b.gguf"
model_kind = "chat"
warm_up = "eager"
gpu_layers = 0
n_threads = 8
context_len = 32768
port = 11435
"#;
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(c.model_kind, ModelKind::Chat);
        assert_eq!(c.gpu_layers, 0);
        assert_eq!(c.port, 11435);
        assert_eq!(
            c.runtime,
            RuntimeKind::LlamaServer,
            "défaut runtime = llamaserver (PIVOT v2)"
        );
        assert_eq!(c.timeout_secs, 120, "défaut timeout = 120s");
        assert_eq!(
            c.gradatum_url, "http://127.0.0.1:19090",
            "défaut gradatum_url"
        );
    }

    #[test]
    fn rejects_unknown_kind() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"vision\"\nport=1\n";
        assert!(EngineConfig::from_toml(toml).is_err());
    }

    #[test]
    fn parses_onnx_runtime_seam() {
        let toml =
            "[engine]\nmodel_path=\"x\"\nmodel_kind=\"embed\"\nruntime=\"onnx\"\nport=11436\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(c.runtime, RuntimeKind::Onnx);
    }

    #[test]
    fn parses_llamaserver_runtime() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nruntime=\"llamaserver\"\nport=11435\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(
            c.runtime,
            RuntimeKind::LlamaServer,
            "runtime=llamaserver parsé"
        );
    }

    #[test]
    fn parses_pivot_v2_fields() {
        let toml = r#"
[engine]
model_path = "/opt/gradatum/models/qwen3-4b.gguf"
model_kind = "chat"
port = 11435
child_port = 11436
parallel = 4
startup_timeout_secs = 90
child_restart_max = 5
extra_args = ["--flash-attn"]
"#;
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(c.child_port, 11436, "child_port parsé");
        assert_eq!(c.parallel, 4, "parallel parsé");
        assert_eq!(c.startup_timeout_secs, 90, "startup_timeout_secs parsé");
        assert_eq!(c.child_restart_max, 5, "child_restart_max parsé");
        assert_eq!(c.extra_args, vec!["--flash-attn"], "extra_args parsé");
    }

    #[test]
    fn defaults_pivot_v2_fields() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(
            c.llama_server_bin,
            std::path::PathBuf::from("/usr/local/bin/llama-server"),
            "défaut llama_server_bin"
        );
        assert_eq!(c.child_port, 11436, "défaut child_port");
        assert_eq!(c.parallel, 4, "défaut parallel");
        assert_eq!(c.startup_timeout_secs, 60, "défaut startup_timeout_secs");
        assert_eq!(c.child_restart_max, 3, "défaut child_restart_max");
        assert!(c.extra_args.is_empty(), "défaut extra_args = vide");
        assert_eq!(
            c.min_stable_uptime_secs, 30,
            "défaut min_stable_uptime_secs = 30s"
        );
    }

    #[test]
    fn parses_min_stable_uptime_secs() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nmin_stable_uptime_secs=60\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(
            c.min_stable_uptime_secs, 60,
            "min_stable_uptime_secs parsé depuis TOML"
        );
    }

    #[test]
    fn validate_rejects_model_path_outside_prefix() {
        let tmp_path = scratch_file("test-model");
        // TOML literal string (single quotes): no escape processing, so the
        // path is taken verbatim whatever separators it contains.
        let toml = format!(
            "[engine]\nmodel_path='{}'\nmodel_kind=\"chat\"\nport=11435\n",
            tmp_path.display()
        );
        let c = EngineConfig::from_toml(&toml).unwrap();
        let result = c.validate();
        let _ = std::fs::remove_file(&tmp_path);
        let msg = result
            .expect_err("validate() doit rejeter un model_path hors /opt/gradatum/models/")
            .to_string();
        // The file exists, so the rejection must come from the prefix rule
        // and not from a failed canonicalization.
        assert!(
            msg.contains("P1-6"),
            "l'erreur doit venir du contrôle de préfixe (P1-6) : {msg}"
        );
    }

    #[test]
    fn validate_rejects_nonexistent_model_path() {
        let toml =
            "[engine]\nmodel_path=\"/opt/gradatum/models/does-not-exist.gguf\"\nmodel_kind=\"chat\"\nport=11435\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        let result = c.validate();
        assert!(
            result.is_err(),
            "validate() doit rejeter un model_path non-existant (canonicalize échoue)"
        );
    }

    #[test]
    fn bind_addr_default_is_none_resolves_to_loopback() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert!(c.bind_addr.is_none(), "défaut bind_addr = None");
        let resolved = c.resolved_bind_addr();
        assert_eq!(
            resolved,
            IpAddr::V4(std::net::Ipv4Addr::LOCALHOST),
            "bind_addr None → résolu 127.0.0.1"
        );
    }

    #[test]
    fn bind_addr_loopback_explicit_ok() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nbind_addr=\"127.0.0.1\"\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(c.bind_addr, Some(IpAddr::V4(std::net::Ipv4Addr::LOCALHOST)));
        assert!(
            validate_bind_addr(c.bind_addr.unwrap()).is_ok(),
            "127.0.0.1 doit être accepté"
        );
    }

    #[test]
    fn bind_addr_routable_unicast_accepted() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nbind_addr=\"203.0.113.5\"\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert!(
            validate_bind_addr(c.bind_addr.unwrap()).is_ok(),
            "203.0.113.5 (unicast routable) doit être accepté"
        );
    }

    #[test]
    fn bind_addr_0_0_0_0_rejected() {
        let toml =
            "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nbind_addr=\"0.0.0.0\"\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        let result = validate_bind_addr(c.bind_addr.unwrap());
        assert!(result.is_err(), "0.0.0.0 doit être rejeté (C1 fail-closed)");
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("0.0.0.0") && msg.contains("interdit"),
            "message d'erreur doit citer 0.0.0.0 et 'interdit' : {msg}"
        );
    }

    #[test]
    fn bind_addr_ipv6_unspecified_rejected() {
        let toml =
            "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nbind_addr=\"::\"\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        let result = validate_bind_addr(c.bind_addr.unwrap());
        assert!(result.is_err(), ":: doit être rejeté (C1 fail-closed)");
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("::") && msg.contains("interdit"),
            "message d'erreur doit citer :: et 'interdit' : {msg}"
        );
    }

    #[test]
    fn bind_addr_multicast_rejected() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nbind_addr=\"224.0.0.1\"\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        let result = validate_bind_addr(c.bind_addr.unwrap());
        assert!(result.is_err(), "adresse multicast doit être rejetée");
    }

    #[test]
    fn bind_addr_ipv4_mapped_unspecified_rejected() {
        let addr: IpAddr = "::ffff:0.0.0.0".parse().unwrap();
        let result = validate_bind_addr(addr);
        assert!(
            result.is_err(),
            "::ffff:0.0.0.0 (IPv4-mapped unspecified) doit être rejeté (P0 fail-closed)"
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("interdit"),
            "message d'erreur doit contenir 'interdit' : {msg}"
        );
    }

    #[test]
    fn bind_addr_broadcast_rejected() {
        let addr: IpAddr = "255.255.255.255".parse().unwrap();
        let result = validate_bind_addr(addr);
        assert!(
            result.is_err(),
            "255.255.255.255 (broadcast) doit être rejeté"
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("interdit"),
            "message doit contenir 'interdit' : {msg}"
        );
    }

    #[test]
    fn bind_addr_ipv4_mapped_broadcast_rejected() {
        let addr: IpAddr = "::ffff:255.255.255.255".parse().unwrap();
        let result = validate_bind_addr(addr);
        assert!(
            result.is_err(),
            "::ffff:255.255.255.255 (broadcast mappé) doit être rejeté"
        );
    }

    #[test]
    fn bind_addr_ipv4_mapped_unicast_accepted() {
        let addr: IpAddr = "::ffff:203.0.113.5".parse().unwrap();
        let result = validate_bind_addr(addr);
        assert!(
            result.is_ok(),
            "::ffff:203.0.113.5 (unicast mappé) doit être accepté : {result:?}"
        );
    }

    #[test]
    fn metrics_port_default_is_port_plus_one() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(
            c.resolved_metrics_port(),
            11436,
            "metrics_port défaut = port + 1"
        );
    }

    #[test]
    fn metrics_port_explicit_config() {
        let toml =
            "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nmetrics_port=19091\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(
            c.resolved_metrics_port(),
            19091,
            "metrics_port configuré parsé"
        );
    }

    #[test]
    fn parses_timeout_secs() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=1\ntimeout_secs=60\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(c.timeout_secs, 60);
    }

    #[test]
    fn parses_max_tokens_with_default() {
        let toml_default = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=1\n";
        let c = EngineConfig::from_toml(toml_default).unwrap();
        assert_eq!(c.max_tokens, 512, "défaut max_tokens = 512");
        let toml_custom =
            "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=1\nmax_tokens=256\n";
        let c2 = EngineConfig::from_toml(toml_custom).unwrap();
        assert_eq!(c2.max_tokens, 256, "max_tokens custom parsé");
    }

    #[test]
    fn parses_gradatum_url() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=1\ngradatum_url=\"http://127.0.0.1:19090\"\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(c.gradatum_url, "http://127.0.0.1:19090");
    }

    #[test]
    fn model_alias_from_path() {
        let toml = "[engine]\nmodel_path=\"/opt/gradatum/models/qwen3-4b.gguf\"\nmodel_kind=\"chat\"\nport=1\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(c.model_alias(), "qwen3-4b");
    }

    #[test]
    fn provider_alias_chat_embed() {
        let toml_chat = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=1\n";
        let toml_embed = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"embed\"\nport=1\n";
        assert_eq!(
            EngineConfig::from_toml(toml_chat).unwrap().provider_alias(),
            "engine-curator"
        );
        assert_eq!(
            EngineConfig::from_toml(toml_embed)
                .unwrap()
                .provider_alias(),
            "engine-embed"
        );
    }

    #[test]
    fn parses_mmproj_path() {
        let toml = r#"
[engine]
model_path = "/opt/gradatum/models/qwen3-35b.gguf"
model_kind = "chat"
port = 8080
mmproj_path = "/opt/gradatum/models/mmproj-F16.gguf"
"#;
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(
            c.mmproj_path,
            Some(std::path::PathBuf::from(
                "/opt/gradatum/models/mmproj-F16.gguf"
            )),
            "mmproj_path parsé depuis le TOML"
        );
    }

    #[test]
    fn mmproj_path_default_is_none() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert!(c.mmproj_path.is_none(), "défaut mmproj_path = None");
    }

    #[test]
    fn validate_rejects_mmproj_outside_prefix() {
        let mmproj = scratch_file("mmproj-test-proj");
        let result = super::validate_model_prefix(&mmproj);
        let _ = std::fs::remove_file(&mmproj);
        let msg = result
            .expect_err("validate_model_prefix doit rejeter un chemin hors /opt/gradatum/models/")
            .to_string();
        // The file exists, so the rejection must come from the prefix rule
        // and not from a failed canonicalization.
        assert!(
            msg.contains("P1-6"),
            "l'erreur doit venir du contrôle de préfixe (P1-6) : {msg}"
        );
    }

    #[test]
    fn body_limit_bytes_default_is_32_mib() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(
            c.body_limit_bytes,
            32 * 1024 * 1024,
            "défaut body_limit_bytes = 32 MiB (images vision base64 dépassent 1 MiB)"
        );
    }

    #[test]
    fn parses_body_limit_bytes() {
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nbody_limit_bytes=8388608\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        assert_eq!(
            c.body_limit_bytes, 8_388_608,
            "body_limit_bytes custom = 8 MiB parsé"
        );
    }

    #[test]
    fn validate_rejects_body_limit_over_cap() {
        // 268435457 = 256 MiB + 1 byte.
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nbody_limit_bytes=268435457\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        let result = c.validate();
        assert!(
            result.is_err(),
            "body_limit_bytes > 256 MiB doit être rejeté"
        );
        assert!(
            result.unwrap_err().to_string().contains("256 MiB"),
            "message doit citer le plafond"
        );
    }

    #[test]
    fn validate_accepts_body_limit_at_cap_boundary() {
        // 268435456 = exactly 256 MiB. Validation still fails, but on the
        // model path ("x"), which is checked after the body limit.
        let toml = "[engine]\nmodel_path=\"x\"\nmodel_kind=\"chat\"\nport=11435\nbody_limit_bytes=268435456\n";
        let c = EngineConfig::from_toml(toml).unwrap();
        let err = c.validate().unwrap_err().to_string();
        assert!(
            !err.contains("256 MiB"),
            "256 MiB pile ne doit PAS être rejeté par le cap (erreur = model_path)"
        );
    }
}