use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
/// Root configuration document.
///
/// When deserializing, `role`, `quorum`, `api`, `storage` and `telemetry`
/// must be present. Every remaining section is optional and is left as
/// `None` when the input omits it.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LatticeConfig {
    /// Role of this node.
    pub role: NodeRole,
    /// Quorum section.
    pub quorum: QuorumConfig,
    /// API section.
    pub api: ApiConfig,
    /// Storage section.
    pub storage: StorageConfig,
    /// Telemetry section.
    pub telemetry: TelemetryConfig,

    /// Optional federation section.
    #[serde(default)]
    pub federation: Option<FederationConfig>,
    /// Optional node-agent section.
    #[serde(default)]
    pub node_agent: Option<NodeAgentConfig>,
    /// Optional network section.
    #[serde(default)]
    pub network: Option<NetworkConfig>,
    /// Optional checkpoint section.
    #[serde(default)]
    pub checkpoint: Option<CheckpointConfig>,
    /// Optional scheduling section.
    #[serde(default)]
    pub scheduling: Option<SchedulingConfig>,
    /// Optional accounting section.
    #[serde(default)]
    pub accounting: Option<AccountingConfig>,
    /// Optional rate-limit section.
    #[serde(default)]
    pub rate_limit: Option<RateLimitConfig>,
    /// Optional compatibility section.
    #[serde(default)]
    pub compat: Option<CompatConfig>,
    /// Optional identity section.
    #[serde(default)]
    pub identity: Option<IdentityConfig>,
    /// Optional vault section.
    #[serde(default)]
    pub vault: Option<VaultConfig>,
}
impl Default for LatticeConfig {
fn default() -> Self {
Self {
role: NodeRole::Combined,
quorum: QuorumConfig::default(),
api: ApiConfig {
grpc_address: "0.0.0.0:50051".to_string(),
rest_address: Some("0.0.0.0:8080".to_string()),
oidc_issuer: String::new(),
oidc_client_id: None,
tls_cert: None,
tls_key: None,
tls_ca: None,
bind_network: BindNetwork::Any,
oidc_hmac_secret: None,
},
storage: StorageConfig {
vast_api_url: None,
vast_username: None,
vast_password: None,
vast_timeout_secs: default_vast_timeout(),
s3_endpoint: String::new(),
nfs_home_path: "/home".to_string(),
local_scratch_path: "/scratch".to_string(),
},
telemetry: TelemetryConfig {
default_mode: "prod".to_string(),
tsdb_endpoint: String::new(),
prod_interval_seconds: 30,
ebpf_programs_path: PathBuf::from("/opt/lattice/ebpf"),
},
federation: None,
node_agent: None,
network: None,
checkpoint: None,
scheduling: None,
accounting: None,
rate_limit: None,
compat: None,
identity: None,
vault: None,
}
}
}
/// Role a node is configured to play.
///
/// There is no `rename_all` attribute, so the variants are (de)serialized
/// under their Rust names: `QuorumMember`, `ComputeNode`, `Combined`.
///
/// `PartialEq`/`Eq` are derived so roles can be compared with `==`, in line
/// with the sibling `BindNetwork` enum.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum NodeRole {
    /// Quorum member role.
    QuorumMember,
    /// Compute node role.
    ComputeNode,
    /// Combined role (presumably quorum member and compute node at once — confirm
    /// against the code that consumes this value).
    Combined,
}
/// Quorum section of the configuration.
///
/// `node_id`, `peers`, both timing fields and `snapshot_threshold` are
/// mandatory on input; the remaining fields fall back to serde defaults.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct QuorumConfig {
    /// Numeric identifier of this node.
    pub node_id: u64,
    /// Configured peers, each with an id and an address.
    pub peers: Vec<PeerConfig>,
    /// Election timeout, in milliseconds.
    pub election_timeout_ms: u64,
    /// Heartbeat interval, in milliseconds.
    pub heartbeat_interval_ms: u64,
    /// Snapshot threshold (unit not visible here; presumably a log-entry count — confirm).
    pub snapshot_threshold: u64,

    /// Raft listen address; taken from `default_raft_listen_address` when omitted.
    #[serde(default = "default_raft_listen_address")]
    pub raft_listen_address: String,
    /// Optional data directory.
    #[serde(default)]
    pub data_dir: Option<PathBuf>,
    /// Network to bind on; taken from `default_bind_network` when omitted.
    #[serde(default = "default_bind_network")]
    pub bind_network: BindNetwork,
    /// Optional path to the audit signing key.
    #[serde(default)]
    pub audit_signing_key_path: Option<PathBuf>,
    /// Bootstrap flag; `false` when omitted.
    #[serde(default)]
    pub bootstrap: bool,
}
/// Serde fallback for `QuorumConfig::raft_listen_address`: the wildcard
/// address on port 9000.
fn default_raft_listen_address() -> String {
    String::from("0.0.0.0:9000")
}
/// Serde fallback for the `bind_network` fields: `BindNetwork::Any`.
fn default_bind_network() -> BindNetwork {
    const FALLBACK: BindNetwork = BindNetwork::Any;
    FALLBACK
}
/// Network selector used by the `bind_network` configuration fields.
///
/// Because of `rename_all = "lowercase"`, the variants are (de)serialized as
/// `hsn`, `management` and `any`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum BindNetwork {
    /// Serialized as `hsn` (presumably the high-speed network — confirm).
    Hsn,
    /// Serialized as `management`.
    Management,
    /// Serialized as `any`; the value returned by `default_bind_network`.
    Any,
}
impl Default for QuorumConfig {
fn default() -> Self {
Self {
node_id: 1,
peers: Vec::new(),
election_timeout_ms: 500,
heartbeat_interval_ms: 100,
snapshot_threshold: 10000,
raft_listen_address: default_raft_listen_address(),
data_dir: None,
bind_network: BindNetwork::Any,
audit_signing_key_path: None,
bootstrap: false,
}
}
}
/// One entry of `QuorumConfig::peers`. Both fields are required on input.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct PeerConfig {
    /// Numeric identifier of the peer.
    pub id: u64,
    /// Address of the peer, kept as an unparsed string.
    pub address: String,
}
/// API section of the configuration.
///
/// `grpc_address` and `oidc_issuer` are required on input. All `Option`
/// fields deserialize to `None` when absent, and `bind_network` falls back to
/// `default_bind_network`.
///
/// `Debug` is implemented by hand instead of derived so that
/// `oidc_hmac_secret` never ends up in logs or panic messages.
#[derive(Clone, Serialize, Deserialize)]
pub struct ApiConfig {
    /// Address for the gRPC endpoint.
    pub grpc_address: String,
    /// Optional address for the REST endpoint.
    pub rest_address: Option<String>,
    /// OIDC issuer.
    pub oidc_issuer: String,
    /// Optional OIDC client id.
    #[serde(default)]
    pub oidc_client_id: Option<String>,
    /// Optional path to the TLS certificate.
    pub tls_cert: Option<PathBuf>,
    /// Optional path to the TLS key.
    pub tls_key: Option<PathBuf>,
    /// Optional path to the TLS CA.
    #[serde(default)]
    pub tls_ca: Option<PathBuf>,
    /// Network to bind on; `BindNetwork::Any` when omitted.
    #[serde(default = "default_bind_network")]
    pub bind_network: BindNetwork,
    /// Optional OIDC HMAC secret. Sensitive: redacted in `Debug` output.
    #[serde(default)]
    pub oidc_hmac_secret: Option<String>,
}

impl std::fmt::Debug for ApiConfig {
    /// Formats like the derived `Debug`, except that a configured
    /// `oidc_hmac_secret` is shown as `Some("[REDACTED]")`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the Some/None distinction so it stays visible whether a secret
        // is configured at all, without revealing its value.
        let hmac_secret = self.oidc_hmac_secret.as_ref().map(|_| "[REDACTED]");
        f.debug_struct("ApiConfig")
            .field("grpc_address", &self.grpc_address)
            .field("rest_address", &self.rest_address)
            .field("oidc_issuer", &self.oidc_issuer)
            .field("oidc_client_id", &self.oidc_client_id)
            .field("tls_cert", &self.tls_cert)
            .field("tls_key", &self.tls_key)
            .field("tls_ca", &self.tls_ca)
            .field("bind_network", &self.bind_network)
            .field("oidc_hmac_secret", &hmac_secret)
            .finish()
    }
}
/// Storage section of the configuration.
///
/// `s3_endpoint`, `nfs_home_path` and `local_scratch_path` are required on
/// input. The `vast_*` options deserialize to `None` when absent, and
/// `vast_timeout_secs` falls back to `default_vast_timeout`.
///
/// `Debug` is implemented by hand instead of derived so that `vast_password`
/// never ends up in logs or panic messages.
#[derive(Clone, Serialize, Deserialize)]
pub struct StorageConfig {
    /// Optional VAST API URL.
    pub vast_api_url: Option<String>,
    /// Optional VAST user name.
    #[serde(default)]
    pub vast_username: Option<String>,
    /// Optional VAST password. Sensitive: redacted in `Debug` output.
    #[serde(default)]
    pub vast_password: Option<String>,
    /// VAST timeout in seconds; 30 when omitted.
    #[serde(default = "default_vast_timeout")]
    pub vast_timeout_secs: u64,
    /// S3 endpoint.
    pub s3_endpoint: String,
    /// NFS home path.
    pub nfs_home_path: String,
    /// Local scratch path.
    pub local_scratch_path: String,
}

impl std::fmt::Debug for StorageConfig {
    /// Formats like the derived `Debug`, except that a configured
    /// `vast_password` is shown as `Some("[REDACTED]")`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the Some/None distinction without revealing the value.
        let password = self.vast_password.as_ref().map(|_| "[REDACTED]");
        f.debug_struct("StorageConfig")
            .field("vast_api_url", &self.vast_api_url)
            .field("vast_username", &self.vast_username)
            .field("vast_password", &password)
            .field("vast_timeout_secs", &self.vast_timeout_secs)
            .field("s3_endpoint", &self.s3_endpoint)
            .field("nfs_home_path", &self.nfs_home_path)
            .field("local_scratch_path", &self.local_scratch_path)
            .finish()
    }
}
/// Serde fallback for `StorageConfig::vast_timeout_secs`: 30 seconds.
fn default_vast_timeout() -> u64 {
    const FALLBACK_SECS: u64 = 30;
    FALLBACK_SECS
}
/// Telemetry section of the configuration. All four fields are required on
/// input.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TelemetryConfig {
    /// Default telemetry mode, kept as a free-form string
    /// (`LatticeConfig::default` uses `"prod"`).
    pub default_mode: String,
    /// TSDB endpoint.
    pub tsdb_endpoint: String,
    /// Interval for the prod mode, in seconds.
    pub prod_interval_seconds: u64,
    /// Path to the eBPF programs.
    pub ebpf_programs_path: PathBuf,
}
/// Federation section of the configuration. All fields are required on
/// input whenever the section is present.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct FederationConfig {
    /// Sovra endpoint.
    pub sovra_endpoint: String,
    /// Workspace id.
    pub workspace_id: String,
    /// Broker address.
    pub broker_address: String,
    /// Federation peers.
    pub peers: Vec<FederationPeer>,
}
/// One entry of `FederationConfig::peers`. All fields are required on input.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct FederationPeer {
    /// Name of the peer.
    pub name: String,
    /// Broker address of the peer.
    pub broker_address: String,
    /// Sovra workspace id of the peer.
    pub sovra_workspace_id: String,
}
/// Node-agent section of the configuration.
///
/// The four timing fields are required on input whenever the section is
/// present; `bind_network` falls back to `default_bind_network`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct NodeAgentConfig {
    /// Heartbeat interval, in seconds.
    pub heartbeat_interval_seconds: u64,
    /// Heartbeat timeout, in seconds.
    pub heartbeat_timeout_seconds: u64,
    /// Grace period, in seconds.
    pub grace_period_seconds: u64,
    /// Grace period for the "sensitive" case, in seconds.
    pub sensitive_grace_period_seconds: u64,
    /// Network to bind on; `BindNetwork::Any` when omitted.
    #[serde(default = "default_bind_network")]
    pub bind_network: BindNetwork,
}
impl Default for NodeAgentConfig {
fn default() -> Self {
Self {
heartbeat_interval_seconds: 10,
heartbeat_timeout_seconds: 30,
grace_period_seconds: 120,
sensitive_grace_period_seconds: 600,
bind_network: BindNetwork::Any,
}
}
}
/// Network section of the configuration: the bounds of the VNI pool.
/// Both fields are required on input whenever the section is present.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct NetworkConfig {
    /// Start of the VNI pool.
    pub vni_pool_start: u32,
    /// End of the VNI pool (whether the bound is inclusive is not visible here).
    pub vni_pool_end: u32,
}
impl Default for NetworkConfig {
fn default() -> Self {
Self {
vni_pool_start: 100,
vni_pool_end: 4095,
}
}
}
/// Checkpoint section of the configuration. All fields are required on input
/// whenever the section is present.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct CheckpointConfig {
    /// Evaluation interval, in seconds.
    pub evaluation_interval_seconds: u64,
    /// Checkpoint timeout, in seconds.
    pub checkpoint_timeout_seconds: u64,
    /// Maximum deferral, in seconds.
    pub max_deferral_seconds: u64,
}
impl Default for CheckpointConfig {
fn default() -> Self {
Self {
evaluation_interval_seconds: 30,
checkpoint_timeout_seconds: 300,
max_deferral_seconds: 60,
}
}
}
/// Scheduling section of the configuration.
///
/// `cycle_interval_seconds` and `backfill_depth` are required on input
/// whenever the section is present; `budget_period_days` falls back to
/// `default_budget_period_days`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SchedulingConfig {
    /// Cycle interval, in seconds.
    pub cycle_interval_seconds: u64,
    /// Backfill depth.
    pub backfill_depth: u32,
    /// Budget period, in days; 90 when omitted.
    #[serde(default = "default_budget_period_days")]
    pub budget_period_days: u32,
}
/// Serde fallback for `SchedulingConfig::budget_period_days`: 90 days.
fn default_budget_period_days() -> u32 {
    const FALLBACK_DAYS: u32 = 90;
    FALLBACK_DAYS
}
impl Default for SchedulingConfig {
fn default() -> Self {
Self {
cycle_interval_seconds: 5,
backfill_depth: 100,
budget_period_days: default_budget_period_days(),
}
}
}
/// Accounting section of the configuration. All fields are required on input
/// whenever the section is present.
///
/// `Debug` is implemented by hand instead of derived so that `waldur_token`
/// never ends up in logs or panic messages.
#[derive(Clone, Serialize, Deserialize)]
pub struct AccountingConfig {
    /// Whether accounting is enabled.
    pub enabled: bool,
    /// Waldur API URL.
    pub waldur_api_url: String,
    /// Waldur token. Sensitive: redacted in `Debug` output.
    pub waldur_token: String,
    /// Push interval, in seconds.
    pub push_interval_seconds: u64,
    /// Buffer size.
    pub buffer_size: u32,
}

impl std::fmt::Debug for AccountingConfig {
    /// Formats like the derived `Debug`, except that a non-empty
    /// `waldur_token` is shown as `"[REDACTED]"`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // An empty token is printed as-is so "no token configured" stays
        // distinguishable from "token configured" without leaking the value.
        let token = if self.waldur_token.is_empty() {
            ""
        } else {
            "[REDACTED]"
        };
        f.debug_struct("AccountingConfig")
            .field("enabled", &self.enabled)
            .field("waldur_api_url", &self.waldur_api_url)
            .field("waldur_token", &token)
            .field("push_interval_seconds", &self.push_interval_seconds)
            .field("buffer_size", &self.buffer_size)
            .finish()
    }
}
impl Default for AccountingConfig {
fn default() -> Self {
Self {
enabled: false,
waldur_api_url: String::new(),
waldur_token: String::new(),
push_interval_seconds: 60,
buffer_size: 1000,
}
}
}
/// Rate-limit section of the configuration. All fields are required on input
/// whenever the section is present.
///
/// Fields ending in `_max_concurrent` are concurrency caps; fields ending in
/// `_per_minute` are per-minute rates.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct RateLimitConfig {
    /// Concurrency cap for attach.
    pub attach_max_concurrent: u32,
    /// Concurrency cap for log streaming.
    pub stream_logs_max_concurrent: u32,
    /// Per-minute limit for metric queries.
    pub query_metrics_per_minute: u32,
    /// Concurrency cap for metric streaming.
    pub stream_metrics_max_concurrent: u32,
    /// Per-minute limit for diagnostics.
    pub diagnostics_per_minute: u32,
    /// Per-minute limit for compare.
    pub compare_per_minute: u32,
}
impl Default for RateLimitConfig {
fn default() -> Self {
Self {
attach_max_concurrent: 5,
stream_logs_max_concurrent: 10,
query_metrics_per_minute: 60,
stream_metrics_max_concurrent: 5,
diagnostics_per_minute: 10,
compare_per_minute: 10,
}
}
}
/// Compatibility section of the configuration. All fields are required on
/// input whenever the section is present.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct CompatConfig {
    /// Whether to set the Slurm environment (exact effect is decided by the
    /// consumer of this flag, which is not visible here).
    pub set_slurm_env: bool,
    /// Partition mapping, string to string.
    pub partition_mapping: HashMap<String, String>,
    /// QoS mapping, string to `u32`.
    pub qos_mapping: HashMap<String, u32>,
}
impl Default for CompatConfig {
fn default() -> Self {
Self {
set_slurm_env: true,
partition_mapping: HashMap::new(),
qos_mapping: HashMap::new(),
}
}
}
/// Identity section of the configuration.
///
/// Every field has a serde default, so an empty mapping deserializes
/// successfully.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct IdentityConfig {
    /// SPIRE socket; taken from `default_spire_socket` when omitted.
    #[serde(default = "default_spire_socket")]
    pub spire_socket: String,
    /// Optional signing endpoint.
    #[serde(default)]
    pub signing_endpoint: Option<String>,
    /// Optional bootstrap certificate (the tests in this file pass a file path).
    #[serde(default)]
    pub bootstrap_cert: Option<String>,
    /// Optional bootstrap key (the tests in this file pass a file path).
    #[serde(default)]
    pub bootstrap_key: Option<String>,
    /// Optional bootstrap CA (the tests in this file pass a file path).
    #[serde(default)]
    pub bootstrap_ca: Option<String>,
    /// Certificate lifetime, in seconds; taken from `default_cert_lifetime`
    /// when omitted.
    #[serde(default = "default_cert_lifetime")]
    pub cert_lifetime_seconds: u64,
}
/// Serde fallback for `IdentityConfig::spire_socket`.
fn default_spire_socket() -> String {
    String::from("/run/spire/agent.sock")
}
/// Serde fallback for `IdentityConfig::cert_lifetime_seconds`: 259 200
/// seconds, i.e. three days.
fn default_cert_lifetime() -> u64 {
    const SECONDS_PER_DAY: u64 = 24 * 60 * 60;
    3 * SECONDS_PER_DAY
}
impl Default for IdentityConfig {
fn default() -> Self {
Self {
spire_socket: default_spire_socket(),
signing_endpoint: None,
bootstrap_cert: None,
bootstrap_key: None,
bootstrap_ca: None,
cert_lifetime_seconds: default_cert_lifetime(),
}
}
}
/// Vault section of the configuration.
///
/// `address` and `role_id` are required on input whenever the section is
/// present; the other fields fall back to serde defaults.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct VaultConfig {
    /// Vault address.
    pub address: String,
    /// Prefix; taken from `default_vault_prefix` when omitted.
    #[serde(default = "default_vault_prefix")]
    pub prefix: String,
    /// Role id.
    pub role_id: String,
    /// Name of an environment variable (presumably the one holding the secret
    /// id — confirm against the Vault client code); taken from
    /// `default_vault_secret_id_env` when omitted.
    #[serde(default = "default_vault_secret_id_env")]
    pub secret_id_env: String,
    /// Optional path to a TLS CA.
    #[serde(default)]
    pub tls_ca_path: Option<PathBuf>,
    /// Timeout, in seconds; taken from `default_vault_timeout` when omitted.
    #[serde(default = "default_vault_timeout")]
    pub timeout_secs: u64,
}
/// Serde fallback for `VaultConfig::prefix`.
fn default_vault_prefix() -> String {
    String::from("secret/data/lattice")
}
/// Serde fallback for `VaultConfig::secret_id_env`.
fn default_vault_secret_id_env() -> String {
    String::from("VAULT_SECRET_ID")
}
/// Serde fallback for `VaultConfig::timeout_secs`: 10 seconds.
fn default_vault_timeout() -> u64 {
    const FALLBACK_SECS: u64 = 10;
    FALLBACK_SECS
}
impl Default for VaultConfig {
fn default() -> Self {
Self {
address: String::new(),
prefix: default_vault_prefix(),
role_id: String::new(),
secret_id_env: default_vault_secret_id_env(),
tls_ca_path: None,
timeout_secs: default_vault_timeout(),
}
}
}
#[cfg(test)]
mod tests {
    // Unit tests for the configuration defaults and their YAML round trip.
    use super::*;

    /// `NodeAgentConfig::default` yields the documented timing values.
    #[test]
    fn node_agent_config_defaults() {
        let cfg = NodeAgentConfig::default();
        assert_eq!(cfg.heartbeat_interval_seconds, 10);
        assert_eq!(cfg.heartbeat_timeout_seconds, 30);
        assert_eq!(cfg.grace_period_seconds, 120);
        assert_eq!(cfg.sensitive_grace_period_seconds, 600);
        assert_eq!(cfg.bind_network, BindNetwork::Any);
    }

    /// The default VNI pool is non-empty and ordered.
    #[test]
    fn network_config_defaults() {
        let cfg = NetworkConfig::default();
        assert_eq!(cfg.vni_pool_start, 100);
        assert_eq!(cfg.vni_pool_end, 4095);
        assert!(cfg.vni_pool_start < cfg.vni_pool_end);
    }

    /// The default deferral window is shorter than the checkpoint timeout.
    #[test]
    fn checkpoint_config_defaults() {
        let cfg = CheckpointConfig::default();
        assert_eq!(cfg.evaluation_interval_seconds, 30);
        assert_eq!(cfg.checkpoint_timeout_seconds, 300);
        assert_eq!(cfg.max_deferral_seconds, 60);
        assert!(cfg.max_deferral_seconds < cfg.checkpoint_timeout_seconds);
    }

    #[test]
    fn scheduling_config_defaults() {
        let cfg = SchedulingConfig::default();
        assert_eq!(cfg.cycle_interval_seconds, 5);
        assert_eq!(cfg.backfill_depth, 100);
        assert_eq!(cfg.budget_period_days, 90);
    }

    /// Accounting is off unless explicitly enabled.
    #[test]
    fn accounting_config_defaults_disabled() {
        let cfg = AccountingConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.push_interval_seconds, 60);
        assert_eq!(cfg.buffer_size, 1000);
    }

    #[test]
    fn rate_limit_config_defaults() {
        let cfg = RateLimitConfig::default();
        assert_eq!(cfg.attach_max_concurrent, 5);
        assert_eq!(cfg.stream_logs_max_concurrent, 10);
        assert_eq!(cfg.query_metrics_per_minute, 60);
        assert_eq!(cfg.stream_metrics_max_concurrent, 5);
        assert_eq!(cfg.diagnostics_per_minute, 10);
        assert_eq!(cfg.compare_per_minute, 10);
    }

    #[test]
    fn compat_config_defaults() {
        let cfg = CompatConfig::default();
        assert!(cfg.set_slurm_env);
        assert!(cfg.partition_mapping.is_empty());
        assert!(cfg.qos_mapping.is_empty());
    }

    #[test]
    fn identity_config_defaults() {
        let cfg = IdentityConfig::default();
        assert_eq!(cfg.spire_socket, "/run/spire/agent.sock");
        assert!(cfg.signing_endpoint.is_none());
        assert!(cfg.bootstrap_cert.is_none());
        assert!(cfg.bootstrap_key.is_none());
        assert!(cfg.bootstrap_ca.is_none());
        assert_eq!(cfg.cert_lifetime_seconds, 259_200);
    }

    /// Explicit values in the input override every identity default.
    #[test]
    fn identity_config_deserializes() {
        // Flat mapping: the keys deliberately start at column 0 of the literal.
        let yaml = r#"
spire_socket: /custom/spire.sock
signing_endpoint: https://quorum:9443
bootstrap_cert: /etc/lattice/cert.pem
bootstrap_key: /etc/lattice/key.pem
bootstrap_ca: /etc/lattice/ca.pem
cert_lifetime_seconds: 86400
"#;
        let cfg: IdentityConfig = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(cfg.spire_socket, "/custom/spire.sock");
        assert_eq!(cfg.signing_endpoint.as_deref(), Some("https://quorum:9443"));
        assert_eq!(cfg.cert_lifetime_seconds, 86400);
    }

    /// A document holding only the required keys parses, and every omitted
    /// field picks up its serde default.
    #[test]
    fn lattice_config_deserializes_minimal_yaml() {
        // YAML nesting is significant: each section's keys must be indented
        // below the section name, otherwise the section itself parses as null
        // and deserialization fails. Top-level keys start at column 0.
        let yaml = r#"
role: QuorumMember
quorum:
  node_id: 1
  peers:
    - id: 2
      address: "10.0.0.2:9000"
  election_timeout_ms: 500
  heartbeat_interval_ms: 100
  snapshot_threshold: 10000
api:
  grpc_address: "0.0.0.0:50051"
  oidc_issuer: "https://auth.example.com"
storage:
  s3_endpoint: "https://s3.example.com"
  nfs_home_path: "/home"
  local_scratch_path: "/scratch"
telemetry:
  default_mode: "prod"
  tsdb_endpoint: "https://tsdb.example.com"
  prod_interval_seconds: 30
  ebpf_programs_path: "/opt/lattice/ebpf"
"#;
        let cfg: LatticeConfig = serde_yaml::from_str(yaml).unwrap();

        // Values given explicitly in the document.
        assert!(matches!(cfg.role, NodeRole::QuorumMember));
        assert_eq!(cfg.quorum.node_id, 1);
        assert_eq!(cfg.quorum.peers.len(), 1);
        assert_eq!(cfg.quorum.peers[0].id, 2);
        assert_eq!(cfg.quorum.peers[0].address, "10.0.0.2:9000");
        assert_eq!(cfg.api.grpc_address, "0.0.0.0:50051");

        // Optional sections and fields that were left out.
        assert!(cfg.federation.is_none());
        assert!(cfg.node_agent.is_none());
        assert!(cfg.network.is_none());
        assert!(cfg.api.rest_address.is_none());
        assert!(cfg.storage.vast_api_url.is_none());

        // Fields filled in by the serde default helpers.
        assert_eq!(cfg.quorum.raft_listen_address, "0.0.0.0:9000");
        assert_eq!(cfg.quorum.bind_network, BindNetwork::Any);
        assert!(!cfg.quorum.bootstrap);
        assert_eq!(cfg.storage.vast_timeout_secs, 30);
    }

    /// `LatticeConfig::default` builds a usable baseline.
    #[test]
    fn default_config_parses_without_error() {
        let config = LatticeConfig::default();
        assert!(!config.api.grpc_address.is_empty());
        assert!(config.quorum.node_id > 0);
    }

    /// Serializing the default configuration and parsing it back preserves
    /// the inspected fields.
    #[test]
    fn config_yaml_roundtrip() {
        let config = LatticeConfig::default();
        let yaml = serde_yaml::to_string(&config).unwrap();
        let parsed: LatticeConfig = serde_yaml::from_str(&yaml).unwrap();
        assert_eq!(parsed.api.grpc_address, config.api.grpc_address);
        assert_eq!(parsed.quorum.node_id, config.quorum.node_id);
        assert_eq!(
            parsed.quorum.raft_listen_address,
            config.quorum.raft_listen_address
        );
    }

    #[test]
    fn quorum_config_defaults() {
        let cfg = QuorumConfig::default();
        assert_eq!(cfg.node_id, 1);
        assert!(cfg.peers.is_empty());
        assert_eq!(cfg.raft_listen_address, "0.0.0.0:9000");
    }

    #[test]
    fn lattice_config_defaults() {
        let cfg = LatticeConfig::default();
        assert_eq!(cfg.quorum.node_id, 1);
        assert!(cfg.quorum.peers.is_empty());
        assert_eq!(cfg.api.grpc_address, "0.0.0.0:50051");
        assert!(cfg.telemetry.tsdb_endpoint.is_empty());
        assert!(cfg.federation.is_none());
    }
}