Skip to main content

orca_core/config/
cluster.rs

1use std::collections::HashMap;
2
3use serde::{Deserialize, Serialize};
4
5use super::ai::AiConfig;
6use crate::backup::BackupConfig;
7
8/// Top-level cluster configuration (`cluster.toml`).
9#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10pub struct ClusterConfig {
11    pub cluster: ClusterMeta,
12    #[serde(default)]
13    pub node: Vec<NodeConfig>,
14    #[serde(default)]
15    pub observability: Option<ObservabilityConfig>,
16    #[serde(default)]
17    pub ai: Option<AiConfig>,
18    #[serde(default)]
19    pub backup: Option<BackupConfig>,
20    /// API bearer tokens for authentication. Empty = allow all requests.
21    #[serde(default)]
22    pub api_tokens: Vec<String>,
23    /// Mesh networking configuration (NetBird).
24    #[serde(default)]
25    pub network: Option<NetworkConfig>,
26}
27
28/// Mesh networking configuration.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct NetworkConfig {
31    /// Network provider: "netbird" (default).
32    #[serde(default = "default_network_provider")]
33    pub provider: String,
34    /// NetBird setup key for joining the mesh.
35    pub setup_key: Option<String>,
36    /// NetBird management URL (default: api.netbird.io).
37    pub management_url: Option<String>,
38}
39
/// Serde default for `NetworkConfig::provider`: the `"netbird"` provider.
fn default_network_provider() -> String {
    String::from("netbird")
}
43
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct ClusterMeta {
46    #[serde(default = "default_cluster_name")]
47    pub name: String,
48    pub domain: Option<String>,
49    pub acme_email: Option<String>,
50    #[serde(default = "default_log_level")]
51    pub log_level: String,
52    #[serde(default = "default_api_port")]
53    pub api_port: u16,
54    #[serde(default = "default_grpc_port")]
55    pub grpc_port: u16,
56}
57
58impl Default for ClusterMeta {
59    fn default() -> Self {
60        Self {
61            name: default_cluster_name(),
62            domain: None,
63            acme_email: None,
64            log_level: default_log_level(),
65            api_port: default_api_port(),
66            grpc_port: default_grpc_port(),
67        }
68    }
69}
70
/// Default value for `ClusterMeta::name`.
fn default_cluster_name() -> String {
    "orca".to_owned()
}
74
/// Default value for `ClusterMeta::log_level`.
pub(crate) fn default_log_level() -> String {
    String::from("info")
}
78
/// Default value for `ClusterMeta::api_port`.
pub(crate) fn default_api_port() -> u16 {
    const DEFAULT_API_PORT: u16 = 6880;
    DEFAULT_API_PORT
}
82
/// Default value for `ClusterMeta::grpc_port`.
pub(crate) fn default_grpc_port() -> u16 {
    const DEFAULT_GRPC_PORT: u16 = 6881;
    DEFAULT_GRPC_PORT
}
86
87#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct NodeConfig {
89    pub address: String,
90    #[serde(default)]
91    pub labels: HashMap<String, String>,
92    /// GPU devices available on this node.
93    #[serde(default)]
94    pub gpus: Vec<NodeGpuConfig>,
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct NodeGpuConfig {
99    /// Vendor: "nvidia" or "amd".
100    pub vendor: String,
101    /// Number of GPUs of this type.
102    #[serde(default = "default_gpu_count")]
103    pub count: u32,
104    /// Model name for scheduling (e.g., "A100", "RTX4090").
105    pub model: Option<String>,
106}
107
/// Default value for `NodeGpuConfig::count`: a single GPU.
pub(crate) fn default_gpu_count() -> u32 {
    const DEFAULT_GPU_COUNT: u32 = 1;
    DEFAULT_GPU_COUNT
}
111
112#[derive(Debug, Clone, Serialize, Deserialize)]
113pub struct ObservabilityConfig {
114    pub otlp_endpoint: Option<String>,
115    pub alerts: Option<AlertChannelConfig>,
116}
117
118#[derive(Debug, Clone, Serialize, Deserialize)]
119pub struct AlertChannelConfig {
120    pub webhook: Option<String>,
121    pub email: Option<String>,
122}