Skip to main content

nucleus/topology/
config.rs

1//! Topology configuration: declarative multi-container definitions.
2
3use serde::{Deserialize, Serialize};
4use sha2::{Digest, Sha256};
5use std::collections::BTreeMap;
6use std::path::{Path, PathBuf};
7
8/// A complete topology definition (equivalent to docker-compose.yml).
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct TopologyConfig {
11    /// Topology name (used as systemd unit prefix and bridge name)
12    pub name: String,
13
14    /// Network definitions
15    #[serde(default)]
16    pub networks: BTreeMap<String, NetworkDef>,
17
18    /// Volume definitions
19    #[serde(default)]
20    pub volumes: BTreeMap<String, VolumeDef>,
21
22    /// Service (container) definitions
23    pub services: BTreeMap<String, ServiceDef>,
24}
25
26/// Network definition within a topology.
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct NetworkDef {
29    /// Subnet CIDR (e.g. "10.42.0.0/24")
30    #[serde(default = "default_subnet")]
31    pub subnet: String,
32
33    /// Enable WireGuard encryption for east-west traffic
34    #[serde(default)]
35    pub encrypted: bool,
36}
37
/// Serde default for `NetworkDef::subnet`.
fn default_subnet() -> String {
    String::from("10.42.0.0/24")
}
41
42/// Volume definition within a topology.
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct VolumeDef {
45    /// Volume type: "persistent" (host path) or "ephemeral" (tmpfs)
46    #[serde(default = "default_volume_type")]
47    pub volume_type: String,
48
49    /// Host path for persistent volumes
50    pub path: Option<String>,
51
52    /// Owner UID:GID for the volume
53    pub owner: Option<String>,
54
55    /// Size limit (e.g. "1G") for ephemeral volumes
56    pub size: Option<String>,
57}
58
/// Serde default for `VolumeDef::volume_type`.
fn default_volume_type() -> String {
    String::from("ephemeral")
}
62
63/// Service (container) definition within a topology.
64#[derive(Debug, Clone, Serialize, Deserialize)]
65#[serde(deny_unknown_fields)]
66pub struct ServiceDef {
67    /// Nix store path to rootfs derivation
68    pub rootfs: String,
69
70    /// Command to run
71    pub command: Vec<String>,
72
73    /// Memory limit (e.g. "512M", "2G")
74    pub memory: String,
75
76    /// CPU core limit
77    #[serde(default = "default_cpus")]
78    pub cpus: f64,
79
80    /// PID limit
81    #[serde(default = "default_pids")]
82    pub pids: u64,
83
84    /// Networks this service connects to
85    #[serde(default)]
86    pub networks: Vec<String>,
87
88    /// Volume mounts (format: "volume-name:/mount/path")
89    #[serde(default)]
90    pub volumes: Vec<String>,
91
92    /// Services this depends on, with optional health condition
93    #[serde(default)]
94    pub depends_on: Vec<DependsOn>,
95
96    /// Health check command
97    pub health_check: Option<String>,
98
99    /// Health check interval in seconds
100    #[serde(default = "default_health_interval")]
101    pub health_interval: u64,
102
103    /// Allowed egress CIDRs
104    #[serde(default)]
105    pub egress_allow: Vec<String>,
106
107    /// Allowed egress TCP ports
108    #[serde(default)]
109    pub egress_tcp_ports: Vec<u16>,
110
111    /// Port forwards (format: "HOST:CONTAINER" or "HOST_IP:HOST:CONTAINER")
112    #[serde(default)]
113    pub port_forwards: Vec<String>,
114
115    /// Environment variables
116    #[serde(default)]
117    pub environment: BTreeMap<String, String>,
118
119    /// Workload user name or numeric uid.
120    #[serde(default)]
121    pub user: Option<String>,
122
123    /// Workload group name or numeric gid.
124    #[serde(default)]
125    pub group: Option<String>,
126
127    /// Supplementary workload groups (names or numeric gids).
128    #[serde(default)]
129    pub additional_groups: Vec<String>,
130
131    /// Secret mounts (format: "source:dest")
132    #[serde(default)]
133    pub secrets: Vec<String>,
134
135    /// DNS servers
136    #[serde(default)]
137    pub dns: Vec<String>,
138
139    /// Native bridge NAT backend.
140    #[serde(default = "default_nat_backend")]
141    pub nat_backend: crate::network::NatBackend,
142
143    /// Number of replicas for scaling
144    #[serde(default = "default_replicas")]
145    pub replicas: u32,
146
147    /// Container runtime
148    #[serde(default = "default_runtime")]
149    pub runtime: String,
150    // OCI lifecycle hooks are intentionally not part of topology service
151    // configuration. Hooks execute host commands as the Nucleus supervisor user
152    // and must be supplied only through explicit administrative create config.
153}
154
/// Serde default for `ServiceDef::cpus`: one CPU core.
fn default_cpus() -> f64 {
    1.0_f64
}

/// Serde default for `ServiceDef::pids`.
fn default_pids() -> u64 {
    512_u64
}

/// Serde default for `ServiceDef::health_interval`, in seconds.
fn default_health_interval() -> u64 {
    30_u64
}

/// Serde default for `ServiceDef::replicas`: a single replica.
fn default_replicas() -> u32 {
    1_u32
}
170
171fn default_nat_backend() -> crate::network::NatBackend {
172    crate::network::NatBackend::Auto
173}
174
/// Serde default for `ServiceDef::runtime`.
fn default_runtime() -> String {
    String::from("native")
}
178
179/// Dependency specification with optional health condition.
180#[derive(Debug, Clone, Serialize, Deserialize)]
181pub struct DependsOn {
182    /// Service name
183    pub service: String,
184
185    /// Condition: "started" (default) or "healthy"
186    #[serde(default = "default_condition")]
187    pub condition: String,
188}
189
/// Serde default for `DependsOn::condition`.
fn default_condition() -> String {
    String::from("started")
}
193
/// Structured form of one entry in a service's `volumes` list.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ServiceVolumeMount {
    /// Name of the topology volume being mounted.
    pub volume: String,
    /// Path inside the container where the volume is mounted.
    pub dest: PathBuf,
    /// `true` when the volume is mounted read-only.
    pub read_only: bool,
}
204
205pub(crate) fn parse_service_volume_mount(spec: &str) -> crate::error::Result<ServiceVolumeMount> {
206    let parts: Vec<&str> = spec.split(':').collect();
207    let (volume, dest, read_only) = match parts.as_slice() {
208        [volume, dest] => (*volume, *dest, false),
209        [volume, dest, mode] if *mode == "ro" => (*volume, *dest, true),
210        [volume, dest, mode] if *mode == "rw" => (*volume, *dest, false),
211        _ => {
212            return Err(crate::error::NucleusError::ConfigError(format!(
213                "Invalid volume mount '{}', expected VOLUME:DEST[:ro|rw]",
214                spec
215            )));
216        }
217    };
218
219    if volume.is_empty() {
220        return Err(crate::error::NucleusError::ConfigError(format!(
221            "Volume mount '{}' must name a topology volume",
222            spec
223        )));
224    }
225
226    let dest = crate::filesystem::normalize_volume_destination(Path::new(dest))?;
227    Ok(ServiceVolumeMount {
228        volume: volume.to_string(),
229        dest,
230        read_only,
231    })
232}
233
234pub(crate) fn parse_volume_owner(owner: &str) -> crate::error::Result<(u32, u32)> {
235    let (uid, gid) = owner.split_once(':').ok_or_else(|| {
236        crate::error::NucleusError::ConfigError(format!(
237            "Invalid volume owner '{}', expected UID:GID",
238            owner
239        ))
240    })?;
241    let uid = uid.parse::<u32>().map_err(|e| {
242        crate::error::NucleusError::ConfigError(format!(
243            "Invalid volume owner UID '{}' in '{}': {}",
244            uid, owner, e
245        ))
246    })?;
247    let gid = gid.parse::<u32>().map_err(|e| {
248        crate::error::NucleusError::ConfigError(format!(
249            "Invalid volume owner GID '{}' in '{}': {}",
250            gid, owner, e
251        ))
252    })?;
253    Ok((uid, gid))
254}
255
256impl TopologyConfig {
257    /// Load a topology from a TOML file.
258    pub fn from_file(path: &Path) -> crate::error::Result<Self> {
259        let content = std::fs::read_to_string(path).map_err(|e| {
260            crate::error::NucleusError::ConfigError(format!(
261                "Failed to read topology file {:?}: {}",
262                path, e
263            ))
264        })?;
265        Self::from_toml(&content)
266    }
267
268    /// Parse a topology from a TOML string.
269    pub fn from_toml(content: &str) -> crate::error::Result<Self> {
270        toml::from_str(content).map_err(|e| {
271            crate::error::NucleusError::ConfigError(format!("Failed to parse topology: {}", e))
272        })
273    }
274
275    /// Validate the topology configuration.
276    pub fn validate(&self) -> crate::error::Result<()> {
277        if self.name.is_empty() {
278            return Err(crate::error::NucleusError::ConfigError(
279                "Topology name cannot be empty".to_string(),
280            ));
281        }
282
283        // Validate topology name and all service keys use safe characters,
284        // preventing path traversal when they are used in generated container
285        // names and state paths.
286        crate::container::validate_container_name(&self.name).map_err(|_| {
287            crate::error::NucleusError::ConfigError(format!(
288                "Topology name '{}' contains invalid characters (allowed: a-zA-Z0-9, '-', '_', '.')",
289                self.name
290            ))
291        })?;
292        for service_name in self.services.keys() {
293            crate::container::validate_container_name(service_name).map_err(|_| {
294                crate::error::NucleusError::ConfigError(format!(
295                    "Service name '{}' contains invalid characters (allowed: a-zA-Z0-9, '-', '_', '.')",
296                    service_name
297                ))
298            })?;
299        }
300
301        if self.services.is_empty() {
302            return Err(crate::error::NucleusError::ConfigError(
303                "Topology must have at least one service".to_string(),
304            ));
305        }
306
307        for (name, volume) in &self.volumes {
308            match volume.volume_type.as_str() {
309                "persistent" => {
310                    let path = volume.path.as_ref().ok_or_else(|| {
311                        crate::error::NucleusError::ConfigError(format!(
312                            "Persistent volume '{}' must define path",
313                            name
314                        ))
315                    })?;
316                    if !Path::new(path).is_absolute() {
317                        return Err(crate::error::NucleusError::ConfigError(format!(
318                            "Persistent volume '{}' path must be absolute: {}",
319                            name, path
320                        )));
321                    }
322                    crate::filesystem::validate_bind_mount_source_policy(Path::new(path))?;
323                }
324                "ephemeral" => {
325                    if volume.path.is_some() {
326                        return Err(crate::error::NucleusError::ConfigError(format!(
327                            "Ephemeral volume '{}' must not define path",
328                            name
329                        )));
330                    }
331                }
332                other => {
333                    return Err(crate::error::NucleusError::ConfigError(format!(
334                        "Volume '{}' has unsupported type '{}'",
335                        name, other
336                    )));
337                }
338            }
339
340            if let Some(owner) = &volume.owner {
341                parse_volume_owner(owner)?;
342            }
343        }
344
345        // Validate dependencies reference existing services
346        for (name, svc) in &self.services {
347            for dep in &svc.depends_on {
348                if !self.services.contains_key(&dep.service) {
349                    return Err(crate::error::NucleusError::ConfigError(format!(
350                        "Service '{}' depends on unknown service '{}'",
351                        name, dep.service
352                    )));
353                }
354                if dep.condition != "started" && dep.condition != "healthy" {
355                    return Err(crate::error::NucleusError::ConfigError(format!(
356                        "Invalid dependency condition '{}' for service '{}'",
357                        dep.condition, name
358                    )));
359                }
360                if dep.condition == "healthy" {
361                    let dep_service = self.services.get(&dep.service).ok_or_else(|| {
362                        crate::error::NucleusError::ConfigError(format!(
363                            "Service '{}' depends on unknown service '{}'",
364                            name, dep.service
365                        ))
366                    })?;
367                    if dep_service.health_check.is_none() {
368                        return Err(crate::error::NucleusError::ConfigError(format!(
369                            "Service '{}' depends on '{}' being healthy, but '{}' has no health_check",
370                            name, dep.service, dep.service
371                        )));
372                    }
373                }
374            }
375
376            // Validate networks reference existing network defs
377            for net in &svc.networks {
378                if !self.networks.contains_key(net) {
379                    return Err(crate::error::NucleusError::ConfigError(format!(
380                        "Service '{}' references unknown network '{}'",
381                        name, net
382                    )));
383                }
384            }
385
386            // Validate volume mounts reference existing volume defs
387            for vol_mount in &svc.volumes {
388                let parsed = parse_service_volume_mount(vol_mount)?;
389                if parsed.volume.starts_with('/') {
390                    return Err(crate::error::NucleusError::ConfigError(format!(
391                        "Service '{}' uses absolute host-path volume mount '{}'; topology configs must reference a named volume instead",
392                        name, parsed.volume
393                    )));
394                }
395                if !self.volumes.contains_key(&parsed.volume) {
396                    return Err(crate::error::NucleusError::ConfigError(format!(
397                        "Service '{}' references unknown volume '{}'",
398                        name, parsed.volume
399                    )));
400                }
401            }
402        }
403
404        Ok(())
405    }
406
407    /// Get the config hash for change detection (using service definitions).
408    pub fn service_config_hash(&self, service_name: &str) -> Option<u64> {
409        self.services.get(service_name).and_then(|svc| {
410            let json = serde_json::to_vec(svc).ok()?;
411            let digest = Sha256::digest(&json);
412            let mut bytes = [0u8; 8];
413            bytes.copy_from_slice(&digest[..8]);
414            Some(u64::from_be_bytes(bytes))
415        })
416    }
417}
418
419impl Default for NetworkDef {
420    fn default() -> Self {
421        Self {
422            subnet: default_subnet(),
423            encrypted: false,
424        }
425    }
426}
427
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_minimal_topology() {
        let toml = r#"
name = "test-stack"

[services.web]
rootfs = "/nix/store/abc-web"
command = ["/bin/web-server"]
memory = "512M"
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        assert_eq!(config.name, "test-stack");
        assert_eq!(config.services.len(), 1);
        assert!(config.services.contains_key("web"));
    }

    #[test]
    fn test_parse_full_topology() {
        let toml = r#"
name = "myapp"

[networks.internal]
subnet = "10.42.0.0/24"
encrypted = true

[volumes.db-data]
volume_type = "persistent"
path = "/var/lib/nucleus/myapp/db"
owner = "70:70"

[services.postgres]
rootfs = "/nix/store/abc-postgres"
command = ["postgres", "-D", "/var/lib/postgresql/data"]
memory = "2G"
cpus = 2.0
networks = ["internal"]
volumes = ["db-data:/var/lib/postgresql/data"]
health_check = "pg_isready -U myapp"

[services.web]
rootfs = "/nix/store/abc-web"
command = ["/bin/web-server"]
memory = "512M"
cpus = 1.0
networks = ["internal"]
nat_backend = "userspace"
port_forwards = ["8443:8443"]
egress_allow = ["10.42.0.0/24"]

[[services.web.depends_on]]
service = "postgres"
condition = "healthy"
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        assert_eq!(config.name, "myapp");
        assert_eq!(config.services.len(), 2);
        assert_eq!(config.networks.len(), 1);
        assert_eq!(config.volumes.len(), 1);
        assert_eq!(
            config.services["web"].nat_backend,
            crate::network::NatBackend::Userspace
        );
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_nat_backend_defaults_to_auto() {
        let toml = r#"
name = "test-stack"

[services.web]
rootfs = "/nix/store/abc-web"
command = ["/bin/web-server"]
memory = "512M"
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        assert_eq!(
            config.services["web"].nat_backend,
            crate::network::NatBackend::Auto
        );
    }

    #[test]
    fn test_validate_missing_dependency() {
        let toml = r#"
name = "bad"

[services.web]
rootfs = "/nix/store/abc"
command = ["/bin/web"]
memory = "256M"

[[services.web.depends_on]]
service = "nonexistent"
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        assert!(config.validate().is_err());
    }

    #[test]
    fn test_validate_healthy_dependency_requires_health_check() {
        let toml = r#"
name = "bad"

[services.db]
rootfs = "/nix/store/db"
command = ["postgres"]
memory = "512M"

[services.web]
rootfs = "/nix/store/web"
command = ["/bin/web"]
memory = "256M"

[[services.web.depends_on]]
service = "db"
condition = "healthy"
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        let err = config.validate().unwrap_err();
        assert!(err.to_string().contains("health_check"));
    }

    #[test]
    fn test_service_config_hash_is_stable_across_invocations() {
        // BUG-03: service_config_hash must be deterministic across binary versions.
        // DefaultHasher is not guaranteed stable; we need a stable algorithm.
        let toml = r#"
name = "test"

[services.web]
rootfs = "/nix/store/web"
command = ["/bin/web"]
memory = "256M"
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        let hash1 = config.service_config_hash("web").unwrap();
        let hash2 = config.service_config_hash("web").unwrap();
        assert_eq!(
            hash1, hash2,
            "hash must be deterministic within same process"
        );

        // Recompute the expected value independently of the implementation:
        // the first 8 bytes of SHA-256 over the service's JSON form, read
        // big-endian. Comparing the hash with itself would pass for any
        // hasher; this fails if the algorithm is swapped (e.g. DefaultHasher).
        let json = serde_json::to_vec(&config.services["web"]).unwrap();
        let digest = Sha256::digest(&json);
        let mut prefix = [0u8; 8];
        prefix.copy_from_slice(&digest[..8]);
        let expected = u64::from_be_bytes(prefix);
        assert_eq!(
            hash1, expected,
            "service_config_hash must be deterministic and stable across invocations"
        );

        // Unknown services have no hash.
        assert!(config.service_config_hash("missing").is_none());
    }

    #[test]
    fn test_service_config_hash_changes_with_config() {
        let base = r#"
name = "test"

[services.web]
rootfs = "/nix/store/web"
command = ["/bin/web"]
memory = "256M"
"#;
        let changed = r#"
name = "test"

[services.web]
rootfs = "/nix/store/web"
command = ["/bin/web"]
memory = "512M"
"#;
        let before = TopologyConfig::from_toml(base).unwrap();
        let after = TopologyConfig::from_toml(changed).unwrap();
        assert_ne!(
            before.service_config_hash("web").unwrap(),
            after.service_config_hash("web").unwrap(),
            "changing a service definition must change its hash"
        );
    }

    #[test]
    fn test_validate_rejects_absolute_path_volume_mounts() {
        // BUG-20: Docker-style absolute path volume mounts must produce
        // a clear error, not a confusing "unknown volume" message
        let toml = r#"
name = "test"

[services.web]
rootfs = "/nix/store/web"
command = ["/bin/web"]
memory = "256M"
volumes = ["/host/path:/container/path"]
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        let err = config.validate().unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("absolute") || msg.contains("named volume"),
            "Absolute path volume mount must produce a clear error about named volumes, got: {}",
            msg
        );
    }

    #[test]
    fn test_validate_rejects_sensitive_persistent_volume_paths() {
        let toml = r#"
name = "test"

[volumes.host-etc]
volume_type = "persistent"
path = "/etc/nucleus"

[services.web]
rootfs = "/nix/store/web"
command = ["/bin/web"]
memory = "256M"
volumes = ["host-etc:/var/lib/web"]
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        let err = config.validate().unwrap_err();
        assert!(err.to_string().contains("sensitive host path"));
    }

    #[test]
    fn test_validate_rejects_reserved_volume_destinations() {
        let toml = r#"
name = "test"

[volumes.data]
volume_type = "ephemeral"
size = "64M"

[services.web]
rootfs = "/nix/store/web"
command = ["/bin/web"]
memory = "256M"
volumes = ["data:/etc"]
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        let err = config.validate().unwrap_err();
        assert!(err.to_string().contains("reserved"));
    }

    #[test]
    fn test_validate_rejects_invalid_volume_owner() {
        let toml = r#"
name = "test"

[volumes.data]
volume_type = "persistent"
path = "/var/lib/test"
owner = "abc:def"

[services.web]
rootfs = "/nix/store/web"
command = ["/bin/web"]
memory = "256M"
volumes = ["data:/var/lib/web"]
"#;
        let config = TopologyConfig::from_toml(toml).unwrap();
        let err = config.validate().unwrap_err();
        assert!(err.to_string().contains("volume owner"));
    }

    #[test]
    fn test_parse_volume_owner_accepts_numeric_pair_only() {
        assert_eq!(parse_volume_owner("70:70").unwrap(), (70, 70));
        assert_eq!(parse_volume_owner("0:1000").unwrap(), (0, 1000));
        assert!(parse_volume_owner("70").is_err());
        assert!(parse_volume_owner("abc:def").is_err());
        assert!(parse_volume_owner("70:").is_err());
        assert!(parse_volume_owner("70:70:70").is_err());
    }

    #[test]
    fn test_parse_service_volume_mount_modes() {
        let rw = parse_service_volume_mount("db-data:/var/lib/postgresql/data").unwrap();
        assert_eq!(rw.volume, "db-data");
        assert!(!rw.read_only);

        let ro = parse_service_volume_mount("db-data:/var/lib/postgresql/data:ro").unwrap();
        assert_eq!(ro.volume, "db-data");
        assert!(ro.read_only);

        let explicit_rw =
            parse_service_volume_mount("db-data:/var/lib/postgresql/data:rw").unwrap();
        assert!(!explicit_rw.read_only);

        // Malformed specs are rejected before the destination is inspected.
        assert!(parse_service_volume_mount("db-data").is_err());
        assert!(parse_service_volume_mount("db-data:/var/lib/postgresql/data:rx").is_err());
        assert!(parse_service_volume_mount("a:/b:ro:extra").is_err());
        assert!(parse_service_volume_mount(":/var/lib/postgresql/data").is_err());
    }

    #[test]
    fn test_topology_rejects_service_oci_hooks() {
        let toml = r#"
name = "test"

[services.web]
rootfs = "/nix/store/web"
command = ["/bin/web"]
memory = "256M"

[services.web.hooks]
poststart = [
  { path = "/bin/sh", args = ["sh", "-c", "id > /tmp/nucleus-owned"] }
]
"#;
        let err = TopologyConfig::from_toml(toml).unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("unknown field `hooks`") || msg.contains("unknown field 'hooks'"),
            "topology service hooks must be rejected at parse time, got: {}",
            msg
        );
    }
}
693}