hpc_node/namespace.rs
1//! Namespace handoff protocol.
2//!
3//! Defines the protocol for passing Linux namespace file descriptors between
4//! pact-agent (provider) and lattice-node-agent (consumer) via unix socket
5//! with `SCM_RIGHTS`.
6
7use serde::{Deserialize, Serialize};
8
/// Filesystem path of the unix socket used for namespace handoff.
///
/// The provider side (pact-agent) listens on this socket; the consumer side
/// (lattice-node-agent) connects to it to request namespaces for
/// allocations.
pub const HANDOFF_SOCKET_PATH: &str = "/run/pact/handoff.sock";
14
15/// Namespace types that can be created and handed off.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
17pub enum NamespaceType {
18 /// PID namespace — isolated process ID space per allocation.
19 Pid,
20 /// Network namespace — isolated network stack per allocation.
21 Net,
22 /// Mount namespace — isolated mount table per allocation.
23 Mount,
24}
25
26/// Request from lattice to pact for allocation namespaces.
27///
28/// Sent over the handoff unix socket as a framed message.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct NamespaceRequest {
31 /// Allocation identifier (from lattice scheduler).
32 pub allocation_id: String,
33 /// Which namespaces to create.
34 pub namespaces: Vec<NamespaceType>,
35 /// Optional uenv image to mount inside the mount namespace.
36 pub uenv_image: Option<String>,
37}
38
39/// Response from pact to lattice with namespace metadata.
40///
41/// The actual namespace file descriptors are passed via `SCM_RIGHTS`
42/// ancillary data on the unix socket, in the same order as
43/// `requested_types`.
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct NamespaceResponse {
46 /// Allocation identifier (echoed from request).
47 pub allocation_id: String,
48 /// Namespace types in the order their FDs are attached via `SCM_RIGHTS`.
49 pub fd_types: Vec<NamespaceType>,
50 /// Mount point for uenv bind-mount inside the mount namespace (if requested).
51 pub uenv_mount_path: Option<String>,
52}
53
54/// Notification that an allocation has ended.
55///
56/// Sent when pact detects that all processes in the allocation's cgroup
57/// have exited (WI5: cgroup-empty detection). Lattice can also send this
58/// proactively if it knows the allocation ended.
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct AllocationEnded {
61 /// Allocation identifier.
62 pub allocation_id: String,
63}
64
65/// Trait for the namespace handoff provider (pact-agent implements).
66///
67/// Creates Linux namespaces for allocations and makes their FDs available
68/// for passing to lattice via the unix socket.
69pub trait NamespaceProvider: Send + Sync {
70 /// Create namespaces for an allocation.
71 ///
72 /// Returns metadata about the created namespaces. The actual FDs are
73 /// made available for the handoff socket to send via `SCM_RIGHTS`.
74 ///
75 /// # Errors
76 ///
77 /// Returns [`NamespaceError::CreationFailed`] if `unshare(2)` or namespace
78 /// setup fails.
79 fn create_namespaces(
80 &self,
81 request: &NamespaceRequest,
82 ) -> Result<NamespaceResponse, NamespaceError>;
83
84 /// Release namespaces for a completed allocation.
85 ///
86 /// Cleans up namespace FDs and any associated resources (bind-mounts).
87 fn release_namespaces(&self, allocation_id: &str) -> Result<(), NamespaceError>;
88}
89
90/// Trait for the namespace handoff consumer (lattice-node-agent implements).
91///
92/// Requests namespaces from the provider (pact). When the provider is
93/// unavailable, falls back to self-service namespace creation (WI4, F27).
94pub trait NamespaceConsumer: Send + Sync {
95 /// Request namespaces from the provider.
96 ///
97 /// If the provider is unavailable (handoff socket not reachable),
98 /// implementations should fall back to creating their own namespaces
99 /// using the same conventions (WI4).
100 fn request_namespaces(
101 &self,
102 request: &NamespaceRequest,
103 ) -> Result<NamespaceResponse, NamespaceError>;
104}
105
106/// Errors from namespace operations.
107#[derive(Debug, thiserror::Error)]
108pub enum NamespaceError {
109 #[error("handoff socket unavailable: {reason}")]
110 SocketUnavailable { reason: String },
111
112 #[error("namespace creation failed: {reason}")]
113 CreationFailed { reason: String },
114
115 #[error("allocation not found: {allocation_id}")]
116 AllocationNotFound { allocation_id: String },
117
118 #[error("FD passing failed: {0}")]
119 FdPassing(#[from] std::io::Error),
120}
121
#[cfg(test)]
mod tests {
    use super::*;

    /// A request survives a JSON round trip with every field intact.
    #[test]
    fn namespace_request_serialization() {
        let req = NamespaceRequest {
            allocation_id: "alloc-42".to_string(),
            namespaces: vec![NamespaceType::Pid, NamespaceType::Net, NamespaceType::Mount],
            uenv_image: Some("pytorch-2.5.sqfs".to_string()),
        };
        let json = serde_json::to_string(&req).unwrap();
        let deser: NamespaceRequest = serde_json::from_str(&json).unwrap();
        assert_eq!(deser.allocation_id, "alloc-42");
        // Compare the whole list, not just its length, so a reordered or
        // mis-decoded entry is caught.
        assert_eq!(deser.namespaces, req.namespaces);
        assert_eq!(deser.uenv_image.as_deref(), Some("pytorch-2.5.sqfs"));
    }

    /// The order of `fd_types` is preserved across a JSON round trip.
    ///
    /// The receiver matches the N-th `SCM_RIGHTS` descriptor to the N-th
    /// entry of `fd_types`, so the wire encoding must not reorder the list.
    #[test]
    fn namespace_response_fd_order() {
        let order = vec![NamespaceType::Pid, NamespaceType::Net, NamespaceType::Mount];
        let resp = NamespaceResponse {
            allocation_id: "alloc-42".to_string(),
            fd_types: order.clone(),
            uenv_mount_path: Some("/run/pact/uenv/pytorch-2.5".to_string()),
        };
        let json = serde_json::to_string(&resp).unwrap();
        let deser: NamespaceResponse = serde_json::from_str(&json).unwrap();
        assert_eq!(deser.allocation_id, "alloc-42");
        assert_eq!(deser.fd_types, order);
        assert_eq!(deser.fd_types[0], NamespaceType::Pid);
        assert_eq!(deser.fd_types[1], NamespaceType::Net);
        assert_eq!(deser.fd_types[2], NamespaceType::Mount);
        assert_eq!(
            deser.uenv_mount_path.as_deref(),
            Some("/run/pact/uenv/pytorch-2.5")
        );
    }
}
151}