Skip to main content

hpc_node/
namespace.rs

1//! Namespace handoff protocol.
2//!
3//! Defines the protocol for passing Linux namespace file descriptors between
4//! pact-agent (provider) and lattice-node-agent (consumer) via unix socket
5//! with `SCM_RIGHTS`.
6
7use serde::{Deserialize, Serialize};
8
/// Filesystem path of the unix socket used for the namespace handoff.
///
/// pact-agent is the listening side of this socket; lattice-node-agent
/// connects to it to request namespaces for its allocations.
pub const HANDOFF_SOCKET_PATH: &str = "/run/pact/handoff.sock";
14
15/// Namespace types that can be created and handed off.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
17pub enum NamespaceType {
18    /// PID namespace — isolated process ID space per allocation.
19    Pid,
20    /// Network namespace — isolated network stack per allocation.
21    Net,
22    /// Mount namespace — isolated mount table per allocation.
23    Mount,
24}
25
26/// Request from lattice to pact for allocation namespaces.
27///
28/// Sent over the handoff unix socket as a framed message.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct NamespaceRequest {
31    /// Allocation identifier (from lattice scheduler).
32    pub allocation_id: String,
33    /// Which namespaces to create.
34    pub namespaces: Vec<NamespaceType>,
35    /// Optional uenv image to mount inside the mount namespace.
36    pub uenv_image: Option<String>,
37}
38
39/// Response from pact to lattice with namespace metadata.
40///
41/// The actual namespace file descriptors are passed via `SCM_RIGHTS`
42/// ancillary data on the unix socket, in the same order as
43/// `requested_types`.
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct NamespaceResponse {
46    /// Allocation identifier (echoed from request).
47    pub allocation_id: String,
48    /// Namespace types in the order their FDs are attached via `SCM_RIGHTS`.
49    pub fd_types: Vec<NamespaceType>,
50    /// Mount point for uenv bind-mount inside the mount namespace (if requested).
51    pub uenv_mount_path: Option<String>,
52}
53
54/// Notification that an allocation has ended.
55///
56/// Sent when pact detects that all processes in the allocation's cgroup
57/// have exited (WI5: cgroup-empty detection). Lattice can also send this
58/// proactively if it knows the allocation ended.
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct AllocationEnded {
61    /// Allocation identifier.
62    pub allocation_id: String,
63}
64
65/// Trait for the namespace handoff provider (pact-agent implements).
66///
67/// Creates Linux namespaces for allocations and makes their FDs available
68/// for passing to lattice via the unix socket.
69pub trait NamespaceProvider: Send + Sync {
70    /// Create namespaces for an allocation.
71    ///
72    /// Returns metadata about the created namespaces. The actual FDs are
73    /// made available for the handoff socket to send via `SCM_RIGHTS`.
74    ///
75    /// # Errors
76    ///
77    /// Returns [`NamespaceError::CreationFailed`] if `unshare(2)` or namespace
78    /// setup fails.
79    fn create_namespaces(
80        &self,
81        request: &NamespaceRequest,
82    ) -> Result<NamespaceResponse, NamespaceError>;
83
84    /// Release namespaces for a completed allocation.
85    ///
86    /// Cleans up namespace FDs and any associated resources (bind-mounts).
87    fn release_namespaces(&self, allocation_id: &str) -> Result<(), NamespaceError>;
88}
89
90/// Trait for the namespace handoff consumer (lattice-node-agent implements).
91///
92/// Requests namespaces from the provider (pact). When the provider is
93/// unavailable, falls back to self-service namespace creation (WI4, F27).
94pub trait NamespaceConsumer: Send + Sync {
95    /// Request namespaces from the provider.
96    ///
97    /// If the provider is unavailable (handoff socket not reachable),
98    /// implementations should fall back to creating their own namespaces
99    /// using the same conventions (WI4).
100    fn request_namespaces(
101        &self,
102        request: &NamespaceRequest,
103    ) -> Result<NamespaceResponse, NamespaceError>;
104}
105
106/// Errors from namespace operations.
107#[derive(Debug, thiserror::Error)]
108pub enum NamespaceError {
109    #[error("handoff socket unavailable: {reason}")]
110    SocketUnavailable { reason: String },
111
112    #[error("namespace creation failed: {reason}")]
113    CreationFailed { reason: String },
114
115    #[error("allocation not found: {allocation_id}")]
116    AllocationNotFound { allocation_id: String },
117
118    #[error("FD passing failed: {0}")]
119    FdPassing(#[from] std::io::Error),
120}
121
#[cfg(test)]
mod tests {
    use super::*;

    /// A request must survive a JSON round trip with every field intact,
    /// including the order of the requested namespaces.
    #[test]
    fn namespace_request_serialization() {
        let req = NamespaceRequest {
            allocation_id: "alloc-42".to_string(),
            namespaces: vec![NamespaceType::Pid, NamespaceType::Net, NamespaceType::Mount],
            uenv_image: Some("pytorch-2.5.sqfs".to_string()),
        };
        let json = serde_json::to_string(&req).unwrap();
        let deser: NamespaceRequest = serde_json::from_str(&json).unwrap();
        assert_eq!(deser.allocation_id, "alloc-42");
        // Compare contents rather than just the length, so a reordered or
        // altered namespace list is caught.
        assert_eq!(deser.namespaces, req.namespaces);
        assert_eq!(deser.uenv_image.as_deref(), Some("pytorch-2.5.sqfs"));
    }

    /// The order of `fd_types` tells the receiver which FD is which, so it
    /// must be preserved across serialization.
    #[test]
    fn namespace_response_fd_order() {
        let resp = NamespaceResponse {
            allocation_id: "alloc-42".to_string(),
            fd_types: vec![NamespaceType::Pid, NamespaceType::Net, NamespaceType::Mount],
            uenv_mount_path: Some("/run/pact/uenv/pytorch-2.5".to_string()),
        };
        // Round-trip through JSON; asserting on the freshly built value alone
        // would only check the literal above.
        let json = serde_json::to_string(&resp).unwrap();
        let deser: NamespaceResponse = serde_json::from_str(&json).unwrap();
        assert_eq!(deser.allocation_id, "alloc-42");
        assert_eq!(
            deser.fd_types,
            [NamespaceType::Pid, NamespaceType::Net, NamespaceType::Mount]
        );
        assert_eq!(
            deser.uenv_mount_path.as_deref(),
            Some("/run/pact/uenv/pytorch-2.5")
        );
    }
}