1use crate::container::{ContainerState, ProcessIdentity};
2use crate::error::{NucleusError, Result};
3use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
4use nix::unistd::{fork, ForkResult, Pid};
5use std::ffi::CString;
6use std::fs::File;
7use std::os::unix::io::AsRawFd;
8use std::thread;
9use std::time::{Duration, Instant};
10use tracing::info;
11
/// Namespace for the "attach" operation: every function in its `impl` block is
/// an associated function, so this unit type carries no state of its own.
pub struct ContainerAttach;
14
/// Kind of check that [`NamespaceCommandRunner`] performs from inside a
/// container's namespaces.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so probes can be logged,
/// duplicated and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NamespaceProbe {
    /// Execute a command (program name followed by its arguments); the probe
    /// result is derived from the command's exit status.
    Exec(Vec<String>),
    /// Try to open a TCP connection to `127.0.0.1` on the given port.
    TcpConnect(u16),
}
20
/// Namespace for running a [`NamespaceProbe`] inside a container's namespaces
/// via a forked helper process; a stateless unit type.
pub struct NamespaceCommandRunner;
23
24impl ContainerAttach {
25 pub fn attach(state: &ContainerState, command: Vec<String>) -> Result<i32> {
30 if !state.is_running() {
31 return Err(NucleusError::AttachError(format!(
32 "Container {} is not running",
33 state.id
34 )));
35 }
36
37 let current_uid = nix::unistd::Uid::effective().as_raw();
39 if current_uid != 0 && current_uid != state.creator_uid {
40 return Err(NucleusError::AttachError(format!(
41 "Permission denied: container {} owned by UID {}, caller is UID {}",
42 state.id, state.creator_uid, current_uid
43 )));
44 }
45
46 if state.using_gvisor {
49 return Err(NucleusError::AttachError(format!(
50 "Container {} uses gVisor runtime; attach is not supported \
51 (use 'runsc exec' to interact with the guest workload)",
52 state.id
53 )));
54 }
55
56 let pid = state.pid;
57 info!("Attaching to container {} (PID {})", state.id, pid);
58
59 let ns_fds = Self::open_namespace_fds(pid, state.rootless)?;
60
61 match unsafe { fork() }
63 .map_err(|e| NucleusError::AttachError(format!("Fork failed: {}", e)))?
64 {
65 ForkResult::Parent { child } => {
66 Self::wait_for_child(child)
68 }
69 ForkResult::Child => {
70 match Self::enter_and_exec(&ns_fds, &command) {
72 Ok(_) => unreachable!(),
73 Err(e) => {
74 eprintln!("Attach failed: {}", e);
75 std::process::exit(1);
76 }
77 }
78 }
79 }
80 }
81
82 fn enter_and_exec(ns_fds: &[(String, File)], command: &[String]) -> Result<()> {
83 if command.is_empty() {
84 return Err(NucleusError::AttachError(
85 "No command specified for attach".to_string(),
86 ));
87 }
88
89 Self::enter_namespaces(ns_fds)?;
90 Self::apply_exec_hardening()?;
91 let env = Self::default_exec_env()?;
92 Self::exec_with_env(command, &env)
93 }
94
95 fn open_namespace_fds(pid: u32, rootless: bool) -> Result<Vec<(String, File)>> {
96 let ns_types = if rootless {
97 &["user", "pid", "mnt", "net", "uts", "ipc", "cgroup"][..]
98 } else {
99 &["pid", "mnt", "net", "uts", "ipc", "cgroup"][..]
100 };
101 let mut ns_fds: Vec<(String, File)> = Vec::new();
102
103 for ns in ns_types {
104 let ns_path = format!("/proc/{}/ns/{}", pid, ns);
105 match File::open(&ns_path) {
106 Ok(f) => ns_fds.push(((*ns).to_string(), f)),
107 Err(e) => {
108 info!("Skipping namespace {}: {}", ns, e);
110 }
111 }
112 }
113
114 if ns_fds.is_empty() {
115 return Err(NucleusError::AttachError(
116 "Could not open any namespace FDs".to_string(),
117 ));
118 }
119
120 Ok(ns_fds)
121 }
122
123 fn enter_namespaces(ns_fds: &[(String, File)]) -> Result<()> {
124 let mut pid_ns_fd: Option<&File> = None;
128
129 for (ns_name, fd) in ns_fds {
131 if ns_name == "user" {
132 let ret = unsafe { libc::setns(fd.as_raw_fd(), libc::CLONE_NEWUSER) };
135 if ret != 0 {
136 let err = std::io::Error::last_os_error();
137 return Err(NucleusError::AttachError(format!(
138 "setns(user) failed: {}",
139 err
140 )));
141 }
142 info!("Entered user namespace");
143 }
144 }
145
146 for (ns_name, fd) in ns_fds {
148 if ns_name == "pid" {
149 pid_ns_fd = Some(fd);
150 continue;
151 }
152 if ns_name == "user" {
153 continue; }
155
156 let nstype = Self::ns_name_to_clone_flag(ns_name);
157 let raw_fd = fd.as_raw_fd();
158 let ret = unsafe { libc::setns(raw_fd, nstype) };
161 if ret != 0 {
162 let err = std::io::Error::last_os_error();
163 return Err(NucleusError::AttachError(format!(
164 "setns({}) failed: {}",
165 ns_name, err
166 )));
167 }
168 info!("Entered {} namespace", ns_name);
169 }
170
171 if let Some(fd) = pid_ns_fd {
172 let ret = unsafe { libc::setns(fd.as_raw_fd(), libc::CLONE_NEWPID) };
175 if ret != 0 {
176 let err = std::io::Error::last_os_error();
177 return Err(NucleusError::AttachError(format!(
178 "setns(pid) failed: {}",
179 err
180 )));
181 }
182 info!("Entered pid namespace");
183
184 match unsafe { fork() }.map_err(|e| {
186 NucleusError::AttachError(format!("Fork failed after setns(pid): {}", e))
187 })? {
188 ForkResult::Parent { child } => {
189 let code = Self::wait_for_child(child)?;
190 std::process::exit(code);
191 }
192 ForkResult::Child => {
193 }
195 }
196 }
197
198 nix::unistd::chdir("/")
200 .map_err(|e| NucleusError::AttachError(format!("chdir(\"/\") failed: {}", e)))?;
201
202 Ok(())
203 }
204
205 fn apply_exec_hardening() -> Result<()> {
206 let ret = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
211 if ret != 0 {
212 return Err(NucleusError::AttachError(format!(
213 "Failed to set PR_SET_NO_NEW_PRIVS: {}",
214 std::io::Error::last_os_error()
215 )));
216 }
217
218 let mut cap_mgr = crate::security::CapabilityManager::new();
219 cap_mgr.drop_all().map_err(|e| {
220 NucleusError::AttachError(format!("Failed to drop capabilities: {}", e))
221 })?;
222
223 let mut seccomp_mgr = crate::security::SeccompManager::new();
225 if let Err(e) = seccomp_mgr.apply_minimal_filter() {
226 tracing::warn!(
227 "Failed to apply seccomp filter on attach: {} (continuing)",
228 e
229 );
230 }
231
232 let mut landlock_mgr = crate::security::LandlockManager::new();
234 if let Err(e) = landlock_mgr.apply_container_policy_with_mode(true) {
235 tracing::warn!("Failed to apply Landlock on attach: {} (continuing)", e);
236 }
237
238 Ok(())
239 }
240
241 fn default_exec_env() -> Result<Vec<CString>> {
242 Ok(vec![
243 CString::new("PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
244 .map_err(|e| NucleusError::AttachError(format!("Invalid PATH env: {}", e)))?,
245 CString::new("TERM=xterm")
246 .map_err(|e| NucleusError::AttachError(format!("Invalid TERM env: {}", e)))?,
247 CString::new("HOME=/")
248 .map_err(|e| NucleusError::AttachError(format!("Invalid HOME env: {}", e)))?,
249 ])
250 }
251
252 fn exec_with_env(command: &[String], env: &[CString]) -> Result<()> {
253 let program = CString::new(command[0].as_str())
254 .map_err(|e| NucleusError::AttachError(format!("Invalid program name: {}", e)))?;
255
256 let args: std::result::Result<Vec<CString>, _> = command
257 .iter()
258 .map(|arg| CString::new(arg.as_str()))
259 .collect();
260 let args =
261 args.map_err(|e| NucleusError::AttachError(format!("Invalid argument: {}", e)))?;
262
263 nix::unistd::execve::<CString, CString>(&program, &args, env)
264 .map_err(|e| NucleusError::AttachError(format!("execve failed: {}", e)))?;
265
266 Ok(())
267 }
268
269 fn ns_name_to_clone_flag(name: &str) -> libc::c_int {
270 match name {
271 "user" => libc::CLONE_NEWUSER,
272 "pid" => libc::CLONE_NEWPID,
273 "mnt" => libc::CLONE_NEWNS,
274 "net" => libc::CLONE_NEWNET,
275 "uts" => libc::CLONE_NEWUTS,
276 "ipc" => libc::CLONE_NEWIPC,
277 "cgroup" => libc::CLONE_NEWCGROUP,
278 unknown => {
281 tracing::warn!(
282 "Unknown namespace type '{}': setns will infer from FD (potential typo?)",
283 unknown
284 );
285 0
286 }
287 }
288 }
289
290 fn wait_for_child(child: Pid) -> Result<i32> {
291 loop {
292 match waitpid(child, None) {
293 Ok(WaitStatus::Exited(_, code)) => return Ok(code),
294 Ok(WaitStatus::Signaled(_, signal, _)) => return Ok(128 + signal as i32),
295 Err(nix::errno::Errno::EINTR) => continue,
296 Err(e) => {
297 return Err(NucleusError::AttachError(format!("waitpid failed: {}", e)));
298 }
299 _ => continue,
300 }
301 }
302 }
303}
304
305impl NamespaceCommandRunner {
306 pub fn run(
313 pid: u32,
314 rootless: bool,
315 using_gvisor: bool,
316 probe: NamespaceProbe,
317 process_identity: Option<&ProcessIdentity>,
318 timeout: Option<Duration>,
319 ) -> Result<bool> {
320 if using_gvisor {
321 return Err(NucleusError::ExecError(
322 "Namespace-local exec probes are unsupported for gVisor containers".to_string(),
323 ));
324 }
325
326 let ns_fds = ContainerAttach::open_namespace_fds(pid, rootless)?;
327
328 match unsafe { fork() }.map_err(|e| {
329 NucleusError::ExecError(format!("Failed to fork namespace helper: {}", e))
330 })? {
331 ForkResult::Parent { child } => Self::wait_for_probe(child, timeout),
332 ForkResult::Child => {
333 let exit_code =
334 match Self::enter_and_run(&ns_fds, probe, process_identity, rootless) {
335 Ok(true) => 0,
336 Ok(false) => 1,
337 Err(e) => {
338 eprintln!("Namespace helper failed: {}", e);
339 125
340 }
341 };
342 std::process::exit(exit_code);
343 }
344 }
345 }
346
347 fn enter_and_run(
348 ns_fds: &[(String, File)],
349 probe: NamespaceProbe,
350 process_identity: Option<&ProcessIdentity>,
351 rootless: bool,
352 ) -> Result<bool> {
353 ContainerAttach::enter_namespaces(ns_fds)?;
354 ContainerAttach::apply_exec_hardening()?;
355
356 match probe {
357 NamespaceProbe::Exec(command) => {
358 if let Some(identity) = process_identity {
359 crate::container::Container::apply_process_identity_to_current_process(
360 identity, rootless,
361 )?;
362 }
363 let env = ContainerAttach::default_exec_env()?;
364 ContainerAttach::exec_with_env(&command, &env)?;
365 unreachable!()
366 }
367 NamespaceProbe::TcpConnect(port) => {
368 let addr = std::net::SocketAddr::from(([127, 0, 0, 1], port));
369 Ok(std::net::TcpStream::connect_timeout(&addr, Duration::from_secs(2)).is_ok())
370 }
371 }
372 }
373
374 fn wait_for_probe(child: Pid, timeout: Option<Duration>) -> Result<bool> {
375 let start = Instant::now();
376 loop {
377 match waitpid(child, Some(WaitPidFlag::WNOHANG)) {
378 Ok(WaitStatus::StillAlive) => {
379 if let Some(limit) = timeout {
380 if start.elapsed() >= limit {
381 let _ =
382 nix::sys::signal::kill(child, nix::sys::signal::Signal::SIGKILL);
383 let _ = waitpid(child, None);
384 return Ok(false);
385 }
386 }
387 thread::sleep(Duration::from_millis(50));
388 }
389 Ok(WaitStatus::Exited(_, code)) => return Ok(code == 0),
390 Ok(WaitStatus::Signaled(_, _, _)) => return Ok(false),
391 Err(nix::errno::Errno::EINTR) => continue,
392 Err(e) => {
393 return Err(NucleusError::ExecError(format!(
394 "Failed waiting for namespace helper: {}",
395 e
396 )));
397 }
398 _ => continue,
399 }
400 }
401 }
402}