1use crate::container::{ContainerState, ProcessIdentity};
2use crate::error::{NucleusError, Result};
3use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
4use nix::unistd::{fork, ForkResult, Pid};
5use std::ffi::CString;
6use std::fs::File;
7use std::os::unix::io::AsRawFd;
8use std::thread;
9use std::time::{Duration, Instant};
10use tracing::info;
11
/// Namespace type for the associated functions that attach a command to a
/// running container. It carries no data.
#[derive(Debug, Clone, Copy, Default)]
pub struct ContainerAttach;
14
/// A check that is executed from inside a container's namespaces.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NamespaceProbe {
    /// Execute a command: the first element is the program, and the whole
    /// vector is passed as its argument list. The probe succeeds when the
    /// command exits with status 0.
    Exec(Vec<String>),
    /// Open a TCP connection to `127.0.0.1` on the given port. The probe
    /// succeeds when the connection is established.
    TcpConnect(u16),
}
20
/// Namespace type for the associated functions that run a [`NamespaceProbe`]
/// inside a container's namespaces. It carries no data.
#[derive(Debug, Clone, Copy, Default)]
pub struct NamespaceCommandRunner;
23
24impl ContainerAttach {
25 pub fn attach(state: &ContainerState, command: Vec<String>) -> Result<i32> {
30 if !state.is_running() {
31 return Err(NucleusError::AttachError(format!(
32 "Container {} is not running",
33 state.id
34 )));
35 }
36
37 let current_uid = nix::unistd::Uid::effective().as_raw();
39 if current_uid != 0 && current_uid != state.creator_uid {
40 return Err(NucleusError::AttachError(format!(
41 "Permission denied: container {} owned by UID {}, caller is UID {}",
42 state.id, state.creator_uid, current_uid
43 )));
44 }
45
46 if state.using_gvisor {
49 return Err(NucleusError::AttachError(format!(
50 "Container {} uses gVisor runtime; attach is not supported \
51 (use 'runsc exec' to interact with the guest workload)",
52 state.id
53 )));
54 }
55
56 let pid = state.pid;
57 info!("Attaching to container {} (PID {})", state.id, pid);
58
59 let ns_fds = Self::open_namespace_fds(pid, state.rootless)?;
60
61 match unsafe { fork() }
63 .map_err(|e| NucleusError::AttachError(format!("Fork failed: {}", e)))?
64 {
65 ForkResult::Parent { child } => {
66 Self::wait_for_child(child)
68 }
69 ForkResult::Child => {
70 match Self::enter_and_exec(&ns_fds, &command) {
72 Ok(_) => unreachable!(),
73 Err(e) => {
74 eprintln!("Attach failed: {}", e);
75 std::process::exit(1);
76 }
77 }
78 }
79 }
80 }
81
82 fn enter_and_exec(ns_fds: &[(String, File)], command: &[String]) -> Result<()> {
83 if command.is_empty() {
84 return Err(NucleusError::AttachError(
85 "No command specified for attach".to_string(),
86 ));
87 }
88
89 Self::enter_namespaces(ns_fds)?;
90 Self::apply_exec_hardening()?;
91 let env = Self::default_exec_env()?;
92 Self::exec_with_env(command, &env)
93 }
94
95 fn open_namespace_fds(pid: u32, rootless: bool) -> Result<Vec<(String, File)>> {
96 let ns_types = if rootless {
97 &["user", "pid", "mnt", "net", "uts", "ipc", "cgroup"][..]
98 } else {
99 &["pid", "mnt", "net", "uts", "ipc", "cgroup"][..]
100 };
101 let mut ns_fds: Vec<(String, File)> = Vec::new();
102
103 for ns in ns_types {
104 let ns_path = format!("/proc/{}/ns/{}", pid, ns);
105 match File::open(&ns_path) {
106 Ok(f) => ns_fds.push(((*ns).to_string(), f)),
107 Err(e) => {
108 info!("Skipping namespace {}: {}", ns, e);
110 }
111 }
112 }
113
114 if ns_fds.is_empty() {
115 return Err(NucleusError::AttachError(
116 "Could not open any namespace FDs".to_string(),
117 ));
118 }
119
120 Ok(ns_fds)
121 }
122
123 fn enter_namespaces(ns_fds: &[(String, File)]) -> Result<()> {
124 let mut pid_ns_fd: Option<&File> = None;
128
129 for (ns_name, fd) in ns_fds {
131 if ns_name == "user" {
132 let ret = unsafe { libc::setns(fd.as_raw_fd(), libc::CLONE_NEWUSER) };
133 if ret != 0 {
134 let err = std::io::Error::last_os_error();
135 return Err(NucleusError::AttachError(format!(
136 "setns(user) failed: {}",
137 err
138 )));
139 }
140 info!("Entered user namespace");
141 }
142 }
143
144 for (ns_name, fd) in ns_fds {
146 if ns_name == "pid" {
147 pid_ns_fd = Some(fd);
148 continue;
149 }
150 if ns_name == "user" {
151 continue; }
153
154 let nstype = Self::ns_name_to_clone_flag(ns_name);
155 let raw_fd = fd.as_raw_fd();
156 let ret = unsafe { libc::setns(raw_fd, nstype) };
157 if ret != 0 {
158 let err = std::io::Error::last_os_error();
159 return Err(NucleusError::AttachError(format!(
160 "setns({}) failed: {}",
161 ns_name, err
162 )));
163 }
164 info!("Entered {} namespace", ns_name);
165 }
166
167 if let Some(fd) = pid_ns_fd {
168 let ret = unsafe { libc::setns(fd.as_raw_fd(), libc::CLONE_NEWPID) };
169 if ret != 0 {
170 let err = std::io::Error::last_os_error();
171 return Err(NucleusError::AttachError(format!(
172 "setns(pid) failed: {}",
173 err
174 )));
175 }
176 info!("Entered pid namespace");
177
178 match unsafe { fork() }.map_err(|e| {
180 NucleusError::AttachError(format!("Fork failed after setns(pid): {}", e))
181 })? {
182 ForkResult::Parent { child } => {
183 let code = Self::wait_for_child(child)?;
184 std::process::exit(code);
185 }
186 ForkResult::Child => {
187 }
189 }
190 }
191
192 nix::unistd::chdir("/")
194 .map_err(|e| NucleusError::AttachError(format!("chdir(\"/\") failed: {}", e)))?;
195
196 Ok(())
197 }
198
199 fn apply_exec_hardening() -> Result<()> {
200 let ret = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
202 if ret != 0 {
203 return Err(NucleusError::AttachError(format!(
204 "Failed to set PR_SET_NO_NEW_PRIVS: {}",
205 std::io::Error::last_os_error()
206 )));
207 }
208
209 let mut cap_mgr = crate::security::CapabilityManager::new();
210 cap_mgr.drop_all().map_err(|e| {
211 NucleusError::AttachError(format!("Failed to drop capabilities: {}", e))
212 })?;
213
214 Ok(())
215 }
216
217 fn default_exec_env() -> Result<Vec<CString>> {
218 Ok(vec![
219 CString::new("PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
220 .map_err(|e| NucleusError::AttachError(format!("Invalid PATH env: {}", e)))?,
221 CString::new("TERM=xterm")
222 .map_err(|e| NucleusError::AttachError(format!("Invalid TERM env: {}", e)))?,
223 CString::new("HOME=/")
224 .map_err(|e| NucleusError::AttachError(format!("Invalid HOME env: {}", e)))?,
225 ])
226 }
227
228 fn exec_with_env(command: &[String], env: &[CString]) -> Result<()> {
229 let program = CString::new(command[0].as_str())
230 .map_err(|e| NucleusError::AttachError(format!("Invalid program name: {}", e)))?;
231
232 let args: std::result::Result<Vec<CString>, _> = command
233 .iter()
234 .map(|arg| CString::new(arg.as_str()))
235 .collect();
236 let args =
237 args.map_err(|e| NucleusError::AttachError(format!("Invalid argument: {}", e)))?;
238
239 nix::unistd::execve::<CString, CString>(&program, &args, env)
240 .map_err(|e| NucleusError::AttachError(format!("execve failed: {}", e)))?;
241
242 Ok(())
243 }
244
245 fn ns_name_to_clone_flag(name: &str) -> libc::c_int {
246 match name {
247 "user" => libc::CLONE_NEWUSER,
248 "pid" => libc::CLONE_NEWPID,
249 "mnt" => libc::CLONE_NEWNS,
250 "net" => libc::CLONE_NEWNET,
251 "uts" => libc::CLONE_NEWUTS,
252 "ipc" => libc::CLONE_NEWIPC,
253 "cgroup" => libc::CLONE_NEWCGROUP,
254 _ => 0,
256 }
257 }
258
259 fn wait_for_child(child: Pid) -> Result<i32> {
260 loop {
261 match waitpid(child, None) {
262 Ok(WaitStatus::Exited(_, code)) => return Ok(code),
263 Ok(WaitStatus::Signaled(_, signal, _)) => return Ok(128 + signal as i32),
264 Err(nix::errno::Errno::EINTR) => continue,
265 Err(e) => {
266 return Err(NucleusError::AttachError(format!("waitpid failed: {}", e)));
267 }
268 _ => continue,
269 }
270 }
271 }
272}
273
274impl NamespaceCommandRunner {
275 pub fn run(
282 pid: u32,
283 rootless: bool,
284 using_gvisor: bool,
285 probe: NamespaceProbe,
286 process_identity: Option<&ProcessIdentity>,
287 timeout: Option<Duration>,
288 ) -> Result<bool> {
289 if using_gvisor {
290 return Err(NucleusError::ExecError(
291 "Namespace-local exec probes are unsupported for gVisor containers".to_string(),
292 ));
293 }
294
295 let ns_fds = ContainerAttach::open_namespace_fds(pid, rootless)?;
296
297 match unsafe { fork() }.map_err(|e| {
298 NucleusError::ExecError(format!("Failed to fork namespace helper: {}", e))
299 })? {
300 ForkResult::Parent { child } => Self::wait_for_probe(child, timeout),
301 ForkResult::Child => {
302 let exit_code =
303 match Self::enter_and_run(&ns_fds, probe, process_identity, rootless) {
304 Ok(true) => 0,
305 Ok(false) => 1,
306 Err(e) => {
307 eprintln!("Namespace helper failed: {}", e);
308 125
309 }
310 };
311 std::process::exit(exit_code);
312 }
313 }
314 }
315
316 fn enter_and_run(
317 ns_fds: &[(String, File)],
318 probe: NamespaceProbe,
319 process_identity: Option<&ProcessIdentity>,
320 rootless: bool,
321 ) -> Result<bool> {
322 ContainerAttach::enter_namespaces(ns_fds)?;
323 ContainerAttach::apply_exec_hardening()?;
324
325 match probe {
326 NamespaceProbe::Exec(command) => {
327 if let Some(identity) = process_identity {
328 crate::container::Container::apply_process_identity_to_current_process(
329 identity, rootless,
330 )?;
331 }
332 let env = ContainerAttach::default_exec_env()?;
333 ContainerAttach::exec_with_env(&command, &env)?;
334 unreachable!()
335 }
336 NamespaceProbe::TcpConnect(port) => {
337 let addr = std::net::SocketAddr::from(([127, 0, 0, 1], port));
338 Ok(std::net::TcpStream::connect_timeout(&addr, Duration::from_secs(2)).is_ok())
339 }
340 }
341 }
342
343 fn wait_for_probe(child: Pid, timeout: Option<Duration>) -> Result<bool> {
344 let start = Instant::now();
345 loop {
346 match waitpid(child, Some(WaitPidFlag::WNOHANG)) {
347 Ok(WaitStatus::StillAlive) => {
348 if let Some(limit) = timeout {
349 if start.elapsed() >= limit {
350 let _ =
351 nix::sys::signal::kill(child, nix::sys::signal::Signal::SIGKILL);
352 let _ = waitpid(child, None);
353 return Ok(false);
354 }
355 }
356 thread::sleep(Duration::from_millis(50));
357 }
358 Ok(WaitStatus::Exited(_, code)) => return Ok(code == 0),
359 Ok(WaitStatus::Signaled(_, _, _)) => return Ok(false),
360 Err(nix::errno::Errno::EINTR) => continue,
361 Err(e) => {
362 return Err(NucleusError::ExecError(format!(
363 "Failed waiting for namespace helper: {}",
364 e
365 )));
366 }
367 _ => continue,
368 }
369 }
370 }
371}