1use crate::container::ContainerState;
2use crate::error::{NucleusError, Result};
3use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
4use nix::unistd::{fork, ForkResult, Pid};
5use std::ffi::CString;
6use std::fs::File;
7use std::os::unix::io::AsRawFd;
8use std::thread;
9use std::time::{Duration, Instant};
10use tracing::info;
11
/// Entry point for running a command inside the namespaces of an existing container.
///
/// This is a stateless marker type: all functionality lives in associated functions.
#[derive(Debug, Clone, Copy)]
pub struct ContainerAttach;
14
/// A check that a helper process performs after joining a container's namespaces.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NamespaceProbe {
    /// Replace the helper process with this command: the program followed by its arguments.
    Exec(Vec<String>),
    /// Attempt a TCP connection to this port on `127.0.0.1`.
    TcpConnect(u16),
}
20
/// Runs a [`NamespaceProbe`] from a forked helper process that joins a container's namespaces.
///
/// This is a stateless marker type: all functionality lives in associated functions.
#[derive(Debug, Clone, Copy)]
pub struct NamespaceCommandRunner;
23
24impl ContainerAttach {
25 pub fn attach(state: &ContainerState, command: Vec<String>) -> Result<i32> {
30 if !state.is_running() {
31 return Err(NucleusError::AttachError(format!(
32 "Container {} is not running",
33 state.id
34 )));
35 }
36
37 let current_uid = nix::unistd::Uid::effective().as_raw();
39 if current_uid != 0 && current_uid != state.creator_uid {
40 return Err(NucleusError::AttachError(format!(
41 "Permission denied: container {} owned by UID {}, caller is UID {}",
42 state.id, state.creator_uid, current_uid
43 )));
44 }
45
46 if state.using_gvisor {
49 return Err(NucleusError::AttachError(format!(
50 "Container {} uses gVisor runtime; attach is not supported \
51 (use 'runsc exec' to interact with the guest workload)",
52 state.id
53 )));
54 }
55
56 let pid = state.pid;
57 info!("Attaching to container {} (PID {})", state.id, pid);
58
59 let ns_fds = Self::open_namespace_fds(pid, state.rootless)?;
60
61 match unsafe { fork() }
63 .map_err(|e| NucleusError::AttachError(format!("Fork failed: {}", e)))?
64 {
65 ForkResult::Parent { child } => {
66 Self::wait_for_child(child)
68 }
69 ForkResult::Child => {
70 match Self::enter_and_exec(&ns_fds, &command) {
72 Ok(_) => unreachable!(),
73 Err(e) => {
74 eprintln!("Attach failed: {}", e);
75 std::process::exit(1);
76 }
77 }
78 }
79 }
80 }
81
82 fn enter_and_exec(ns_fds: &[(String, File)], command: &[String]) -> Result<()> {
83 if command.is_empty() {
84 return Err(NucleusError::AttachError(
85 "No command specified for attach".to_string(),
86 ));
87 }
88
89 Self::enter_namespaces(ns_fds)?;
90 Self::apply_exec_hardening()?;
91 let env = Self::default_exec_env()?;
92 Self::exec_with_env(command, &env)
93 }
94
95 fn open_namespace_fds(pid: u32, rootless: bool) -> Result<Vec<(String, File)>> {
96 let ns_types = if rootless {
97 &["user", "pid", "mnt", "net", "uts", "ipc", "cgroup"][..]
98 } else {
99 &["pid", "mnt", "net", "uts", "ipc", "cgroup"][..]
100 };
101 let mut ns_fds: Vec<(String, File)> = Vec::new();
102
103 for ns in ns_types {
104 let ns_path = format!("/proc/{}/ns/{}", pid, ns);
105 match File::open(&ns_path) {
106 Ok(f) => ns_fds.push(((*ns).to_string(), f)),
107 Err(e) => {
108 info!("Skipping namespace {}: {}", ns, e);
110 }
111 }
112 }
113
114 if ns_fds.is_empty() {
115 return Err(NucleusError::AttachError(
116 "Could not open any namespace FDs".to_string(),
117 ));
118 }
119
120 Ok(ns_fds)
121 }
122
123 fn enter_namespaces(ns_fds: &[(String, File)]) -> Result<()> {
124 let mut pid_ns_fd: Option<&File> = None;
128
129 for (ns_name, fd) in ns_fds {
131 if ns_name == "user" {
132 let ret = unsafe { libc::setns(fd.as_raw_fd(), libc::CLONE_NEWUSER) };
133 if ret != 0 {
134 let err = std::io::Error::last_os_error();
135 return Err(NucleusError::AttachError(format!(
136 "setns(user) failed: {}",
137 err
138 )));
139 }
140 info!("Entered user namespace");
141 }
142 }
143
144 for (ns_name, fd) in ns_fds {
146 if ns_name == "pid" {
147 pid_ns_fd = Some(fd);
148 continue;
149 }
150 if ns_name == "user" {
151 continue; }
153
154 let nstype = Self::ns_name_to_clone_flag(ns_name);
155 let raw_fd = fd.as_raw_fd();
156 let ret = unsafe { libc::setns(raw_fd, nstype) };
157 if ret != 0 {
158 let err = std::io::Error::last_os_error();
159 return Err(NucleusError::AttachError(format!(
160 "setns({}) failed: {}",
161 ns_name, err
162 )));
163 }
164 info!("Entered {} namespace", ns_name);
165 }
166
167 if let Some(fd) = pid_ns_fd {
168 let ret = unsafe { libc::setns(fd.as_raw_fd(), libc::CLONE_NEWPID) };
169 if ret != 0 {
170 let err = std::io::Error::last_os_error();
171 return Err(NucleusError::AttachError(format!(
172 "setns(pid) failed: {}",
173 err
174 )));
175 }
176 info!("Entered pid namespace");
177
178 match unsafe { fork() }.map_err(|e| {
180 NucleusError::AttachError(format!("Fork failed after setns(pid): {}", e))
181 })? {
182 ForkResult::Parent { child } => {
183 let code = Self::wait_for_child(child)?;
184 std::process::exit(code);
185 }
186 ForkResult::Child => {
187 }
189 }
190 }
191
192 nix::unistd::chdir("/")
194 .map_err(|e| NucleusError::AttachError(format!("chdir(\"/\") failed: {}", e)))?;
195
196 Ok(())
197 }
198
199 fn apply_exec_hardening() -> Result<()> {
200 let ret = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
202 if ret != 0 {
203 return Err(NucleusError::AttachError(format!(
204 "Failed to set PR_SET_NO_NEW_PRIVS: {}",
205 std::io::Error::last_os_error()
206 )));
207 }
208
209 let mut cap_mgr = crate::security::CapabilityManager::new();
210 cap_mgr.drop_all().map_err(|e| {
211 NucleusError::AttachError(format!("Failed to drop capabilities: {}", e))
212 })?;
213
214 Ok(())
215 }
216
217 fn default_exec_env() -> Result<Vec<CString>> {
218 Ok(vec![
219 CString::new("PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
220 .map_err(|e| NucleusError::AttachError(format!("Invalid PATH env: {}", e)))?,
221 CString::new("TERM=xterm")
222 .map_err(|e| NucleusError::AttachError(format!("Invalid TERM env: {}", e)))?,
223 CString::new("HOME=/")
224 .map_err(|e| NucleusError::AttachError(format!("Invalid HOME env: {}", e)))?,
225 ])
226 }
227
228 fn exec_with_env(command: &[String], env: &[CString]) -> Result<()> {
229 let program = CString::new(command[0].as_str())
230 .map_err(|e| NucleusError::AttachError(format!("Invalid program name: {}", e)))?;
231
232 let args: std::result::Result<Vec<CString>, _> = command
233 .iter()
234 .map(|arg| CString::new(arg.as_str()))
235 .collect();
236 let args =
237 args.map_err(|e| NucleusError::AttachError(format!("Invalid argument: {}", e)))?;
238
239 nix::unistd::execve::<CString, CString>(&program, &args, env)
240 .map_err(|e| NucleusError::AttachError(format!("execve failed: {}", e)))?;
241
242 Ok(())
243 }
244
245 fn ns_name_to_clone_flag(name: &str) -> libc::c_int {
246 match name {
247 "user" => libc::CLONE_NEWUSER,
248 "pid" => libc::CLONE_NEWPID,
249 "mnt" => libc::CLONE_NEWNS,
250 "net" => libc::CLONE_NEWNET,
251 "uts" => libc::CLONE_NEWUTS,
252 "ipc" => libc::CLONE_NEWIPC,
253 "cgroup" => libc::CLONE_NEWCGROUP,
254 _ => 0,
256 }
257 }
258
259 fn wait_for_child(child: Pid) -> Result<i32> {
260 loop {
261 match waitpid(child, None) {
262 Ok(WaitStatus::Exited(_, code)) => return Ok(code),
263 Ok(WaitStatus::Signaled(_, signal, _)) => return Ok(128 + signal as i32),
264 Err(nix::errno::Errno::EINTR) => continue,
265 Err(e) => {
266 return Err(NucleusError::AttachError(format!("waitpid failed: {}", e)));
267 }
268 _ => continue,
269 }
270 }
271 }
272}
273
274impl NamespaceCommandRunner {
275 pub fn run(
282 pid: u32,
283 rootless: bool,
284 using_gvisor: bool,
285 probe: NamespaceProbe,
286 timeout: Option<Duration>,
287 ) -> Result<bool> {
288 if using_gvisor {
289 return Err(NucleusError::ExecError(
290 "Namespace-local exec probes are unsupported for gVisor containers".to_string(),
291 ));
292 }
293
294 let ns_fds = ContainerAttach::open_namespace_fds(pid, rootless)?;
295
296 match unsafe { fork() }.map_err(|e| {
297 NucleusError::ExecError(format!("Failed to fork namespace helper: {}", e))
298 })? {
299 ForkResult::Parent { child } => Self::wait_for_probe(child, timeout),
300 ForkResult::Child => {
301 let exit_code = match Self::enter_and_run(&ns_fds, probe) {
302 Ok(true) => 0,
303 Ok(false) => 1,
304 Err(e) => {
305 eprintln!("Namespace helper failed: {}", e);
306 125
307 }
308 };
309 std::process::exit(exit_code);
310 }
311 }
312 }
313
314 fn enter_and_run(ns_fds: &[(String, File)], probe: NamespaceProbe) -> Result<bool> {
315 ContainerAttach::enter_namespaces(ns_fds)?;
316 ContainerAttach::apply_exec_hardening()?;
317
318 match probe {
319 NamespaceProbe::Exec(command) => {
320 let env = ContainerAttach::default_exec_env()?;
321 ContainerAttach::exec_with_env(&command, &env)?;
322 unreachable!()
323 }
324 NamespaceProbe::TcpConnect(port) => {
325 let addr = std::net::SocketAddr::from(([127, 0, 0, 1], port));
326 Ok(std::net::TcpStream::connect_timeout(&addr, Duration::from_secs(2)).is_ok())
327 }
328 }
329 }
330
331 fn wait_for_probe(child: Pid, timeout: Option<Duration>) -> Result<bool> {
332 let start = Instant::now();
333 loop {
334 match waitpid(child, Some(WaitPidFlag::WNOHANG)) {
335 Ok(WaitStatus::StillAlive) => {
336 if let Some(limit) = timeout {
337 if start.elapsed() >= limit {
338 let _ = nix::sys::signal::kill(child, nix::sys::signal::Signal::SIGKILL);
339 let _ = waitpid(child, None);
340 return Ok(false);
341 }
342 }
343 thread::sleep(Duration::from_millis(50));
344 }
345 Ok(WaitStatus::Exited(_, code)) => return Ok(code == 0),
346 Ok(WaitStatus::Signaled(_, _, _)) => return Ok(false),
347 Err(nix::errno::Errno::EINTR) => continue,
348 Err(e) => {
349 return Err(NucleusError::ExecError(format!(
350 "Failed waiting for namespace helper: {}",
351 e
352 )));
353 }
354 _ => continue,
355 }
356 }
357 }
358}