Skip to main content

tailscale/ssh/
shell.rs

1//! A turnkey login-shell [`ChannelHandler`] for Tailscale SSH.
2//!
3//! [`ShellHandler`] runs the policy-mapped local user's login shell inside a PTY, faithfully
4//! mirroring the interactive subset of Go `tailssh`'s incubator path: a `pty-req` allocates the
5//! PTY and starts the login shell (`<shell> -l`), `window-change` resizes it, and the child's exit
6//! code is reported back as an `exit-status`.
7//!
8//! # Security
9//!
10//! This handler **spawns a real login shell and drops privileges** to the authorized user. Several
11//! invariants keep it fail-closed:
12//!
13//! * The local user comes **only** from the [`SshAccept`] produced by the single fail-closed
14//!   authorization decision in [`auth_none`][russh::server::Handler::auth_none]. The handler never
15//!   re-evaluates policy nor falls back to a configured default user.
16//! * If the user cannot be resolved against the local passwd database, [`ShellHandler::new`]
17//!   returns `Err` and the channel is closed — **a shell is never spawned for an unknown user**.
18//! * Privileges are dropped in the child's `pre_exec` in the exact order
19//!   supplementary-groups → `setgid` → `setuid` (uid **last**, because after `setuid` the process
20//!   can no longer change its gid). Any failure aborts the `exec`, so the shell never runs with the
21//!   wrong or elevated identity. This requires the daemon to run as root; if it does not, the
22//!   `setuid`/`setgid` calls fail and the spawn fails closed.
//! * The child environment is built from scratch (`HOME`/`USER`/`LOGNAME`/`SHELL`/`PATH`/`TERM`)
//!   rather than inherited, so the daemon's environment (which may carry secrets) never leaks into
//!   the shell.
25
26use std::{path::PathBuf, sync::Arc};
27
28use nix::unistd::{Gid, Uid, User};
29use pty_process::{OwnedWritePty, Size};
30use russh::{ChannelId, Sig, server::Handle};
31use tokio::{
32    io::{AsyncReadExt, AsyncWriteExt},
33    sync::Mutex,
34};
35
36use crate::{
37    Device,
38    ssh::{ChannelEvent, ChannelHandler, SshAccept},
39};
40
/// Fallback login shell, substituted when the passwd entry of a resolved user leaves the shell
/// field empty.
const DEFAULT_SHELL: &str = "/bin/sh";
43
/// Baseline `PATH` handed to the spawned login shell. A login shell (`-l`) will typically rebuild
/// `PATH` from the system/user profiles, so this only has to be a safe minimum.
const DEFAULT_PATH: &str = "/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin";
47
48/// The resolved local-user facts needed to spawn and privilege-drop into a login shell.
49///
50/// Captured up front in [`ShellHandler::new`] so the security-critical values are fixed at
51/// authorization time and not re-resolved later.
52#[derive(Debug, Clone)]
53struct ResolvedUser {
54    /// Unix login name.
55    name: String,
56    /// Numeric user id to `setuid` to.
57    uid: Uid,
58    /// Numeric primary group id to `setgid` to.
59    gid: Gid,
60    /// Home directory (used as the shell's working directory and `$HOME`).
61    home: PathBuf,
62    /// Login shell to exec (falls back to [`DEFAULT_SHELL`] if the passwd entry is empty).
63    shell: PathBuf,
64}
65
66/// Resolve `local_user` against the local passwd database.
67///
68/// **Fail-closed:** a missing entry ([`Ok(None)`]) or a lookup error both yield `Err`, so callers
69/// never proceed to spawn a shell for an unresolved user. An empty shell field is normalized to
70/// [`DEFAULT_SHELL`].
71fn resolve_user(local_user: &str) -> std::io::Result<ResolvedUser> {
72    match User::from_name(local_user) {
73        Ok(Some(user)) => {
74            let shell = if user.shell.as_os_str().is_empty() {
75                PathBuf::from(DEFAULT_SHELL)
76            } else {
77                user.shell
78            };
79            Ok(ResolvedUser {
80                name: user.name,
81                uid: user.uid,
82                gid: user.gid,
83                home: user.dir,
84                shell,
85            })
86        }
87        Ok(None) => Err(std::io::Error::new(
88            std::io::ErrorKind::NotFound,
89            format!("ssh: local user {local_user:?} not found in passwd database"),
90        )),
91        Err(e) => Err(std::io::Error::other(format!(
92            "ssh: resolving local user {local_user:?} failed: {e}"
93        ))),
94    }
95}
96
97/// Build the minimal, non-inherited environment for the login shell as `(key, value)` pairs.
98///
99/// Only `HOME`, `USER`, `LOGNAME`, `SHELL`, `PATH`, and `TERM` are set; nothing is inherited from
100/// the daemon, so its environment (potentially holding secrets) never leaks to the shell.
101fn build_env(user: &ResolvedUser) -> Vec<(String, String)> {
102    vec![
103        ("HOME".to_string(), user.home.to_string_lossy().into_owned()),
104        ("USER".to_string(), user.name.clone()),
105        ("LOGNAME".to_string(), user.name.clone()),
106        (
107            "SHELL".to_string(),
108            user.shell.to_string_lossy().into_owned(),
109        ),
110        ("PATH".to_string(), DEFAULT_PATH.to_string()),
111        ("TERM".to_string(), "xterm-256color".to_string()),
112    ]
113}
114
/// Argument handed to the user's shell so that it starts as a login shell (`-l`), mirroring the
/// interactive path of Go `tailssh`.
const LOGIN_SHELL_ARG: &str = "-l";
118
119/// One privilege-drop operation, in the order it must be applied.
120///
121/// This is a pure, comparable representation of the security-critical drop sequence so the
122/// ordering invariant (uid **last**) can be unit-tested without root or a real fork. The plan is
123/// built before the fork (allocates) and applied step-by-step inside the `pre_exec` closure (no
124/// alloc, async-signal-safe).
125#[derive(Debug, Clone, Copy, PartialEq, Eq)]
126enum PrivDropStep {
127    /// Set supplementary groups from the user's group membership (Linux; absent on Apple).
128    /// Carries the primary `gid` because `initgroups` needs it; storing it here keeps the
129    /// executor free of any pre-fork lookups.
130    InitGroups(Gid),
131    /// Set the real/effective/saved group id.
132    SetGid(Gid),
133    /// Set the real/effective/saved user id. MUST be last.
134    SetUid(Uid),
135}
136
137/// Build the privilege-drop plan in the sacred order: supplementary groups, then setgid, then
138/// setuid LAST (uid-last so the process cannot re-raise its gid after dropping uid). This is a
139/// pure function so the ordering invariant can be unit-tested without root or a real fork.
140///
141/// `with_initgroups` is `false` on Apple targets (where `nix` has no `initgroups`), matching the
142/// `#[cfg(not(target_vendor = "apple"))]` gating of the real call; on Apple the plan is just
143/// `[SetGid, SetUid]`.
144fn priv_drop_plan(uid: Uid, gid: Gid, with_initgroups: bool) -> Vec<PrivDropStep> {
145    let mut plan = Vec::with_capacity(3);
146    if with_initgroups {
147        plan.push(PrivDropStep::InitGroups(gid));
148    }
149    plan.push(PrivDropStep::SetGid(gid));
150    plan.push(PrivDropStep::SetUid(uid));
151    plan
152}
153
154/// Apply a single privilege-drop step via the corresponding `nix`/libc wrapper.
155///
156/// Runs post-fork inside `pre_exec`, so it must stay async-signal-safe: it only calls the libc
157/// wrappers and allocates nothing. `user_cname` is the login name needed by `initgroups`; it is
158/// `Some` only on platforms where an [`PrivDropStep::InitGroups`] step is present.
159fn apply_priv_drop_step(
160    step: &PrivDropStep,
161    user_cname: Option<&std::ffi::CStr>,
162) -> std::io::Result<()> {
163    match step {
164        PrivDropStep::InitGroups(gid) => {
165            // `initgroups` is configured out of `nix` on Apple targets, and `priv_drop_plan`
166            // never emits this step there, so the call is gated to match.
167            #[cfg(not(target_vendor = "apple"))]
168            {
169                let cname = user_cname.ok_or_else(|| {
170                    std::io::Error::other("ssh: initgroups step without user name")
171                })?;
172                nix::unistd::initgroups(cname, *gid)
173                    .map_err(|e| std::io::Error::from_raw_os_error(e as i32))?;
174            }
175            #[cfg(target_vendor = "apple")]
176            {
177                let _ = (gid, user_cname);
178            }
179        }
180        PrivDropStep::SetGid(gid) => {
181            nix::unistd::setgid(*gid).map_err(|e| std::io::Error::from_raw_os_error(e as i32))?;
182        }
183        PrivDropStep::SetUid(uid) => {
184            nix::unistd::setuid(*uid).map_err(|e| std::io::Error::from_raw_os_error(e as i32))?;
185        }
186    }
187    Ok(())
188}
189
190/// A turnkey [`ChannelHandler`] that runs the authorized user's login shell in a PTY.
191///
192/// Construct one indirectly via [`Device::listen_ssh`][crate::Device::listen_ssh]; it is not meant
193/// to be created by hand.
194pub struct ShellHandler {
195    /// The russh channel this shell is bound to.
196    channel_id: ChannelId,
197    /// The owned write half of the PTY master; client input is written here, and window-resize
198    /// `TIOCSWINSZ` ioctls are issued through it.
199    pty_write: OwnedWritePty,
200    /// The spawned child shell, shared with the output-pump task so both sides can signal/kill it.
201    child: Arc<Mutex<tokio::process::Child>>,
202}
203
204impl ShellHandler {
205    /// Forward the numeric POSIX signal `signum` to the child shell, best-effort.
206    async fn signal_child(&self, signum: i32) {
207        let pid = { self.child.lock().await.id() };
208        let Some(pid) = pid else {
209            return;
210        };
211        let Ok(signal) = nix::sys::signal::Signal::try_from(signum) else {
212            tracing::debug!(signum, "ssh: unmapped signal; not forwarding");
213            return;
214        };
215        if let Err(e) =
216            nix::sys::signal::kill(nix::unistd::Pid::from_raw(pid as nix::libc::pid_t), signal)
217        {
218            tracing::debug!(error = %e, signum, "ssh: failed forwarding signal to shell");
219        }
220    }
221
222    /// Kill the child shell, best-effort. Used on channel close/EOF.
223    async fn kill_child(&self) {
224        let mut child = self.child.lock().await;
225        if let Err(e) = child.start_kill() {
226            tracing::debug!(error = %e, "ssh: failed to kill shell child");
227        }
228    }
229}
230
231/// Map a russh [`Sig`] to its POSIX signal number for forwarding to the child.
232fn sig_to_signum(sig: &Sig) -> Option<i32> {
233    Some(match sig {
234        Sig::HUP => nix::libc::SIGHUP,
235        Sig::INT => nix::libc::SIGINT,
236        Sig::QUIT => nix::libc::SIGQUIT,
237        Sig::KILL => nix::libc::SIGKILL,
238        Sig::TERM => nix::libc::SIGTERM,
239        _ => return None,
240    })
241}
242
243impl ChannelHandler for ShellHandler {
244    type Error = std::io::Error;
245
246    fn new(
247        rt: tokio::runtime::Handle,
248        channel_id: ChannelId,
249        session: Handle,
250        _dev: Arc<Device>,
251        accept: &SshAccept,
252    ) -> Result<Self, Self::Error> {
253        // SECURITY: the identity comes solely from the fail-closed `auth_none` decision.
254        let user = resolve_user(&accept.local_user)?;
255        let env = build_env(&user);
256
257        // Allocate the PTY master/subordinate pair.
258        let (pty, pts) = pty_process::open().map_err(std::io::Error::other)?;
259
260        // Build the privilege-drop plan BEFORE the fork (this allocates a Vec). Inside the
261        // `pre_exec` closure we only iterate + call the syscalls (no alloc, async-signal-safe).
262        //
263        // `initgroups` is unavailable on Apple targets in `nix`; it is the production (Linux)
264        // path. macOS dev builds still compile and drop the primary gid + uid (no InitGroups step,
265        // so `user_cname` is not needed there).
266        #[cfg(not(target_vendor = "apple"))]
267        let with_initgroups = true;
268        #[cfg(target_vendor = "apple")]
269        let with_initgroups = false;
270        let plan = priv_drop_plan(user.uid, user.gid, with_initgroups);
271        // The login name needed by `initgroups`; only present on the platforms that have that step.
272        #[cfg(not(target_vendor = "apple"))]
273        let user_cname = std::ffi::CString::new(user.name.clone())
274            .map_err(|e| std::io::Error::other(format!("ssh: user name has NUL byte: {e}")))?;
275
276        let mut cmd = pty_process::Command::new(&user.shell);
277        cmd = cmd.arg(LOGIN_SHELL_ARG).current_dir(&user.home).env_clear();
278        for (k, v) in env {
279            cmd = cmd.env(k, v);
280        }
281
282        // SECURITY: privilege drop runs in the child between fork and exec. Order is sacred:
283        // (1) supplementary groups, (2) setgid, (3) setuid LAST. setuid is last because once the
284        // uid is dropped the process can no longer change its gid. Any failure aborts the exec, so
285        // the shell never runs with the wrong or elevated identity. The ordered `plan` was built
286        // pre-fork (see `priv_drop_plan`); here we only iterate it and apply each step in order —
287        // behavior is identical to the previous inline initgroups→setgid→setuid sequence.
288        //
289        // Safety: the closure only calls async-signal-safe libc wrappers (initgroups/setgid/
290        // setuid) via `apply_priv_drop_step` and allocates nothing; it is sound to run post-fork.
291        cmd = unsafe {
292            cmd.pre_exec(move || {
293                #[cfg(not(target_vendor = "apple"))]
294                let user_cname = Some(user_cname.as_c_str());
295                #[cfg(target_vendor = "apple")]
296                let user_cname: Option<&std::ffi::CStr> = None;
297                for step in &plan {
298                    apply_priv_drop_step(step, user_cname)?;
299                }
300                Ok(())
301            })
302        };
303
304        let child = cmd.spawn(pts).map_err(std::io::Error::other)?;
305
306        let (mut pty_read, pty_write) = pty.into_split();
307        let child = Arc::new(Mutex::new(child));
308
309        // Pump PTY output → SSH channel data, then report the child's exit status. Runs on the
310        // shared tokio runtime so it lives independently of `handle_event` calls.
311        let pump_child = child.clone();
312        rt.spawn(async move {
313            let mut buf = [0u8; 16 * 1024];
314            loop {
315                match pty_read.read(&mut buf).await {
316                    Ok(0) => break,
317                    Ok(n) => {
318                        if session.data(channel_id, buf[..n].to_vec()).await.is_err() {
319                            tracing::debug!(%channel_id, "ssh: client gone; stopping shell pump");
320                            break;
321                        }
322                    }
323                    Err(e) => {
324                        tracing::debug!(error = %e, %channel_id, "ssh: pty read error");
325                        break;
326                    }
327                }
328            }
329
330            // Report exit status (best-effort). russh exposes `exit_status_request(id, u32)`.
331            let status = { pump_child.lock().await.wait().await };
332            match status {
333                Ok(status) => {
334                    // A signal-killed shell has `code() == None`; reporting that as `exit-status 0`
335                    // would lie to the client (success). russh's `exit_signal_request` needs a `Sig`
336                    // name mapped from the raw signal number — awkward — so we take the simpler,
337                    // still-correct path: convey signal death as the conventional `128 + signal`
338                    // non-zero status (what a POSIX shell reports), never a bogus 0.
339                    use std::os::unix::process::ExitStatusExt as _;
340                    let code = status
341                        .code()
342                        .unwrap_or_else(|| 128 + status.signal().unwrap_or(0))
343                        as u32;
344                    if session.exit_status_request(channel_id, code).await.is_err() {
345                        tracing::debug!(%channel_id, "ssh: failed sending exit-status");
346                    }
347                }
348                Err(e) => {
349                    tracing::debug!(error = %e, %channel_id, "ssh: waiting on shell child");
350                }
351            }
352            if session.close(channel_id).await.is_err() {
353                tracing::trace!(%channel_id, "ssh: channel already closed");
354            }
355        });
356
357        Ok(Self {
358            channel_id,
359            pty_write,
360            child,
361        })
362    }
363
364    async fn handle_event(&mut self, event: &ChannelEvent) -> Result<(), Self::Error> {
365        match event {
366            ChannelEvent::Data(bytes) => {
367                self.pty_write.write_all(bytes).await?;
368                self.pty_write.flush().await?;
369            }
370            ChannelEvent::Resize { width, height } => {
371                // `pty-req` initial size and later `window-change` both arrive here. Issue
372                // TIOCSWINSZ via pty-process' resize (rows, cols).
373                if let Err(e) = self.pty_write.resize(Size::new(*height, *width)) {
374                    tracing::debug!(error = %e, channel_id = %self.channel_id, "ssh: pty resize");
375                }
376            }
377            ChannelEvent::Signal(sig) => {
378                if let Some(signum) = sig_to_signum(sig) {
379                    self.signal_child(signum).await;
380                } else {
381                    tracing::debug!(?sig, "ssh: unhandled signal; not forwarding");
382                }
383            }
384            ChannelEvent::Close | ChannelEvent::Eof => {
385                tracing::debug!(channel_id = %self.channel_id, ?event, "ssh: closing shell");
386                self.kill_child().await;
387            }
388        }
389        Ok(())
390    }
391}
392
#[cfg(all(test, feature = "ssh"))]
mod tests {
    use super::*;

    /// A fixed, fully-populated user record for the pure-function tests.
    fn fake_user() -> ResolvedUser {
        ResolvedUser {
            name: String::from("alice"),
            uid: Uid::from_raw(1000),
            gid: Gid::from_raw(1000),
            home: PathBuf::from("/home/alice"),
            shell: PathBuf::from("/bin/bash"),
        }
    }

    #[test]
    fn env_is_minimal_and_correct() {
        let env = build_env(&fake_user());
        let expected = [
            ("HOME", "/home/alice"),
            ("USER", "alice"),
            ("LOGNAME", "alice"),
            ("SHELL", "/bin/bash"),
            ("TERM", "xterm-256color"),
            ("PATH", DEFAULT_PATH),
        ];

        for &(key, want) in expected.iter() {
            let got = env
                .iter()
                .find(|(name, _)| name == key)
                .map(|(_, value)| value.as_str());
            assert_eq!(got, Some(want));
        }
        // No daemon environment leaks through: only the six known keys are present.
        assert_eq!(env.len(), 6);
    }

    #[test]
    fn resolve_unknown_user_fails_closed() {
        // A username that cannot exist in any passwd database must yield Err, never a shell.
        let kind = resolve_user("definitely-not-a-real-user-xyz")
            .expect_err("bogus user must fail closed")
            .kind();
        assert!(kind == std::io::ErrorKind::NotFound || kind == std::io::ErrorKind::Other);
    }

    #[test]
    fn login_shell_uses_dash_l() {
        // The interactive path always starts a login shell with `-l`. The exec form
        // (`<shell> -c <cmd>`) is documented as unsupported because `ChannelEvent` carries no
        // exec request; see the module note in `Device::listen_ssh`.
        assert_eq!(LOGIN_SHELL_ARG, "-l");
    }

    #[test]
    fn priv_drop_plan_orders_uid_last() {
        let (uid, gid) = (Uid::from_raw(1000), Gid::from_raw(1000));
        // Linux production path includes the supplementary-groups step first.
        let plan = priv_drop_plan(uid, gid, true);
        let expected = vec![
            PrivDropStep::InitGroups(gid),
            PrivDropStep::SetGid(gid),
            PrivDropStep::SetUid(uid),
        ];
        assert_eq!(
            plan, expected,
            "drop sequence must be initgroups → setgid → setuid"
        );
        // setuid MUST be last — fails loudly if anyone reorders.
        assert_eq!(plan.last().copied(), Some(PrivDropStep::SetUid(uid)));
    }

    #[test]
    fn priv_drop_plan_apple_skips_initgroups() {
        let (uid, gid) = (Uid::from_raw(1000), Gid::from_raw(1000));
        // Apple path: `initgroups` is unavailable, so no InitGroups step — but still uid-last.
        let plan = priv_drop_plan(uid, gid, false);
        let expected = vec![PrivDropStep::SetGid(gid), PrivDropStep::SetUid(uid)];
        assert_eq!(plan, expected);
        assert!(plan.iter().all(|step| *step != PrivDropStep::InitGroups(gid)));
        assert_eq!(plan.last().copied(), Some(PrivDropStep::SetUid(uid)));
    }

    #[test]
    fn priv_drop_setgid_before_setuid() {
        let (uid, gid) = (Uid::from_raw(1000), Gid::from_raw(1000));
        // The sacred invariant expressed directly: gid is dropped before uid, on every platform.
        for with_initgroups in [true, false] {
            let plan = priv_drop_plan(uid, gid, with_initgroups);
            let index_of = |wanted: PrivDropStep| plan.iter().position(|step| *step == wanted);
            let setgid_idx = index_of(PrivDropStep::SetGid(gid)).expect("plan must set gid");
            let setuid_idx = index_of(PrivDropStep::SetUid(uid)).expect("plan must set uid");
            assert!(
                setgid_idx < setuid_idx,
                "setgid must precede setuid (with_initgroups={with_initgroups})"
            );
        }
    }

    #[test]
    fn empty_shell_falls_back_to_default() {
        // Mirror resolve_user's normalization of an empty passwd shell field.
        let user = ResolvedUser {
            shell: PathBuf::from(""),
            ..fake_user()
        };
        let shell = match user.shell.as_os_str().is_empty() {
            true => PathBuf::from(DEFAULT_SHELL),
            false => user.shell.clone(),
        };
        assert_eq!(shell, PathBuf::from(DEFAULT_SHELL));
    }
}