Skip to main content

sozu_command_lib/
sd_notify.rs

1//! systemd `sd_notify` integration for the master process.
2//!
3//! When sozu runs under a systemd unit declared `Type=notify`, the
4//! master process tells systemd it is ready (`READY=1`) only AFTER the
5//! initial workers have spawned and any saved state has been replayed
6//! — without that handshake, `Type=simple` declares the unit ready as
7//! soon as the binary forks, which is wrong for sozu's master/worker
8//! lifecycle (clients can hit a configured `After=sozu.service` peer
9//! before sozu accepts traffic).
10//!
11//! Implementation is std-only: read `$NOTIFY_SOCKET`, connect a
12//! `UnixDatagram`, send the protocol-formatted state line. Closes
13//! [#228]. Linux is the only target system where this matters in
14//! practice; the helper is a no-op when `$NOTIFY_SOCKET` is unset
15//! (e.g. when the operator runs `sozu start` from a shell without
16//! systemd in the supervision chain), so non-Linux builds compile and
17//! call into the helper without behavioural change.
18//!
19//! [#228]: https://github.com/sozu-proxy/sozu/issues/228
20
21use std::env;
22use std::ffi::{OsStr, OsString};
23use std::io;
24use std::os::unix::ffi::OsStrExt;
25use std::os::unix::net::UnixDatagram;
26
27/// Send a state message to systemd via `$NOTIFY_SOCKET`.
28///
29/// Supports both filesystem-path sockets (`/run/systemd/notify`) and
30/// Linux-abstract sockets (the address starts with `@` per
31/// `sd_notify(3)`; common in container images and namespace-gated
32/// hosts). Abstract addresses are routed through `libc::sendto` with
33/// `AF_UNIX` and a leading NUL byte — `UnixDatagram::connect` would
34/// otherwise resolve the path verbatim and fail with `ENOENT`,
35/// silently breaking the `Type=notify` readiness handshake.
36///
37/// Returns `Ok(false)` when `$NOTIFY_SOCKET` is unset (sozu is not
38/// running under systemd notify supervision; the caller can ignore
39/// this case). Returns `Ok(true)` when the datagram was sent.
40/// Returns `Err(...)` for socket / permission failures so the caller
41/// can decide whether to log + continue or fail-fast.
42///
43/// Multiple state lines can be packed into a single message by
44/// joining them with `\n` (see [`STATE_READY`] / [`STATE_STOPPING`]).
45pub fn notify(state: &str) -> io::Result<bool> {
46    let path: OsString = match env::var_os("NOTIFY_SOCKET") {
47        Some(p) => p,
48        None => return Ok(false),
49    };
50    let bytes = path.as_bytes();
51    if bytes.first() == Some(&b'@') {
52        send_abstract(&bytes[1..], state.as_bytes())?;
53    } else {
54        let sock = UnixDatagram::unbound()?;
55        sock.connect::<&OsStr>(path.as_os_str())?;
56        sock.send(state.as_bytes())?;
57    }
58    Ok(true)
59}
60
61/// Linux-abstract `AF_UNIX` send. The kernel address layout is a
62/// `sun_path` whose first byte is NUL followed by the abstract name
63/// (no terminator); `addrlen` covers `sun_family` + 1 + name bytes.
64fn send_abstract(name: &[u8], payload: &[u8]) -> io::Result<()> {
65    use std::mem::{MaybeUninit, size_of};
66
67    // sun_path is 108 bytes on Linux; minus the leading NUL leaves
68    // 107 usable bytes for the abstract name.
69    if name.len() > 107 {
70        return Err(io::Error::new(
71            io::ErrorKind::InvalidInput,
72            "abstract socket name too long",
73        ));
74    }
75
76    let fd = unsafe { libc::socket(libc::AF_UNIX, libc::SOCK_DGRAM | libc::SOCK_CLOEXEC, 0) };
77    if fd < 0 {
78        return Err(io::Error::last_os_error());
79    }
80
81    let mut addr: MaybeUninit<libc::sockaddr_un> = MaybeUninit::zeroed();
82    // SAFETY: the buffer was zero-initialised above; we write the family
83    // and the abstract path bytes into the inline `sun_path` array.
84    let addrlen = unsafe {
85        let p = addr.as_mut_ptr();
86        (*p).sun_family = libc::AF_UNIX as libc::sa_family_t;
87        let path_ptr = (*p).sun_path.as_mut_ptr() as *mut u8;
88        path_ptr.write(0);
89        std::ptr::copy_nonoverlapping(name.as_ptr(), path_ptr.add(1), name.len());
90        (size_of::<libc::sa_family_t>() + 1 + name.len()) as libc::socklen_t
91    };
92
93    let result = unsafe {
94        libc::sendto(
95            fd,
96            payload.as_ptr() as *const libc::c_void,
97            payload.len(),
98            0,
99            addr.as_ptr() as *const libc::sockaddr,
100            addrlen,
101        )
102    };
103
104    let err = if result < 0 {
105        Some(io::Error::last_os_error())
106    } else {
107        None
108    };
109
110    // Always close. `close()` errors on a freshly-created datagram
111    // socket are not actionable for the caller.
112    unsafe {
113        libc::close(fd);
114    }
115
116    match err {
117        Some(e) => Err(e),
118        None => Ok(()),
119    }
120}
121
/// `READY=1` — master finished spawning workers and loading state.
/// Pass this to [`notify`] once startup has completed.
pub const STATE_READY: &str = "READY=1";

/// `STOPPING=1` — graceful shutdown begins.
/// Pass this to [`notify`] when the master starts shutting down.
pub const STATE_STOPPING: &str = "STOPPING=1";

/// `RELOADING=1` — hot reload begins (the same binary is reloading
/// configuration; for binary upgrade see [`main_pid`]).
// NOTE(review): sd_notify(3) appears to require a `MONOTONIC_USEC=`
// field alongside `RELOADING=1` for `Type=notify-reload` units —
// confirm before relying on this constant with such a unit.
pub const STATE_RELOADING: &str = "RELOADING=1";

/// `WATCHDOG=1` — watchdog ping. Currently NOT wired into the master
/// event loop — the bundled `os-build/systemd/sozu.service` and
/// `sozu@.service` units intentionally do NOT declare `WatchdogSec=`,
/// so leaving this constant unused at link time is the documented
/// state. Operators who add `WatchdogSec=` to a custom unit will see
/// the unit get killed for missing pings until the periodic-timer
/// follow-up lands.
// `dead_code` is allowed on purpose: the constant is kept for the
// watchdog follow-up described above and has no caller yet.
#[allow(dead_code)]
pub const STATE_WATCHDOG: &str = "WATCHDOG=1";
141
142/// Send `MAINPID=<pid>` so systemd tracks the new master after a
143/// hot-upgrade re-exec hands the supervision off to a forked child.
144/// `pid` is unsigned because `MAINPID=` accepts unsigned decimal per
145/// `sd_notify(3)`; callers convert from `std::process::id()` directly.
146pub fn main_pid(pid: u32) -> io::Result<bool> {
147    notify(&format!("MAINPID={pid}"))
148}
149
150/// Send `STATUS=<text>` for human-readable status (visible via
151/// `systemctl status sozu`).
152#[allow(dead_code)]
153pub fn status(text: &str) -> io::Result<bool> {
154    notify(&format!("STATUS={text}"))
155}
156
#[cfg(test)]
mod tests {
    use super::*;
    use serial_test::serial;
    use std::os::unix::net::UnixDatagram;
    use tempfile::TempDir;

    /// Puts `$NOTIFY_SOCKET` back to the value captured at construction
    /// when dropped. Doing the restore in `Drop` means it also runs when
    /// the test body panics (every caller uses `.expect(..)`), so a
    /// failing test cannot leak its socket path into later tests.
    struct NotifySocketRestore {
        previous: Option<OsString>,
    }

    impl Drop for NotifySocketRestore {
        fn drop(&mut self) {
            // SAFETY: same argument as in `with_notify_socket` — callers
            // are serialised through `#[serial(notify_socket)]`, so no
            // other test reads or writes this variable concurrently.
            unsafe {
                match self.previous.take() {
                    Some(v) => env::set_var("NOTIFY_SOCKET", v),
                    None => env::remove_var("NOTIFY_SOCKET"),
                }
            }
        }
    }

    /// Run `body` with `$NOTIFY_SOCKET` set to `value` (or removed when
    /// `value` is `None`) and return its result. The previous value is
    /// restored afterwards, on normal return and on panic alike.
    fn with_notify_socket<R>(value: Option<&OsStr>, body: impl FnOnce() -> R) -> R {
        let _restore = NotifySocketRestore {
            previous: env::var_os("NOTIFY_SOCKET"),
        };
        // SAFETY: cargo runs each `#[test]` on its own thread but the
        // env table is process-shared. `#[serial(notify_socket)]` on
        // the caller serialises every test that touches this env var,
        // so the unsafe mutation cannot race a parallel reader.
        unsafe {
            match value {
                Some(v) => env::set_var("NOTIFY_SOCKET", v),
                None => env::remove_var("NOTIFY_SOCKET"),
            }
        }
        body()
    }

    /// With no `$NOTIFY_SOCKET`, `notify` must succeed and report that
    /// nothing was sent.
    #[test]
    #[serial(notify_socket)]
    fn notify_no_socket_set_is_noop() {
        let res = with_notify_socket(None, || notify(STATE_READY).expect("noop"));
        assert!(
            !res,
            "notify must report `false` when NOTIFY_SOCKET is unset"
        );
    }

    /// Filesystem-path socket: the payload must arrive unmodified.
    #[test]
    #[serial(notify_socket)]
    fn notify_writes_payload_to_socket() {
        let dir = TempDir::new().expect("tempdir");
        let socket_path = dir.path().join("notify.sock");
        let listener = UnixDatagram::bind(&socket_path).expect("bind notify socket");

        let sent = with_notify_socket(Some(socket_path.as_os_str()), || {
            notify(STATE_READY).expect("notify must succeed")
        });
        assert!(sent, "notify must report `true` when datagram was sent");

        let mut buf = [0u8; 64];
        let (n, _addr) = listener.recv_from(&mut buf).expect("recv notify datagram");
        assert_eq!(&buf[..n], STATE_READY.as_bytes());
    }

    /// systemd may pass an abstract socket address as `@/path` (the
    /// leading `@` is replaced with NUL inside the kernel address).
    /// `UnixDatagram::connect` would interpret that as a filesystem
    /// path and fail with `ENOENT` — silently breaking `Type=notify`
    /// readiness gating in container images and namespace-gated hosts.
    /// Use `libc::sendto` with `AF_UNIX` + leading-NUL `sun_path`.
    #[test]
    #[serial(notify_socket)]
    fn notify_writes_payload_to_abstract_socket() {
        // The pid keeps the name unique across concurrently running
        // test processes.
        let abstract_name = format!("sozu-test-notify-{}", std::process::id());
        let listener = bind_abstract(abstract_name.as_bytes()).expect("bind abstract");

        let env_value = format!("@{abstract_name}");
        let sent = with_notify_socket(Some(OsStr::new(&env_value)), || {
            notify(STATE_READY).expect("notify must succeed on abstract socket")
        });
        assert!(sent, "notify must report `true` for abstract socket");

        let mut buf = [0u8; 64];
        let (n, _) = listener
            .recv_from(&mut buf)
            .expect("recv notify datagram on abstract socket");
        assert_eq!(&buf[..n], STATE_READY.as_bytes());
    }

    /// Bind a datagram socket to the abstract-namespace address `name`
    /// and return it wrapped in a `UnixDatagram` that owns the fd.
    /// `name` must be short enough to fit in `sun_path` after the
    /// leading NUL byte; the test only passes short fixed names.
    fn bind_abstract(name: &[u8]) -> io::Result<UnixDatagram> {
        use std::mem::{MaybeUninit, size_of};
        use std::os::fd::FromRawFd;

        let fd = unsafe { libc::socket(libc::AF_UNIX, libc::SOCK_DGRAM | libc::SOCK_CLOEXEC, 0) };
        if fd < 0 {
            return Err(io::Error::last_os_error());
        }
        // SAFETY: zero-init buffer + abstract path layout matches the
        // send path's convention; addrlen covers sun_family + leading
        // NUL byte + name length.
        let (addr, addrlen) = unsafe {
            let mut addr: MaybeUninit<libc::sockaddr_un> = MaybeUninit::zeroed();
            let p = addr.as_mut_ptr();
            (*p).sun_family = libc::AF_UNIX as libc::sa_family_t;
            let path_ptr = (*p).sun_path.as_mut_ptr() as *mut u8;
            path_ptr.write(0);
            std::ptr::copy_nonoverlapping(name.as_ptr(), path_ptr.add(1), name.len());
            (
                addr,
                (size_of::<libc::sa_family_t>() + 1 + name.len()) as libc::socklen_t,
            )
        };
        let res = unsafe { libc::bind(fd, addr.as_ptr() as *const libc::sockaddr, addrlen) };
        if res < 0 {
            // Capture errno before `close` can overwrite it.
            let err = io::Error::last_os_error();
            unsafe { libc::close(fd) };
            return Err(err);
        }
        // SAFETY: fd was just created by `socket(2)` and bound; we hand
        // ownership to UnixDatagram which closes it on drop.
        Ok(unsafe { UnixDatagram::from_raw_fd(fd) })
    }
}