// microsandbox_protocol/lib.rs
//! `microsandbox-protocol` defines the shared protocol types used for communication
//! between the host and the guest agent over CBOR-over-virtio-serial.
//!
//! For how the protocol is versioned and evolved while staying backward compatible
//! across independently-upgraded hosts and live sandboxes, see `VERSIONING.md` in
//! this crate.

8#![warn(missing_docs)]
9
10mod error;
11
//--------------------------------------------------------------------------------------------------
// Constants: Host↔Guest Shutdown Timings
//--------------------------------------------------------------------------------------------------

/// Seconds backing [`HANDOFF_POWEROFF_TIMEOUT`].
///
/// The raw second counts are kept as plain integers so that
/// [`HANDOFF_SHUTDOWN_FLUSH_TIMEOUT`] can be derived from them by integer
/// addition inside a `const` initializer.
const HANDOFF_POWEROFF_TIMEOUT_SECS: u64 = 5;

/// Seconds backing [`SHUTDOWN_FLUSH_MARGIN`].
const SHUTDOWN_FLUSH_MARGIN_SECS: u64 = 3;

/// Seconds backing [`NORMAL_SHUTDOWN_FLUSH_TIMEOUT`].
const NORMAL_SHUTDOWN_FLUSH_TIMEOUT_SECS: u64 = 2;

/// Maximum time agentd spends in its handoff-mode poweroff sequence.
///
/// In init-handoff sandboxes (systemd, openrc, …) agentd's shutdown
/// handler signals the new PID 1 with `SIGRTMIN+4`, sleeps for this
/// duration to give the init a chance to act, then falls back to
/// `SIGTERM`. The host's handoff shutdown fallback
/// ([`HANDOFF_SHUTDOWN_FLUSH_TIMEOUT`]) must exceed this so it doesn't cut
/// the sequence short.
pub const HANDOFF_POWEROFF_TIMEOUT: std::time::Duration =
    std::time::Duration::from_secs(HANDOFF_POWEROFF_TIMEOUT_SECS);

/// Additional host-side margin after agentd's handoff poweroff grace.
///
/// This gives the guest init time to react to agentd's fallback signal before
/// the host gives up and tears down the VMM process.
pub const SHUTDOWN_FLUSH_MARGIN: std::time::Duration =
    std::time::Duration::from_secs(SHUTDOWN_FLUSH_MARGIN_SECS);

/// Host fallback window for normal sandboxes where agentd remains PID 1.
///
/// agentd can synchronously `sync()`, remount the root read-only, and request
/// kernel poweroff directly in this mode, so normal development sandboxes
/// should not pay the longer handoff-init grace.
pub const NORMAL_SHUTDOWN_FLUSH_TIMEOUT: std::time::Duration =
    std::time::Duration::from_secs(NORMAL_SHUTDOWN_FLUSH_TIMEOUT_SECS);

/// Host fallback window for sandboxes that hand PID 1 to another init.
///
/// agentd uses this window to `sync()` block-backed root filesystems
/// and power off the kernel cleanly (or run its handoff sequence —
/// see [`HANDOFF_POWEROFF_TIMEOUT`]). On a healthy guest the VMM
/// exits well inside the window and the host fallback is a no-op;
/// the fallback only fires when the guest is wedged.
///
/// Defined as [`HANDOFF_POWEROFF_TIMEOUT`] plus [`SHUTDOWN_FLUSH_MARGIN`]
/// (the margin covers the init's own signal handling). The sum holds by
/// construction; the compile-time assertion below additionally guarantees
/// the result stays strictly greater than [`HANDOFF_POWEROFF_TIMEOUT`].
pub const HANDOFF_SHUTDOWN_FLUSH_TIMEOUT: std::time::Duration =
    std::time::Duration::from_secs(HANDOFF_POWEROFF_TIMEOUT_SECS + SHUTDOWN_FLUSH_MARGIN_SECS);

/// Legacy name for the handoff-init shutdown fallback window.
///
/// New runtime code should choose between [`NORMAL_SHUTDOWN_FLUSH_TIMEOUT`]
/// and [`HANDOFF_SHUTDOWN_FLUSH_TIMEOUT`] based on whether a sandbox uses
/// handoff init.
pub const SHUTDOWN_FLUSH_TIMEOUT: std::time::Duration = HANDOFF_SHUTDOWN_FLUSH_TIMEOUT;

// Compile-time invariant: the host must wait strictly longer than agentd's
// longest internal grace, otherwise the host fallback will cut agentd's
// handoff sequence short and we'll silently strand init-handoff sandboxes.
// In practice this fails the build if the margin is ever reduced to zero.
const _: () = assert!(
    HANDOFF_SHUTDOWN_FLUSH_TIMEOUT.as_secs() > HANDOFF_POWEROFF_TIMEOUT.as_secs(),
    "HANDOFF_SHUTDOWN_FLUSH_TIMEOUT must exceed HANDOFF_POWEROFF_TIMEOUT",
);

//--------------------------------------------------------------------------------------------------
// Constants: Host↔Guest Protocol
//--------------------------------------------------------------------------------------------------

/// Virtio-console port name for the agent channel.
pub const AGENT_PORT_NAME: &str = "agent";

/// Virtiofs tag for the runtime filesystem (scripts, heartbeat).
pub const RUNTIME_FS_TAG: &str = "msb_runtime";

/// Guest-write byte budget for the runtime (`/.msb`) virtiofs mount.
///
/// `/.msb` is a host↔guest control channel, not bulk storage: the only
/// guest-written payload is a ~1 KiB heartbeat (host-written scripts and TLS
/// certs form the mount's baseline and are not charged). This 16 MiB ceiling is
/// therefore almost entirely abuse headroom — it exists so the channel cannot be
/// used to fill the host disk. It is intentionally a fixed constant rather than
/// a user-facing knob.
pub const RUNTIME_FS_QUOTA_BYTES: u64 = 16 * 1024 * 1024;

/// Guest mount point for the runtime filesystem.
pub const RUNTIME_MOUNT_POINT: &str = "/.msb";

/// Guest directory for file mount virtiofs shares.
pub const FILE_MOUNTS_DIR: &str = "/.msb/file-mounts";

/// Guest path for named scripts (added to PATH by agentd).
pub const SCRIPTS_PATH: &str = "/.msb/scripts";

/// Maximum number of simultaneous SDK clients the host relay admits.
pub const AGENT_RELAY_MAX_CLIENTS: u32 = 128;

/// Size of the correlation ID range allocated to each relay client.
///
/// Computed as `u32::MAX / AGENT_RELAY_MAX_CLIENTS` (integer division, so the
/// per-client ranges together never exceed the `u32` ID space).
pub const AGENT_RELAY_ID_RANGE_STEP: u32 = u32::MAX / AGENT_RELAY_MAX_CLIENTS;

//--------------------------------------------------------------------------------------------------
// Constants: Guest Init Environment Variables
//--------------------------------------------------------------------------------------------------

/// Environment variable carrying the sandbox in-guest security profile.
///
/// Values:
/// - `default` — preserve normal guest-root semantics. Exec sessions do not
///   set `no_new_privs` and keep `CAP_SYS_ADMIN`.
/// - `restricted` — set `no_new_privs` and drop `CAP_SYS_ADMIN` before user
///   exec sessions. Agentd also forces `nosuid,nodev` on user mounts.
///
/// Example:
/// - `MSB_SECURITY_PROFILE=restricted`
pub const ENV_SECURITY_PROFILE: &str = "MSB_SECURITY_PROFILE";

/// Environment variable carrying tmpfs mount specs for guest init.
///
/// Format: `path[:opts][;path[:opts];...]`.
///
/// - `path` — guest mount path (required, always the first element)
/// - `size=N` — size limit in MiB (optional)
/// - `noexec` — mount with noexec flag (optional)
/// - `nosuid` — mount with nosuid flag (optional)
/// - `nodev` — mount with nodev flag (optional)
/// - `ro` — mount read-only (optional)
/// - `rw` — explicit writable default (optional)
/// - `mode=N` — permission mode as octal integer (optional, e.g. `mode=1777`)
///
/// Entries are separated by `;`. Within an entry, the path comes first,
/// followed by an optional colon and comma-separated options. Options compose
/// order-independently (e.g. `:ro,noexec` and `:noexec,ro` are equivalent).
///
/// Examples:
/// - `MSB_TMPFS=/tmp:size=256` — 256 MiB tmpfs at `/tmp`
/// - `MSB_TMPFS=/tmp:size=256;/var/tmp:size=128` — two tmpfs mounts
/// - `MSB_TMPFS=/tmp` — tmpfs at `/tmp` with defaults
/// - `MSB_TMPFS=/tmp:size=256,noexec` — with noexec flag
/// - `MSB_TMPFS=/seed:size=64,ro` — read-only tmpfs
pub const ENV_TMPFS: &str = "MSB_TMPFS";

/// Environment variable specifying how agentd assembles the root filesystem.
///
/// Format: comma-separated `key=value` pairs, semicolons for multi-value fields.
///
/// Variants:
/// - `kind=disk-image,device=/dev/vda[,fstype=ext4]`
/// - `kind=oci-layered,lowers=/dev/vdb;/dev/vdc;/dev/vdd,lower_fstype=erofs,upper=/dev/vde,upper_fstype=ext4`
/// - `kind=oci-flat,lower=/dev/vdb,lower_fstype=erofs,upper=/dev/vdc,upper_fstype=ext4`
///
/// Legacy format (`/dev/vda[,fstype=ext4]`) is accepted and treated as `kind=disk-image`.
pub const ENV_BLOCK_ROOT: &str = "MSB_BLOCK_ROOT";

/// Environment variable carrying the guest network interface configuration.
///
/// Format: `key=value,...`
///
/// - `iface=NAME` — interface name (required)
/// - `mac=AA:BB:CC:DD:EE:FF` — MAC address (required)
/// - `mtu=N` — MTU (optional)
///
/// Example:
/// - `MSB_NET=iface=eth0,mac=02:5a:7b:13:01:02,mtu=1500`
pub const ENV_NET: &str = "MSB_NET";

/// Environment variable carrying the guest IPv4 network configuration.
///
/// Format: `key=value,...`
///
/// - `addr=A.B.C.D/N` — address with prefix length (required)
/// - `gw=A.B.C.D` — default gateway (required)
/// - `dns=A.B.C.D` — DNS server (optional)
///
/// Example:
/// - `MSB_NET_IPV4=addr=172.16.1.2/30,gw=172.16.1.1,dns=172.16.1.1`
pub const ENV_NET_IPV4: &str = "MSB_NET_IPV4";

/// Environment variable carrying the guest IPv6 network configuration.
///
/// Format: `key=value,...`
///
/// - `addr=ADDR/N` — address with prefix length (required)
/// - `gw=ADDR` — default gateway (required)
/// - `dns=ADDR` — DNS server (optional)
///
/// Example:
/// - `MSB_NET_IPV6=addr=fd42:6d73:62:2a::2/64,gw=fd42:6d73:62:2a::1,dns=fd42:6d73:62:2a::1`
pub const ENV_NET_IPV6: &str = "MSB_NET_IPV6";

/// Environment variable carrying virtiofs directory volume mount specs for guest init.
///
/// Format: `tag:guest_path[:opts][;tag:guest_path[:opts];...]`
///
/// - `tag` — virtiofs tag name (required, matches the tag used in `--mount`)
/// - `guest_path` — mount point inside the guest (required)
/// - `ro` / `rw` — access mode option (optional)
/// - `noexec` — disable direct execution from the mount (optional)
/// - `nosuid` — mount with nosuid flag (optional)
/// - `nodev` — mount with nodev flag (optional)
///
/// Entries are separated by `;`.
///
/// Examples:
/// - `MSB_DIR_MOUNTS=data:/data` — mount virtiofs tag `data` at `/data`
/// - `MSB_DIR_MOUNTS=data:/data:ro,noexec` — mount read-only and noexec
/// - `MSB_DIR_MOUNTS=data:/data;cache:/cache:ro` — two mounts
pub const ENV_DIR_MOUNTS: &str = "MSB_DIR_MOUNTS";

/// Environment variable carrying virtiofs **file** volume mount specs for guest init.
///
/// Used when the host path is a single file rather than a directory. The SDK
/// wraps each file in an isolated staging directory (hard-linked to preserve
/// the same inode) and shares that directory via virtiofs. Agentd mounts the
/// share at [`FILE_MOUNTS_DIR`]`/<tag>/` and bind-mounts the file to the
/// guest path.
///
/// Format: `tag:filename:guest_path[:opts][;tag:filename:guest_path[:opts];...]`
///
/// - `tag` — virtiofs tag name (required, matches the tag used in `--mount`)
/// - `filename` — name of the file inside the virtiofs share (required)
/// - `guest_path` — final file path inside the guest (required)
/// - `ro` / `rw` — access mode option (optional)
/// - `noexec` — disable direct execution from the mount (optional)
/// - `nosuid` — mount with nosuid flag (optional)
/// - `nodev` — mount with nodev flag (optional)
///
/// Entries are separated by `;`.
///
/// Examples:
/// - `MSB_FILE_MOUNTS=fm_config:app.conf:/etc/app.conf`
/// - `MSB_FILE_MOUNTS=fm_config:app.conf:/etc/app.conf:ro,noexec`
/// - `MSB_FILE_MOUNTS=fm_a:a.sh:/usr/bin/a.sh;fm_b:b.sh:/usr/bin/b.sh`
pub const ENV_FILE_MOUNTS: &str = "MSB_FILE_MOUNTS";

/// Environment variable carrying disk-image volume mount specs for guest init.
///
/// Each spec describes one virtio-blk device attached for the sole purpose
/// of being mounted at a guest path by agentd (distinct from the rootfs
/// block device, which is described by [`ENV_BLOCK_ROOT`]).
///
/// Format: `id:guest_path[:opts][;id:guest_path[:opts];...]`
///
/// - `id` — the `virtio_blk_config.serial` value set by the VMM. Agentd
///   resolves it to a device node via `/dev/disk/by-id/virtio-<id>`, or
///   by scanning `/sys/block/*/serial` as a fallback.
/// - `guest_path` — absolute mount path in the guest (required).
/// - `fstype=...` — inner filesystem type (optional). When absent,
///   agentd probes `/proc/filesystems` to find a type that mounts cleanly.
/// - `ro` / `rw` — access mode option (optional).
/// - `noexec` — disable direct execution from the mount (optional).
/// - `nosuid` — mount with nosuid flag (optional).
/// - `nodev` — mount with nodev flag (optional).
///
/// Entries are separated by `;`. Options are comma-separated flags or
/// key-value pairs in the final option block.
///
/// Examples:
/// - `MSB_DISK_MOUNTS=data_12ab:/data:fstype=ext4` — ext4 disk at `/data`
/// - `MSB_DISK_MOUNTS=seed_7f:/seed:ro` — autodetect fstype, read-only
/// - `MSB_DISK_MOUNTS=a_1:/a:fstype=ext4;b_2:/b:ro,noexec` — two disks
pub const ENV_DISK_MOUNTS: &str = "MSB_DISK_MOUNTS";

/// Environment variable carrying the default guest user for agentd execs.
///
/// Format: `USER[:GROUP]` or `UID[:GID]`
///
/// Accepted shapes:
/// - `USER`
/// - `UID`
/// - `USER:GROUP`
/// - `UID:GID`
///
/// Examples:
/// - `MSB_USER=alice` — default to user `alice`
/// - `MSB_USER=1000` — default to UID 1000
/// - `MSB_USER=alice:developers` — default to user `alice` and group `developers`
/// - `MSB_USER=1000:100` — default to UID 1000 and GID 100
pub const ENV_USER: &str = "MSB_USER";

/// Environment variable carrying the guest hostname for agentd.
///
/// Format: bare string
///
/// Example:
/// - `MSB_HOSTNAME=worker-01`
///
/// agentd calls `sethostname()` and adds the name to `/etc/hosts`.
/// Defaults to a sandbox-name-derived hostname when not explicitly set.
pub const ENV_HOSTNAME: &str = "MSB_HOSTNAME";

/// Environment variable carrying the DNS name the guest uses to reach
/// the sandbox host (Docker's `host.docker.internal` equivalent).
///
/// The host-side network stack emits this value via its
/// `guest_env_vars()` method; agentd reads it into
/// [`crate::exec`]-adjacent boot params and writes the mapping into
/// `/etc/hosts`. The value the network stack emits is a fixed
/// protocol constant — today always `host.microsandbox.internal`.
pub const ENV_HOST_ALIAS: &str = "MSB_HOST_ALIAS";

/// Environment variable carrying sandbox-wide resource limits.
///
/// Format: `resource=limit[:hard][;resource=limit[:hard];...]`
///
/// - `resource` — lowercase rlimit name such as `nofile` or `nproc`
/// - `limit` — soft limit
/// - `hard` — hard limit (optional; if omitted, uses the soft limit)
///
/// Examples:
/// - `MSB_RLIMITS=nofile=65535`
/// - `MSB_RLIMITS=nofile=65535:65535;nproc=4096:4096`
///
/// agentd applies these during PID 1 startup so every later guest process
/// inherits the raised baseline instead of having to opt into per-exec rlimits.
pub const ENV_RLIMITS: &str = "MSB_RLIMITS";

/// Environment variable selecting a guest init binary for PID 1 handoff.
///
/// When set, agentd performs initial setup (mounts, runtime dirs), then
/// forks. The parent execs the binary at this path, becoming the new
/// PID 1. The child stays alive as a normal grandchild process serving
/// host requests over virtio-serial.
///
/// Format: bare absolute path inside the guest rootfs, or the literal
/// sentinel [`HANDOFF_INIT_AUTO`] which triggers a candidate probe in
/// agentd (see [`HANDOFF_INIT_AUTO_CANDIDATES`]).
///
/// Examples:
/// - `MSB_HANDOFF_INIT=/lib/systemd/systemd`
/// - `MSB_HANDOFF_INIT=auto`
pub const ENV_HANDOFF_INIT: &str = "MSB_HANDOFF_INIT";

/// Sentinel value for [`ENV_HANDOFF_INIT`] requesting auto-detection.
///
/// The host may resolve this sentinel before boot when an OCI image
/// declares a known init as the first entrypoint token. If the sentinel
/// reaches the guest unchanged, agentd probes [`HANDOFF_INIT_AUTO_CANDIDATES`]
/// in order and uses the first path that exists and is executable. If
/// none match, boot fails with a clear error in `kernel.log` listing the
/// paths it checked.
pub const HANDOFF_INIT_AUTO: &str = "auto";

/// Ordered list of image entrypoint paths that `--init auto` may treat
/// as an explicit handoff init.
///
/// This host-side list is intentionally slightly wider than
/// [`HANDOFF_INIT_AUTO_CANDIDATES`]: `/init` is common in s6-overlay
/// images but too broad to probe blindly inside every guest rootfs.
/// Matching it only when the image declares it as ENTRYPOINT keeps the
/// behavior image-directed.
pub const HANDOFF_INIT_IMAGE_ENTRYPOINT_CANDIDATES: &[&str] = &[
    "/init",
    "/sbin/init",
    "/lib/systemd/systemd",
    "/usr/lib/systemd/systemd",
];

/// Ordered list of init-binary paths agentd probes when
/// [`ENV_HANDOFF_INIT`] is set to [`HANDOFF_INIT_AUTO`].
///
/// Order matters: the first match wins. The list covers the three
/// well-known locations across major distros:
/// - `/sbin/init` — BusyBox (Alpine), sysvinit, OpenRC's wrapper.
///   Usually a symlink to the actual init on systemd distros, so it
///   resolves naturally on Debian/Ubuntu too.
/// - `/lib/systemd/systemd` — Debian, Ubuntu, derivatives.
/// - `/usr/lib/systemd/systemd` — Fedora, RHEL, modern Debian.
pub const HANDOFF_INIT_AUTO_CANDIDATES: &[&str] = &[
    "/sbin/init",
    "/lib/systemd/systemd",
    "/usr/lib/systemd/systemd",
];

/// Argv list for the handoff init binary.
///
/// Format: base64url-no-padding encoded JSON array of strings.
/// Empty or unset means the init is exec'd with `argv = [program]`.
/// This deliberately differs from the delimiter-based `MSB_*` boot env
/// formats because argv entries are arbitrary strings; wrapping JSON in
/// base64url preserves spaces, separators, empty strings, and Unicode
/// without inventing a second escaping language.
///
/// Example:
/// - `MSB_HANDOFF_INIT_ARGS=WyItdW5pdD1tdWx0aS11c2VyLnRhcmdldCJd`
///   (decodes to `["-unit=multi-user.target"]`)
///
/// NOTE(review): the sample payload carries a single leading dash
/// (`-unit=`); confirm that spelling is intended for the example.
pub const ENV_HANDOFF_INIT_ARGS: &str = "MSB_HANDOFF_INIT_ARGS";

/// Working directory for the handoff init binary.
///
/// Docker applies `WORKDIR` before executing `ENTRYPOINT + CMD`. Init handoff
/// uses this optional path so image-declared init entrypoints receive the same
/// process cwd as they would under container startup.
///
/// Example:
/// - `MSB_HANDOFF_INIT_CWD=/opt/app`
pub const ENV_HANDOFF_INIT_CWD: &str = "MSB_HANDOFF_INIT_CWD";

/// Extra environment variables for the handoff init binary.
///
/// Format: base64url-no-padding encoded JSON array of `[key, value]`
/// pairs. Merged on top of the inherited env.
/// This uses the same structured payload exception as
/// [`ENV_HANDOFF_INIT_ARGS`] so env values can contain the delimiter
/// characters used by older `MSB_*` boot env formats.
///
/// Example:
/// - `MSB_HANDOFF_INIT_ENV=W1siY29udGFpbmVyIiwibWljcm9zYW5kYm94Il1d`
///   (decodes to `[["container","microsandbox"]]`)
pub const ENV_HANDOFF_INIT_ENV: &str = "MSB_HANDOFF_INIT_ENV";

/// Guest-side path to the CA certificate for TLS interception.
///
/// Placed by the sandbox process via the runtime virtiofs mount.
/// agentd checks for this file during init and installs it into the guest
/// trust store.
pub const GUEST_TLS_CA_PATH: &str = "/.msb/tls/ca.pem";

/// Guest-side path to a PEM bundle of the host's extra trusted CAs.
///
/// Placed by the sandbox process via the runtime virtiofs mount when
/// host-CA trust is enabled (default). agentd checks for this file during
/// init and appends it to the guest's trust bundle, so outbound TLS works
/// even behind a corporate MITM proxy whose gateway CA is installed on
/// the host but unknown to the guest.
pub const GUEST_TLS_HOST_CAS_PATH: &str = "/.msb/tls/host-cas.pem";

432//--------------------------------------------------------------------------------------------------
433// Exports
434//--------------------------------------------------------------------------------------------------
435
436pub mod codec;
437pub mod core;
438pub mod exec;
439pub mod fs;
440pub mod heartbeat;
441pub mod message;
442pub mod tcp;
443
444pub use error::*;