cellos-supervisor 0.5.1

CellOS execution-cell runner — boots cells in Firecracker microVMs or gVisor, enforces narrow typed authority, emits signed CloudEvents.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
//! SEC-21 Phase 3h — DNSSEC trust-anchor loader for the supervisor-side
//! resolver-refresh path.
//!
//! This module is the bounded "where do the keys come from" half of Phase
//! 3h. The "what does the validator do with them" half lives in
//! [`super::hickory_resolve::resolve_with_ttl_validated`] which sets
//! `ResolverOpts.trust_anchor` (causing hickory's
//! `ResolverBuilder::build` to auto-flip `validate = true`) and lets
//! hickory's bundled validator (`hickory-resolver` 0.26.1, `dnssec-ring`
//! feature) chain responses back to the operator-supplied anchors (or
//! the bundled IANA root KSKs when no override is set).
//!
//! ## Source precedence
//!
//! 1. `CELLOS_DNSSEC_TRUST_ANCHORS_PATH` env var (always wins when set).
//! 2. Per-resolver `dnsAuthority.resolvers[].dnssec.trustAnchorsPath`
//!    spec field.
//! 3. Hickory's bundled IANA root KSKs — used when neither override is
//!    provided.
//!
//! ## SEC-21 Phase 3h.2 — operator-supplied anchors are now enforcement-real
//!
//! The earlier 0.24 integration could only *audit* operator-supplied
//! anchor paths; hickory's public API did not expose a knob to plug a
//! custom `TrustAnchor` into the resolver, so `validate = true` always
//! consulted the bundled IANA defaults. The 0.26.1 upgrade closes this:
//! [`hickory_resolver::config::ResolverOpts::trust_anchor`] is now a
//! public `Option<PathBuf>` field that the resolver builder feeds into
//! `hickory_proto::dnssec::TrustAnchors::from_file` at validator-build
//! time. The supervisor's loader:
//!
//! - **Pre-validates the path** with `O_NOFOLLOW` + 32 KiB ceiling
//!   discipline (mirrors [`cellos_core::trust_keys::load_trust_verify_keys_file`]
//!   for the authority-keys file). Hickory will re-open the file
//!   internally; that re-open is acceptable because we already proved
//!   the path is not a symlink and is bounded.
//! - **Retains the path** in [`TrustAnchors::path`] for the
//!   [`super::hickory_resolve::resolve_with_ttl_validated`] caller to
//!   forward into `ResolverOpts.trust_anchor`.
//! - **Stamps the source descriptor** (basename or `"iana-default"`)
//!   into every `dns_authority_dnssec_failed` event for SIEM grep —
//!   filesystem layout is never leaked into the audit stream.
//!
//! The `bytes` field is retained for backward compatibility with audit
//! call sites that expect a payload; production code should prefer
//! `path()` for the validator-injection path.
//!
//! ## Symlink + size discipline (W6 SEC-25 parity)
//!
//! Path-based loads open with `O_NOFOLLOW` on Unix to refuse a swapped-in
//! symlink at the final path component, and the file is bounded at 32
//! KiB. Same constants as
//! [`cellos_core::trust_keys::load_trust_verify_keys_file`].
//!
//! Trust-anchor files are size-bounded at 32 KiB before parsing — far
//! larger than any realistic root-KSK file (the IANA root key is a few
//! hundred bytes) but small enough that a malicious operator-injected
//! file cannot exhaust supervisor memory. Files exceeding the bound
//! are rejected with `CellosError::InvalidSpec`, which the supervisor
//! surfaces as a `dns_authority_dnssec_failed` event with reason
//! `trust_anchor_missing`.

use std::fs::OpenOptions;
use std::io::Read;
use std::path::{Path, PathBuf};

use cellos_core::CellosError;

/// Upper bound, in bytes, on an operator-supplied trust-anchor file
/// (32 KiB).
///
/// The ceiling is deliberately roomy — the IANA root KSK is only about
/// 300 bytes — so a file much larger than this is more plausibly a
/// misdirected download (or a hostile padding attack) than a genuine
/// anchor set.
pub(crate) const TRUST_ANCHORS_MAX_BYTES: u64 = 32 << 10;

/// Stable descriptor for the "we loaded the bundled IANA defaults"
/// case. Stamped into emitted events as `trustAnchorSource` so the SIEM
/// can prove which anchor set was in use without leaking a filesystem
/// path.
///
/// This is the value [`TrustAnchors::iana_default`] stores in
/// [`TrustAnchors::source`], and one of the three conditions
/// [`TrustAnchors::is_iana_default`] checks.
pub const TRUST_ANCHOR_SOURCE_IANA_DEFAULT: &str = "iana-default";

/// Name of the environment variable that carries the operator-supplied
/// override for the trust-anchor file. When set (and non-empty), it
/// supersedes any per-resolver `dnssec.trustAnchorsPath` spec field —
/// matches the W6 SEC-25 `CELLOS_AUTHORITY_KEYS_PATH` precedence
/// pattern (env wins over spec) so an emergency rotation does not
/// require respinning the cell spec.
///
/// Read by [`TrustAnchors::load`]; an empty value is treated the same
/// as an unset variable.
pub const ENV_TRUST_ANCHORS_PATH: &str = "CELLOS_DNSSEC_TRUST_ANCHORS_PATH";

/// DNSSEC trust anchors as loaded for the SEC-21 Phase 3h validator.
///
/// A value is either the IANA-default sentinel (see
/// [`TrustAnchors::iana_default`]) or the result of a successful
/// [`TrustAnchors::load_from_path`]. It carries three things:
///
/// - **`bytes`** — the raw content read from the anchor file; empty
///   for the IANA default, because hickory ships those bytes itself.
///   Kept for backward compatibility with audit call sites that expect
///   a payload.
/// - **`source`** — a short audit descriptor stamped into emitted
///   events. It is NEVER a full path: only a basename or
///   `"iana-default"`, so the operator's filesystem layout does not
///   leak into the audit stream.
/// - **`path`** — the validated path that
///   [`super::hickory_resolve::resolve_with_ttl_validated`] forwards
///   into `ResolverOpts.trust_anchor` to engage hickory 0.26.1's
///   custom-anchor validator. `None` for the IANA-default sentinel
///   (hickory falls back to its bundled defaults when the field is
///   `None`).
///
/// `path` is private and exposed read-only through
/// [`TrustAnchors::path`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TrustAnchors {
    /// Raw anchor-file content. Empty when the IANA default is in use
    /// (hickory supplies the bytes itself in that case).
    pub bytes: Vec<u8>,
    /// Audit descriptor: `"iana-default"` for the bundled anchors,
    /// otherwise the basename of the file that was loaded.
    pub source: String,
    /// Validated trust-anchor path handed to
    /// `ResolverOpts.trust_anchor`. `None` is the IANA-default
    /// sentinel — hickory then uses its bundled defaults.
    path: Option<PathBuf>,
}

impl TrustAnchors {
    /// Load trust anchors honouring the SEC-21 Phase 3h precedence:
    /// env > spec > IANA default.
    ///
    /// `spec_path`: optional per-resolver
    /// `dnsAuthority.resolvers[].dnssec.trustAnchorsPath` from the spec.
    /// `None` means the operator did not set the spec field; the env
    /// var (if set) still wins. Empty-string spec_path is treated as
    /// `None` for ergonomic reasons (operators often plumb an
    /// empty-string default through their templating layer). An empty
    /// env value is likewise treated as unset.
    ///
    /// The env var is read as an OS string, so a path that is not valid
    /// UTF-8 is still honoured. Reading it as a `String` would make such
    /// a value indistinguishable from "unset" and silently demote the
    /// operator's override to the spec path or the IANA defaults.
    ///
    /// # Errors
    ///
    /// Returns `CellosError::InvalidSpec` when the source is set
    /// (env or spec) but cannot be opened, is symlinked, or exceeds
    /// [`TRUST_ANCHORS_MAX_BYTES`]. The supervisor surfaces this as
    /// a `dns_authority_dnssec_failed{reason: "trust_anchor_missing"}`
    /// event; if `failClosed` is true the resolver answer is dropped.
    pub fn load(spec_path: Option<&str>) -> Result<Self, CellosError> {
        // Env precedence: when set AND non-empty, supersedes spec.
        // `var_os` (not `var`) so a non-UTF-8 path is not mistaken for
        // an unset variable.
        let env_override = std::env::var_os(ENV_TRUST_ANCHORS_PATH)
            .filter(|raw| !raw.is_empty())
            .map(PathBuf::from);

        let chosen = env_override.or_else(|| {
            spec_path
                .filter(|raw| !raw.is_empty())
                .map(PathBuf::from)
        });

        match chosen {
            Some(path) => Self::load_from_path(&path),
            // No override — use hickory's bundled IANA defaults.
            None => Ok(Self::iana_default()),
        }
    }

    /// Load from a concrete filesystem path with O_NOFOLLOW + size
    /// bounding. Public so the unit tests can drive the path-loader
    /// directly without round-tripping the env var.
    ///
    /// The file content is read but not parsed here. On success the
    /// returned value carries the bytes, the file's basename as the
    /// audit `source` (or `"path-anchor"` when the basename is missing
    /// or not valid UTF-8), and the path exactly as it was passed in.
    ///
    /// # Errors
    ///
    /// Returns `CellosError::InvalidSpec` when the file cannot be
    /// opened (on Unix this includes the kernel refusing a symlink at
    /// the final path component), cannot be read, or is larger than
    /// [`TRUST_ANCHORS_MAX_BYTES`].
    pub fn load_from_path(path: &Path) -> Result<Self, CellosError> {
        // scope: symlink discipline mirrors W6 SEC-25
        // `cellos_core::trust_keys::load_trust_verify_keys_file`: the
        // final path component MUST not be a symlink an attacker can
        // swap to redirect the loader at a wrong file.
        #[cfg(unix)]
        let file = {
            use std::os::unix::fs::OpenOptionsExt;
            // The flag value is hard-coded per platform because libc is
            // not available here. On Linux it is ALSO per-architecture:
            // arm, aarch64, powerpc and m68k define O_NOFOLLOW as
            // 0o100000 (0x8000), while the asm-generic value used by
            // x86, riscv, etc. is 0o400000 (0x20000). Passing 0x20000 on
            // the former sets a different flag (O_LARGEFILE on
            // arm/aarch64) and symlinks would be followed silently.
            // NOTE(review): the constants this mirrors in
            // `cellos_core::trust_keys` presumably need the same
            // per-architecture split — confirm.
            #[cfg(all(
                target_os = "linux",
                any(
                    target_arch = "arm",
                    target_arch = "aarch64",
                    target_arch = "powerpc",
                    target_arch = "powerpc64",
                    target_arch = "m68k",
                )
            ))]
            const O_NOFOLLOW: i32 = 0x8000;
            #[cfg(all(
                target_os = "linux",
                not(any(
                    target_arch = "arm",
                    target_arch = "aarch64",
                    target_arch = "powerpc",
                    target_arch = "powerpc64",
                    target_arch = "m68k",
                ))
            ))]
            const O_NOFOLLOW: i32 = 0x20000;
            #[cfg(any(
                target_os = "macos",
                target_os = "ios",
                target_os = "freebsd",
                target_os = "netbsd",
                target_os = "openbsd",
                target_os = "dragonfly",
            ))]
            const O_NOFOLLOW: i32 = 0x100;
            #[cfg(not(any(
                target_os = "linux",
                target_os = "macos",
                target_os = "ios",
                target_os = "freebsd",
                target_os = "netbsd",
                target_os = "openbsd",
                target_os = "dragonfly",
            )))]
            compile_error!(
                "cellos-supervisor::resolver_refresh::dnssec: O_NOFOLLOW value not yet defined for \
                 this Unix target — add the platform-specific value (see <fcntl.h>) before building."
            );
            let mut opts = OpenOptions::new();
            opts.read(true).custom_flags(O_NOFOLLOW);
            opts.open(path).map_err(|e| {
                CellosError::InvalidSpec(format!(
                    "dnssec trust anchors: cannot open {} (symlink? missing?): {e}",
                    path.display()
                ))
            })?
        };
        #[cfg(not(unix))]
        let file = OpenOptions::new().read(true).open(path).map_err(|e| {
            CellosError::InvalidSpec(format!(
                "dnssec trust anchors: cannot open {}: {e}",
                path.display()
            ))
        })?;

        // Pre-flight size check via metadata BEFORE reading — refuses
        // a hostile multi-GiB file at zero allocation cost. A metadata
        // failure is deliberately not fatal: the bounded read plus the
        // post-read length check below is the belt-and-braces backup
        // (it also covers filesystems where `metadata()` reports a
        // stale size, e.g. some FUSE implementations). Either gate
        // trips → reject with the same error class.
        let expected_len = match file.metadata() {
            Ok(meta) if meta.len() > TRUST_ANCHORS_MAX_BYTES => {
                return Err(CellosError::InvalidSpec(format!(
                    "dnssec trust anchors: file {} is {} bytes, exceeds {} byte ceiling",
                    path.display(),
                    meta.len(),
                    TRUST_ANCHORS_MAX_BYTES
                )));
            }
            Ok(meta) => meta.len(),
            Err(_) => 0,
        };

        // Size the buffer from the reported length instead of always
        // reserving the full ceiling: a typical anchor file is a few
        // hundred bytes and `bytes` is retained in the returned value.
        // The cast is lossless because `expected_len` is at most
        // `TRUST_ANCHORS_MAX_BYTES` here.
        let mut buf = Vec::with_capacity(expected_len as usize + 1);
        // `take` caps the reader at one byte over the ceiling so we
        // can detect "file streamed more bytes than metadata reported"
        // while never reading more than ceiling + 1 bytes into `buf`.
        file.take(TRUST_ANCHORS_MAX_BYTES + 1)
            .read_to_end(&mut buf)
            .map_err(|e| {
                CellosError::InvalidSpec(format!(
                    "dnssec trust anchors: cannot read {}: {e}",
                    path.display()
                ))
            })?;

        if buf.len() as u64 > TRUST_ANCHORS_MAX_BYTES {
            return Err(CellosError::InvalidSpec(format!(
                "dnssec trust anchors: file {} streamed {}+ bytes, exceeds {} byte ceiling",
                path.display(),
                buf.len(),
                TRUST_ANCHORS_MAX_BYTES
            )));
        }

        // Source descriptor stamped into events: basename ONLY (full
        // path leak is a soft information disclosure into the audit
        // stream — the basename is enough to disambiguate which file
        // the operator wired without revealing layout).
        let source = path
            .file_name()
            .and_then(|s| s.to_str())
            .unwrap_or("path-anchor")
            .to_string();

        Ok(Self {
            bytes: buf,
            source,
            path: Some(path.to_path_buf()),
        })
    }

    /// Builds the IANA-default sentinel: no bytes, no path, and
    /// [`TRUST_ANCHOR_SOURCE_IANA_DEFAULT`] as the audit source.
    #[must_use]
    pub fn iana_default() -> Self {
        let source = String::from(TRUST_ANCHOR_SOURCE_IANA_DEFAULT);
        Self {
            path: None,
            source,
            bytes: Vec::new(),
        }
    }

    /// Returns true when this anchor set is the IANA-default sentinel
    /// (no override loaded). Used by the validator wrapper to decide
    /// whether to pass through to hickory's default-anchor path.
    #[must_use]
    pub fn is_iana_default(&self) -> bool {
        self.bytes.is_empty()
            && self.source == TRUST_ANCHOR_SOURCE_IANA_DEFAULT
            && self.path.is_none()
    }

    /// SEC-21 Phase 3h.2 — borrows the operator-supplied anchor path
    /// that is forwarded into hickory's `ResolverOpts.trust_anchor`.
    ///
    /// `None` is the IANA-default sentinel; hickory then falls back to
    /// its bundled defaults. A `Some` path produced by
    /// [`Self::load_from_path`] has already passed that loader's
    /// O_NOFOLLOW + 32 KiB ceiling checks.
    #[must_use]
    pub fn path(&self) -> Option<&Path> {
        let validated = self.path.as_ref()?;
        Some(validated.as_path())
    }

    /// Test-only setter for [`Self::path`] used by the
    /// [`super::hickory_resolve::tests::operator_anchor_path_is_actually_consulted`]
    /// regression to drive the hickory-side wiring with a synthetic
    /// path that bypasses the production `O_NOFOLLOW` + 32 KiB gate.
    ///
    /// Overwrites only the stored path; `bytes` and `source` are left
    /// untouched, so the caller is responsible for keeping the three
    /// fields coherent (e.g. [`Self::is_iana_default`] turns false as
    /// soon as a path is set on the sentinel).
    ///
    /// Must NOT be used in production code — production callers
    /// MUST go through [`Self::load`] / [`Self::load_from_path`] so
    /// the symlink + size discipline is enforced.
    #[cfg(test)]
    pub(crate) fn set_path_for_test(&mut self, path: Option<PathBuf>) {
        self.path = path;
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::sync::{Mutex, MutexGuard};
    use tempfile::tempdir;

    /// Serialises every test in this module that touches the env knob.
    /// `std::env::set_var` is process-global and the test harness runs
    /// tests on multiple threads, so save/restore alone is not enough:
    /// without mutual exclusion one test can observe the value another
    /// test set mid-flight.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Per-test guard that (a) holds [`ENV_LOCK`] for the duration of
    /// the test and (b) clears the env var on construction and restores
    /// the prior value on drop, so tests in this module neither race
    /// nor pollute each other when they touch the env knob.
    struct EnvGuard {
        prior: Option<OsString>,
        // Held only for its lifetime. Fields are dropped after
        // `Drop::drop` runs, so the env var is restored while the lock
        // is still held.
        _serialised: MutexGuard<'static, ()>,
    }
    impl EnvGuard {
        fn new() -> Self {
            // A panicking (failed) test poisons the mutex; the guarded
            // data is `()`, so recovering the lock is always safe and
            // keeps one failure from cascading into the others.
            let serialised = ENV_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let prior = std::env::var_os(ENV_TRUST_ANCHORS_PATH);
            std::env::remove_var(ENV_TRUST_ANCHORS_PATH);
            Self {
                prior,
                _serialised: serialised,
            }
        }
    }
    impl Drop for EnvGuard {
        fn drop(&mut self) {
            match self.prior.take() {
                Some(v) => std::env::set_var(ENV_TRUST_ANCHORS_PATH, v),
                None => std::env::remove_var(ENV_TRUST_ANCHORS_PATH),
            }
        }
    }

    #[test]
    fn loads_iana_default_when_no_path_set() {
        let _guard = EnvGuard::new();
        // No env, no spec — falls through to the IANA-default sentinel.
        let ta = TrustAnchors::load(None).expect("default load ok");
        assert!(
            ta.is_iana_default(),
            "fall-through must yield IANA default; got source={}",
            ta.source
        );
        assert_eq!(ta.source, TRUST_ANCHOR_SOURCE_IANA_DEFAULT);
        assert!(ta.bytes.is_empty());

        // Empty-string spec is treated as None for ergonomic templating
        // (operators often plumb empty-string defaults through).
        let ta2 = TrustAnchors::load(Some("")).expect("empty-spec default load ok");
        assert!(
            ta2.is_iana_default(),
            "empty-string spec must fall through to IANA default"
        );
    }

    #[test]
    fn loads_path_with_o_nofollow_unix() {
        let _guard = EnvGuard::new();
        let dir = tempdir().expect("tempdir");
        let path = dir.path().join("trust-anchor.bin");
        // Write a tiny well-formed-shaped blob — content is not parsed
        // by this loader (parsing happens later in hickory; the loader
        // job is byte-bounding + symlink rejection).
        let payload = b"DNSKEY-PUBLIC-KEY-BYTES";
        std::fs::write(&path, payload).expect("write anchor bytes");

        let ta = TrustAnchors::load_from_path(&path).expect("path-load ok");
        assert_eq!(ta.bytes, payload, "bytes must round-trip from disk");
        assert_eq!(
            ta.source, "trust-anchor.bin",
            "source must be the basename, NOT the full path (no fs layout leak)"
        );
        // SEC-21 Phase 3h.2 — operator-supplied path is now retained
        // for forwarding into hickory's `ResolverOpts.trust_anchor`.
        // Regression: a path-loaded TrustAnchors MUST surface the
        // exact path the loader validated (full path, not basename —
        // hickory needs to re-open the file).
        assert_eq!(
            ta.path(),
            Some(path.as_path()),
            "operator-supplied path must be retained verbatim for ResolverOpts.trust_anchor"
        );
        assert!(
            !ta.is_iana_default(),
            "operator-supplied anchor must not be classified as IANA default"
        );

        // Drive the env-precedence path too: env > spec. The guard
        // above holds the module lock, so no other test in this module
        // can observe this value, and restores the variable on drop.
        std::env::set_var(ENV_TRUST_ANCHORS_PATH, &path);
        let ta_env = TrustAnchors::load(Some("/some/spec/path-that-should-be-overridden"))
            .expect("env-precedence load ok");
        assert_eq!(
            ta_env.bytes, payload,
            "env-set path must win over spec-set path"
        );
        assert_eq!(ta_env.source, "trust-anchor.bin");
        assert_eq!(
            ta_env.path(),
            Some(path.as_path()),
            "env-set path must also be retained for ResolverOpts.trust_anchor"
        );
    }

    #[test]
    fn iana_default_sentinel_has_no_path() {
        // SEC-21 Phase 3h.2 — the IANA-default sentinel must NOT
        // present a path to the validator (otherwise hickory would
        // try to open it and fail). `path()` returning `None` is the
        // signal that hickory should fall back to its bundled
        // default anchors.
        let ta = TrustAnchors::iana_default();
        assert!(ta.path().is_none(), "IANA default must have no path");
        assert!(ta.is_iana_default());
    }

    #[test]
    fn rejects_oversized_trust_anchors_file() {
        let _guard = EnvGuard::new();
        let dir = tempdir().expect("tempdir");
        let path = dir.path().join("oversize.bin");
        // 33 KiB — comfortably over the 32 KiB ceiling.
        let payload = vec![0xABu8; (TRUST_ANCHORS_MAX_BYTES + 1024) as usize];
        std::fs::write(&path, &payload).expect("write oversize");

        let err = TrustAnchors::load_from_path(&path).expect_err("oversize file must be rejected");
        let msg = format!("{err}");
        assert!(
            msg.contains("exceeds") && msg.contains(&TRUST_ANCHORS_MAX_BYTES.to_string()),
            "rejection must mention the size ceiling for operator triage; got {msg}"
        );
    }

    #[cfg(unix)]
    #[test]
    fn rejects_symlink_at_path() {
        let _guard = EnvGuard::new();
        let dir = tempdir().expect("tempdir");
        let real_path = dir.path().join("real-anchor.bin");
        let symlink_path = dir.path().join("symlinked-anchor.bin");
        std::fs::write(&real_path, b"REAL-KEY-BYTES").expect("write real");
        std::os::unix::fs::symlink(&real_path, &symlink_path).expect("create symlink");

        let err = TrustAnchors::load_from_path(&symlink_path)
            .expect_err("symlink at final component MUST be rejected by O_NOFOLLOW");
        let msg = format!("{err}");
        // The OS-specific errno text varies (ELOOP on Linux, similar on macOS) —
        // we only assert that the failure surfaced through CellosError::InvalidSpec
        // with the path stamped in for triage, which is enough to prove the kernel
        // refused the open.
        assert!(
            msg.contains(symlink_path.to_str().unwrap()),
            "rejection must include the symlinked path for operator triage; got {msg}"
        );
        // Sanity: the real file IS loadable (proves the test env is sane).
        let ok = TrustAnchors::load_from_path(&real_path).expect("real path loadable");
        assert_eq!(ok.bytes, b"REAL-KEY-BYTES");
    }
}