Skip to main content

nucleus/security/
landlock.rs

1use crate::error::{NucleusError, Result};
2use landlock::{
3    Access, AccessFs, PathBeneath, PathFd, Ruleset, RulesetAttr, RulesetCreatedAttr, RulesetError,
4    RulesetStatus, ABI,
5};
6use std::path::PathBuf;
7use tracing::{debug, info, warn};
8
/// Target Landlock ABI requested whenever this module derives access-right sets
/// (`AccessFs::from_all` / `AccessFs::from_read`).
///
/// ABI v5 adds IoctlDev on top of the Refer (v2) and Truncate (v3) rights.
/// NOTE(review): per the kernel Landlock documentation ABI v5 shipped with
/// Linux 6.10, not 6.12 - confirm before relying on a kernel version here.
/// The landlock crate gracefully degrades for older kernels.
const TARGET_ABI: ABI = ABI::V5;
12
/// Minimum Landlock ABI version required for production mode.
///
/// V3 adds LANDLOCK_ACCESS_FS_TRUNCATE which prevents silent data truncation
/// that V1/V2 cannot control. This is the minimum we consider safe for
/// production workloads. Consumed by `LandlockManager::assert_minimum_abi`.
const MINIMUM_PRODUCTION_ABI: ABI = ABI::V3;
19
/// Landlock filesystem access-control manager.
///
/// Implements fine-grained, path-based filesystem restrictions as an additional
/// defense layer on top of namespaces, capabilities, and seccomp.
///
/// Properties (matching Nucleus security invariants):
/// - Irreversible: once restrict_self() is called, restrictions cannot be lifted
/// - Stackable: layered with seccomp and capability dropping
/// - Unprivileged: works in rootless mode
#[derive(Debug)]
pub struct LandlockManager {
    /// Set once a policy has been enforced (fully, or partially in
    /// best-effort mode); later apply calls then become no-ops.
    applied: bool,
    /// Additional paths to grant read+write access to (e.g. volume mounts).
    extra_rw_paths: Vec<String>,
}
34
35impl LandlockManager {
36    pub fn new() -> Self {
37        Self {
38            applied: false,
39            extra_rw_paths: Vec::new(),
40        }
41    }
42
43    /// Register additional paths that need read+write access.
44    /// Used for volume mounts that aren't under the default allowed paths.
45    pub fn add_rw_path(&mut self, path: &str) {
46        self.extra_rw_paths.push(path.to_string());
47    }
48
49    /// Apply the container Landlock policy.
50    ///
51    /// Rules:
52    /// - `/` (root):         read-only traversal (ReadDir) so path resolution works
53    /// - `/bin`, `/usr`:     read + execute (for running agent binaries)
54    /// - `/lib`, `/lib64`:   read (shared libraries)
55    /// - `/etc`:             read (config / resolv.conf / nsswitch)
56    /// - `/dev`:             read (already minimal device nodes)
57    /// - `/proc`:            read (already mounted read-only)
58    /// - `/tmp`:             read + write + create + remove (agent scratch space)
59    /// - `/context`:         read-only (pre-populated agent data)
60    ///
61    /// Everything else is denied by the ruleset.
62    pub fn apply_container_policy(&mut self) -> Result<bool> {
63        self.apply_container_policy_with_mode(false)
64    }
65
66    /// Assert that the kernel supports at least the minimum Landlock ABI version
67    /// required for production workloads.
68    ///
69    /// Returns Ok(()) if the ABI is sufficient, or Err if the kernel is too old.
70    /// In best-effort mode, a too-old kernel is logged but not fatal.
71    pub fn assert_minimum_abi(&self, production_mode: bool) -> Result<()> {
72        // Probe the kernel's Landlock ABI version by attempting to create a ruleset
73        // with the minimum ABI's access rights. If the kernel doesn't support the
74        // minimum ABI, the ruleset will be NotEnforced or PartiallyEnforced.
75        let min_access = AccessFs::from_all(MINIMUM_PRODUCTION_ABI);
76        let target_access = AccessFs::from_all(TARGET_ABI);
77
78        // If the minimum access set equals the target, the kernel supports everything
79        // If the minimum is a subset, check that at least the minimum rights are present
80        if min_access != target_access {
81            info!(
82                "Landlock ABI: target={:?}, minimum_production={:?}",
83                TARGET_ABI, MINIMUM_PRODUCTION_ABI
84            );
85        }
86
87        // The actual enforcement check happens in build_and_restrict().
88        // Here we do a lightweight check: if the kernel supports the target ABI,
89        // it certainly supports the minimum. The landlock crate handles this
90        // gracefully, but we want an explicit assertion for production.
91        match Ruleset::default().handle_access(AccessFs::from_all(MINIMUM_PRODUCTION_ABI)) {
92            Ok(_) => {
93                info!("Landlock ABI >= V3 confirmed");
94                Ok(())
95            }
96            Err(e) => {
97                let msg = format!(
98                    "Kernel Landlock ABI is below minimum required version (V3): {}",
99                    e
100                );
101                if production_mode {
102                    Err(ll_err(e))
103                } else {
104                    warn!("{}", msg);
105                    Ok(())
106                }
107            }
108        }
109    }
110
111    /// Apply with configurable failure behavior.
112    ///
113    /// When `best_effort` is true, failures (e.g. kernel without Landlock) are
114    /// logged and execution continues.
115    pub fn apply_container_policy_with_mode(&mut self, best_effort: bool) -> Result<bool> {
116        if self.applied {
117            debug!("Landlock policy already applied, skipping");
118            return Ok(true);
119        }
120
121        info!("Applying Landlock filesystem policy");
122
123        match self.build_and_restrict() {
124            Ok(status) => match status {
125                RulesetStatus::FullyEnforced => {
126                    self.applied = true;
127                    info!("Landlock policy fully enforced");
128                    Ok(true)
129                }
130                RulesetStatus::PartiallyEnforced => {
131                    if best_effort {
132                        self.applied = true;
133                        info!(
134                            "Landlock policy partially enforced (kernel lacks some access rights)"
135                        );
136                        Ok(true)
137                    } else {
138                        Err(NucleusError::LandlockError(
139                            "Landlock policy only partially enforced; strict mode requires full target ABI support".to_string(),
140                        ))
141                    }
142                }
143                RulesetStatus::NotEnforced => {
144                    if best_effort {
145                        warn!("Landlock not enforced (kernel does not support Landlock)");
146                        Ok(false)
147                    } else {
148                        Err(NucleusError::LandlockError(
149                            "Landlock not enforced (kernel does not support Landlock)".to_string(),
150                        ))
151                    }
152                }
153            },
154            Err(e) => {
155                if best_effort {
156                    warn!(
157                        "Failed to apply Landlock policy: {} (continuing without Landlock)",
158                        e
159                    );
160                    Ok(false)
161                } else {
162                    Err(e)
163                }
164            }
165        }
166    }
167
168    /// Apply an execute-only allowlist for host-side supervisor processes.
169    ///
170    /// This policy handles only `LANDLOCK_ACCESS_FS_EXECUTE`, leaving normal
171    /// read/write access untouched. It is intended for runtimes like gVisor
172    /// that need a narrow post-namespace executable allowlist while still
173    /// blocking arbitrary host executable and setuid-wrapper execs after the
174    /// supervisor has entered its setup namespace.
175    pub fn apply_execute_allowlist_policy(
176        &mut self,
177        allowed_roots: &[PathBuf],
178        best_effort: bool,
179    ) -> Result<bool> {
180        if self.applied {
181            debug!("Landlock execute allowlist already applied, skipping");
182            return Ok(true);
183        }
184
185        info!(
186            allowed_roots = ?allowed_roots,
187            "Applying Landlock execute allowlist policy"
188        );
189
190        match self.build_execute_allowlist_and_restrict(allowed_roots) {
191            Ok(status) => match status {
192                RulesetStatus::FullyEnforced => {
193                    self.applied = true;
194                    info!("Landlock execute allowlist fully enforced");
195                    Ok(true)
196                }
197                RulesetStatus::PartiallyEnforced => {
198                    if best_effort {
199                        self.applied = true;
200                        info!("Landlock execute allowlist partially enforced");
201                        Ok(true)
202                    } else {
203                        Err(NucleusError::LandlockError(
204                            "Landlock execute allowlist only partially enforced; strict mode requires full enforcement".to_string(),
205                        ))
206                    }
207                }
208                RulesetStatus::NotEnforced => {
209                    if best_effort {
210                        warn!("Landlock execute allowlist not enforced");
211                        Ok(false)
212                    } else {
213                        Err(NucleusError::LandlockError(
214                            "Landlock execute allowlist not enforced".to_string(),
215                        ))
216                    }
217                }
218            },
219            Err(e) => {
220                if best_effort {
221                    warn!(
222                        "Failed to apply Landlock execute allowlist: {} (continuing without it)",
223                        e
224                    );
225                    Ok(false)
226                } else {
227                    Err(e)
228                }
229            }
230        }
231    }
232
233    /// Build the ruleset and call restrict_self().
234    fn build_and_restrict(&self) -> Result<RulesetStatus> {
235        let access_all = AccessFs::from_all(TARGET_ABI);
236        let access_read = AccessFs::from_read(TARGET_ABI);
237
238        // Read + execute for binary paths
239        let access_read_exec = access_read | AccessFs::Execute;
240
241        // Write access set for /tmp – full read+write but no execute.
242        // Executing from /tmp is a common attack pattern (drop-and-exec).
243        let mut access_tmp = access_all;
244        access_tmp.remove(AccessFs::Execute);
245
246        let mut ruleset = Ruleset::default()
247            .handle_access(access_all)
248            .map_err(ll_err)?
249            .create()
250            .map_err(ll_err)?;
251
252        // Root directory: minimal traversal only
253        // We add ReadDir so that path resolution through / works
254        if let Ok(fd) = PathFd::new("/") {
255            ruleset = ruleset
256                .add_rule(PathBeneath::new(fd, AccessFs::ReadDir))
257                .map_err(ll_err)?;
258        }
259
260        // M13: Mandatory paths that must exist for a functional container.
261        // Warn (or error in strict mode) when these are missing.
262        const MANDATORY_PATHS: &[&str] = &["/bin", "/usr", "/lib", "/etc"];
263        for path in MANDATORY_PATHS {
264            if !std::path::Path::new(path).exists() {
265                warn!(
266                    "Landlock: mandatory path {} does not exist; container may not function correctly",
267                    path
268                );
269            }
270        }
271
272        // Binary paths: read + execute
273        for path in &["/bin", "/usr", "/sbin"] {
274            if let Ok(fd) = PathFd::new(path) {
275                ruleset = ruleset
276                    .add_rule(PathBeneath::new(fd, access_read_exec))
277                    .map_err(ll_err)?;
278            }
279        }
280
281        // Shared libraries: read
282        for path in &["/lib", "/lib64", "/lib32"] {
283            if let Ok(fd) = PathFd::new(path) {
284                ruleset = ruleset
285                    .add_rule(PathBeneath::new(fd, access_read))
286                    .map_err(ll_err)?;
287            }
288        }
289
290        // Config/device/proc: read
291        for path in &["/etc", "/dev", "/proc"] {
292            if let Ok(fd) = PathFd::new(path) {
293                ruleset = ruleset
294                    .add_rule(PathBeneath::new(fd, access_read))
295                    .map_err(ll_err)?;
296            }
297        }
298
299        // /dev/shm: read+write for POSIX shared memory (shm_open).
300        // Required by PostgreSQL, Redis, and other programs.
301        // No execute – same policy as /tmp.
302        if let Ok(fd) = PathFd::new("/dev/shm") {
303            ruleset = ruleset
304                .add_rule(PathBeneath::new(fd, access_tmp))
305                .map_err(ll_err)?;
306        }
307
308        // /tmp: full read+write+create+remove
309        if let Ok(fd) = PathFd::new("/tmp") {
310            ruleset = ruleset
311                .add_rule(PathBeneath::new(fd, access_tmp))
312                .map_err(ll_err)?;
313        }
314
315        // /nix/store: read + execute (NixOS binaries and libraries)
316        if let Ok(fd) = PathFd::new("/nix/store") {
317            ruleset = ruleset
318                .add_rule(PathBeneath::new(fd, access_read_exec))
319                .map_err(ll_err)?;
320        }
321
322        // /run/secrets: read-only (container secrets mounted on tmpfs)
323        if let Ok(fd) = PathFd::new("/run/secrets") {
324            ruleset = ruleset
325                .add_rule(PathBeneath::new(fd, access_read))
326                .map_err(ll_err)?;
327        }
328
329        // /context: read-only (agent data)
330        if let Ok(fd) = PathFd::new("/context") {
331            ruleset = ruleset
332                .add_rule(PathBeneath::new(fd, access_read))
333                .map_err(ll_err)?;
334        }
335
336        // Volume mounts and other dynamically registered paths: full read+write
337        // (but no execute – same policy as /tmp to prevent drop-and-exec).
338        for path in &self.extra_rw_paths {
339            if let Ok(fd) = PathFd::new(path) {
340                debug!("Landlock: granting rw access to volume path {:?}", path);
341                ruleset = ruleset
342                    .add_rule(PathBeneath::new(fd, access_tmp))
343                    .map_err(ll_err)?;
344            }
345        }
346
347        let status = ruleset.restrict_self().map_err(ll_err)?;
348        Ok(status.ruleset)
349    }
350
351    fn build_execute_allowlist_and_restrict(
352        &self,
353        allowed_roots: &[PathBuf],
354    ) -> Result<RulesetStatus> {
355        let access_execute = AccessFs::Execute;
356        let mut ruleset = Ruleset::default()
357            .handle_access(access_execute)
358            .map_err(ll_err)?
359            .create()
360            .map_err(ll_err)?;
361
362        let mut added_rules = 0usize;
363        for root in allowed_roots {
364            let canonical = std::fs::canonicalize(root).unwrap_or_else(|_| root.clone());
365            match PathFd::new(canonical.as_path()) {
366                Ok(fd) => {
367                    ruleset = ruleset
368                        .add_rule(PathBeneath::new(fd, access_execute))
369                        .map_err(ll_err)?;
370                    added_rules += 1;
371                }
372                Err(err) => {
373                    warn!(
374                        "Landlock execute allowlist skipped {:?}: {}",
375                        canonical, err
376                    );
377                }
378            }
379        }
380        if added_rules == 0 {
381            return Err(NucleusError::LandlockError(
382                "Landlock execute allowlist has no valid executable roots".to_string(),
383            ));
384        }
385
386        let status = ruleset.restrict_self().map_err(ll_err)?;
387        Ok(status.ruleset)
388    }
389
390    /// Check if Landlock policy has been applied
391    pub fn is_applied(&self) -> bool {
392        self.applied
393    }
394}
395
396impl Default for LandlockManager {
397    fn default() -> Self {
398        Self::new()
399    }
400}
401
402/// Convert a landlock RulesetError into NucleusError::LandlockError
403fn ll_err(e: RulesetError) -> NucleusError {
404    NucleusError::LandlockError(e.to_string())
405}
406
407#[cfg(test)]
408mod tests {
409    use super::*;
410
411    #[test]
412    fn test_landlock_manager_initial_state() {
413        let mgr = LandlockManager::new();
414        assert!(!mgr.is_applied());
415    }
416
417    #[test]
418    fn test_apply_idempotent() {
419        let mut mgr = LandlockManager::new();
420        // Best-effort so it succeeds even without Landlock support
421        let _ = mgr.apply_container_policy_with_mode(true);
422        // Second call should be a no-op
423        let result = mgr.apply_container_policy_with_mode(true);
424        assert!(result.is_ok());
425    }
426
427    #[test]
428    fn test_best_effort_on_unsupported_kernel() {
429        let mut mgr = LandlockManager::new();
430        // Should not error even if kernel has no Landlock
431        let result = mgr.apply_container_policy_with_mode(true);
432        assert!(result.is_ok());
433    }
434
435    /// Extract the body of a function from source text by brace-matching,
436    /// avoiding fragile hardcoded character-window offsets (SEC-MED-03).
437    fn extract_fn_body<'a>(source: &'a str, fn_signature: &str) -> &'a str {
438        let fn_start = source
439            .find(fn_signature)
440            .unwrap_or_else(|| panic!("function '{}' not found in source", fn_signature));
441        let after = &source[fn_start..];
442        let open = after
443            .find('{')
444            .unwrap_or_else(|| panic!("no opening brace found for '{}'", fn_signature));
445        let mut depth = 0u32;
446        let mut end = open;
447        for (i, ch) in after[open..].char_indices() {
448            match ch {
449                '{' => depth += 1,
450                '}' => {
451                    depth -= 1;
452                    if depth == 0 {
453                        end = open + i + 1;
454                        break;
455                    }
456                }
457                _ => {}
458            }
459        }
460        &after[..end]
461    }
462
463    #[test]
464    fn test_policy_covers_nix_store_and_secrets() {
465        // Landlock policy must include rules for /nix/store (read+exec) and
466        // /run/secrets (read) so NixOS binaries can execute and secrets are readable.
467        // NOTE: The Landlock API does not expose the ruleset for inspection, so
468        // this remains a source-text check – but uses brace-matched function
469        // body extraction instead of hardcoded char offsets.
470        let source = include_str!("landlock.rs");
471        let fn_body = extract_fn_body(source, "fn build_and_restrict");
472        assert!(
473            fn_body.contains("\"/nix/store\"") || fn_body.contains("\"/nix\""),
474            "Landlock build_and_restrict must include a rule for /nix/store or /nix"
475        );
476        assert!(
477            fn_body.contains("\"/run/secrets\"") || fn_body.contains("\"/run\""),
478            "Landlock build_and_restrict must include a rule for /run/secrets"
479        );
480    }
481
482    #[test]
483    fn test_tmp_access_excludes_execute() {
484        // L-5: /tmp should have read+write but NOT execute permission.
485        // Verify at the type level that our access_tmp definition
486        // does not include Execute.
487        let access_all = AccessFs::from_all(TARGET_ABI);
488        let mut access_tmp = access_all;
489        access_tmp.remove(AccessFs::Execute);
490        assert!(!access_tmp.contains(AccessFs::Execute));
491        // But it should still have write capabilities
492        assert!(access_tmp.contains(AccessFs::WriteFile));
493        assert!(access_tmp.contains(AccessFs::RemoveFile));
494    }
495
496    #[test]
497    fn test_execute_allowlist_handles_only_execute() {
498        let source = include_str!("landlock.rs");
499        let fn_body = extract_fn_body(source, "fn build_execute_allowlist_and_restrict");
500        assert!(
501            fn_body.contains("let access_execute = AccessFs::Execute"),
502            "execute allowlist must handle only execute access"
503        );
504        assert!(
505            fn_body.contains("handle_access(access_execute)"),
506            "execute allowlist must not handle read/write filesystem rights"
507        );
508        assert!(
509            !fn_body.contains("from_all"),
510            "execute allowlist must not accidentally become a broad filesystem policy"
511        );
512    }
513
514    #[test]
515    fn test_execute_allowlist_keeps_default_no_new_privs() {
516        let source = include_str!("landlock.rs");
517        let fn_body = extract_fn_body(source, "fn build_execute_allowlist_and_restrict");
518        assert!(
519            !fn_body.contains(".set_no_new_privs(false)"),
520            "gVisor supervisor execute allowlist must retain Landlock's default no_new_privs setting"
521        );
522    }
523
524    #[test]
525    fn test_container_policy_keeps_default_no_new_privs() {
526        let source = include_str!("landlock.rs");
527        let fn_body = extract_fn_body(source, "fn build_and_restrict");
528        assert!(
529            !fn_body.contains(".set_no_new_privs(false)"),
530            "container Landlock policy must retain the landlock crate default no_new_privs setting"
531        );
532    }
533
534    #[test]
535    fn test_not_enforced_returns_error_in_strict_mode() {
536        // SEC-11: When best_effort=false, NotEnforced must return Err, not Ok(false)
537        let source = include_str!("landlock.rs");
538        let fn_body = extract_fn_body(source, "fn apply_container_policy_with_mode");
539        // Find the NotEnforced match arm within the function body
540        let not_enforced_start = fn_body
541            .find("NotEnforced")
542            .expect("function must handle NotEnforced status");
543        // Search from NotEnforced to the next match arm ('=>' after a '}')
544        let rest = &fn_body[not_enforced_start..];
545        let arm_end = rest
546            .find("RestrictionStatus::")
547            .unwrap_or(rest.len().min(500));
548        let not_enforced_block = &rest[..arm_end];
549        assert!(
550            not_enforced_block.contains("best_effort") && not_enforced_block.contains("Err"),
551            "NotEnforced must return Err when best_effort=false. Block: {}",
552            not_enforced_block
553        );
554    }
555
556    #[test]
557    fn test_partially_enforced_returns_error_in_strict_mode() {
558        let source = include_str!("landlock.rs");
559        let fn_body = extract_fn_body(source, "fn apply_container_policy_with_mode");
560        let partial_start = fn_body
561            .find("PartiallyEnforced")
562            .expect("function must handle PartiallyEnforced status");
563        let rest = &fn_body[partial_start..];
564        let arm_end = rest.find("NotEnforced").unwrap_or(rest.len().min(500));
565        let partial_block = &rest[..arm_end];
566        assert!(
567            partial_block.contains("best_effort") && partial_block.contains("Err"),
568            "PartiallyEnforced must return Err when best_effort=false. Block: {}",
569            partial_block
570        );
571    }
572}