Skip to main content

sandlock_core/
policy_fn.rs

//! Dynamic policy — live policy modification via syscall event callbacks.
//!
//! Allows a user-provided callback to inspect syscall events and adjust
//! sandbox permissions at runtime (grant, restrict, per-PID overrides).
//! The callback returns a [`Verdict`] for the syscall it was invoked for.
//!
//! ```ignore
//! let policy = Policy::builder()
//!     .fs_read("/usr").fs_read("/lib")
//!     .net_allow_host("127.0.0.1")
//!     .policy_fn(|event, ctx| {
//!         if event.syscall == "execve" && event.path_contains("untrusted") {
//!             ctx.restrict_network(&[]);  // block all network
//!         }
//!         Verdict::Allow
//!     })
//!     .build()?;
//! ```
17
18use std::collections::{HashMap, HashSet};
19use std::net::IpAddr;
20use std::sync::{Arc, RwLock};
21
22// ============================================================
23// SyscallCategory
24// ============================================================
25
/// Coarse grouping used to classify an intercepted syscall.
///
/// Each [`SyscallEvent`] carries exactly one category, so a policy callback
/// can branch on the kind of operation without matching on syscall names.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SyscallCategory {
    /// Filesystem syscalls such as `openat`, `unlinkat` or `mkdirat`.
    File,
    /// Networking syscalls such as `connect`, `sendto` or `bind`.
    Network,
    /// Process-lifecycle syscalls such as `clone`, `execve` or `vfork`.
    Process,
    /// Memory-management syscalls such as `mmap`, `munmap` or `brk`.
    Memory,
}
38
39// ============================================================
40// SyscallEvent
41// ============================================================
42
43/// An intercepted syscall event observed by the seccomp supervisor.
44#[derive(Debug, Clone)]
45pub struct SyscallEvent {
46    /// Syscall name (e.g., "connect", "openat", "execve", "clone").
47    pub syscall: String,
48    /// High-level category.
49    pub category: SyscallCategory,
50    /// PID of the process that made the syscall.
51    pub pid: u32,
52    /// Parent PID (read from /proc/{pid}/stat).
53    pub parent_pid: Option<u32>,
54    /// Resolved filesystem path (for openat, execve, etc.).
55    pub path: Option<String>,
56    /// Destination IP address (for connect, sendto).
57    pub host: Option<IpAddr>,
58    /// Destination port (for connect, sendto, bind).
59    pub port: Option<u16>,
60    /// Size argument (for mmap, brk).
61    pub size: Option<u64>,
62    /// Command arguments (for execve/execveat).
63    pub argv: Option<Vec<String>>,
64    /// Whether the supervisor denied this syscall.
65    pub denied: bool,
66}
67
68impl SyscallEvent {
69    /// Check if the path contains a substring.
70    pub fn path_contains(&self, s: &str) -> bool {
71        self.path.as_ref().map_or(false, |p| p.contains(s))
72    }
73
74    /// Check if any argv element contains a substring.
75    pub fn argv_contains(&self, s: &str) -> bool {
76        self.argv.as_ref().map_or(false, |args| args.iter().any(|a| a.contains(s)))
77    }
78}
79
80// ============================================================
81// LivePolicy — atomically swappable runtime policy
82// ============================================================
83
/// The dynamically adjustable part of the sandbox policy.
///
/// The static `Policy` is kept separately; this struct holds only the limits
/// that a policy callback is allowed to change while the sandbox is running.
#[derive(Clone, Debug)]
pub struct LivePolicy {
    /// Destination IPs that outbound connections may target.
    pub allowed_ips: HashSet<IpAddr>,
    /// Memory cap in bytes; `0` means unlimited.
    pub max_memory_bytes: u64,
    /// Maximum number of forks.
    pub max_processes: u32,
}
97
98// ============================================================
99// PolicyContext
100// ============================================================
101
102/// Context passed to the policy callback for inspecting and modifying policy.
103///
104/// - `grant()`: expand permissions up to the ceiling (reversible)
105/// - `restrict()`: permanently shrink permissions (irreversible)
106/// - `restrict_pid()`: apply per-PID network overrides
107pub struct PolicyContext {
108    live: Arc<RwLock<LivePolicy>>,
109    ceiling: LivePolicy,
110    restricted: HashSet<&'static str>,
111    pid_overrides: Arc<RwLock<HashMap<u32, HashSet<IpAddr>>>>,
112    denied_paths: Arc<RwLock<HashSet<String>>>,
113}
114
115impl PolicyContext {
116    pub(crate) fn new(
117        live: Arc<RwLock<LivePolicy>>,
118        ceiling: LivePolicy,
119        pid_overrides: Arc<RwLock<HashMap<u32, HashSet<IpAddr>>>>,
120        denied_paths: Arc<RwLock<HashSet<String>>>,
121    ) -> Self {
122        Self {
123            live,
124            ceiling,
125            restricted: HashSet::new(),
126            pid_overrides,
127            denied_paths,
128        }
129    }
130
131    /// Current effective policy (snapshot).
132    pub fn current(&self) -> LivePolicy {
133        self.live.read().unwrap().clone()
134    }
135
136    /// Maximum permissions (immutable ceiling).
137    pub fn ceiling(&self) -> &LivePolicy {
138        &self.ceiling
139    }
140
141    // ---- Grant (expand within ceiling) ----
142
143    /// Expand allowed IPs. Cannot exceed ceiling. Fails if restricted.
144    pub fn grant_network(&mut self, ips: &[IpAddr]) -> Result<(), PolicyFnError> {
145        self.check_not_restricted("allowed_ips")?;
146        let mut live = self.live.write().unwrap();
147        for ip in ips {
148            if self.ceiling.allowed_ips.contains(ip) {
149                live.allowed_ips.insert(*ip);
150            }
151        }
152        Ok(())
153    }
154
155    /// Expand max memory. Cannot exceed ceiling. Fails if restricted.
156    pub fn grant_max_memory(&mut self, bytes: u64) -> Result<(), PolicyFnError> {
157        self.check_not_restricted("max_memory_bytes")?;
158        let mut live = self.live.write().unwrap();
159        live.max_memory_bytes = bytes.min(self.ceiling.max_memory_bytes);
160        Ok(())
161    }
162
163    /// Expand max processes. Cannot exceed ceiling. Fails if restricted.
164    pub fn grant_max_processes(&mut self, n: u32) -> Result<(), PolicyFnError> {
165        self.check_not_restricted("max_processes")?;
166        let mut live = self.live.write().unwrap();
167        live.max_processes = n.min(self.ceiling.max_processes);
168        Ok(())
169    }
170
171    // ---- Restrict (permanent shrink) ----
172
173    /// Permanently restrict allowed IPs. Cannot be granted back.
174    pub fn restrict_network(&mut self, ips: &[IpAddr]) {
175        self.restricted.insert("allowed_ips");
176        let mut live = self.live.write().unwrap();
177        live.allowed_ips = ips.iter().copied().collect();
178    }
179
180    /// Permanently restrict max memory. Cannot be granted back.
181    pub fn restrict_max_memory(&mut self, bytes: u64) {
182        self.restricted.insert("max_memory_bytes");
183        let mut live = self.live.write().unwrap();
184        live.max_memory_bytes = bytes;
185    }
186
187    /// Permanently restrict max processes. Cannot be granted back.
188    pub fn restrict_max_processes(&mut self, n: u32) {
189        self.restricted.insert("max_processes");
190        let mut live = self.live.write().unwrap();
191        live.max_processes = n;
192    }
193
194    // ---- Per-PID overrides ----
195
196    /// Restrict network for a specific PID (tighter than global policy).
197    pub fn restrict_pid_network(&self, pid: u32, ips: &[IpAddr]) {
198        let mut overrides = self.pid_overrides.write().unwrap();
199        overrides.insert(pid, ips.iter().copied().collect());
200    }
201
202    /// Remove per-PID override, falling back to global policy.
203    pub fn clear_pid_override(&self, pid: u32) {
204        let mut overrides = self.pid_overrides.write().unwrap();
205        overrides.remove(&pid);
206    }
207
208    // ---- Filesystem restriction ----
209
210    /// Deny access to a path (and all children). Checked by the supervisor
211    /// on openat/stat/access syscalls. Takes effect immediately.
212    pub fn deny_path(&self, path: &str) {
213        let mut denied = self.denied_paths.write().unwrap();
214        denied.insert(path.to_string());
215    }
216
217    /// Remove a previously denied path.
218    pub fn allow_path(&self, path: &str) {
219        let mut denied = self.denied_paths.write().unwrap();
220        denied.remove(path);
221    }
222
223    // ---- Internal ----
224
225    fn check_not_restricted(&self, field: &str) -> Result<(), PolicyFnError> {
226        if self.restricted.contains(field) {
227            Err(PolicyFnError::FieldRestricted(field.to_string()))
228        } else {
229            Ok(())
230        }
231    }
232}
233
234// ============================================================
235// Error type
236// ============================================================
237
238/// Errors from policy callback operations.
239#[derive(Debug, thiserror::Error)]
240pub enum PolicyFnError {
241    #[error("cannot grant restricted field: {0}")]
242    FieldRestricted(String),
243}
244
245// ============================================================
246// PolicyCallback type
247// ============================================================
248
/// Decision the policy callback returns for the syscall it was shown.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Verdict {
    /// Let the syscall proceed. This is the [`Default`] verdict.
    Allow,
    /// Let the syscall proceed, but flag it for audit logging.
    Audit,
    /// Reject the syscall with `EPERM`.
    Deny,
    /// Reject the syscall with the given errno.
    DenyWith(i32),
}

impl Default for Verdict {
    /// The default verdict is [`Verdict::Allow`].
    fn default() -> Self {
        Self::Allow
    }
}
265
266/// A callback function invoked for each intercepted syscall.
267///
268/// Called synchronously on a dedicated thread. For `execve` syscalls,
269/// the child process is held until the callback returns.
270///
271/// Return `Verdict::Deny` to block the current syscall. Only effective
272/// for held syscalls (execve/execveat) and network syscalls (connect/sendto).
273///
274/// Wrapped in `Arc` so that `Policy` remains `Clone`.
275pub type PolicyCallback = Arc<dyn Fn(SyscallEvent, &mut PolicyContext) -> Verdict + Send + Sync + 'static>;
276
277// ============================================================
278// Event channel types (used by supervisor integration)
279// ============================================================
280
281/// An event sent from the supervisor to the policy callback thread.
282pub struct PolicyEvent {
283    pub event: SyscallEvent,
284    /// If Some, the supervisor blocks until this is signaled.
285    /// Used for execve to allow pre-execution policy changes.
286    /// The Verdict is sent back to control allow/deny.
287    pub gate: Option<tokio::sync::oneshot::Sender<Verdict>>,
288}
289
290// ============================================================
291// Policy callback runner
292// ============================================================
293
294/// Spawn a thread that receives syscall events and calls the policy callback.
295///
296/// Returns a sender for the supervisor to push events into.
297pub(crate) fn spawn_policy_fn(
298    callback: PolicyCallback,
299    live: Arc<RwLock<LivePolicy>>,
300    ceiling: LivePolicy,
301    pid_overrides: Arc<RwLock<HashMap<u32, HashSet<IpAddr>>>>,
302    denied_paths: Arc<RwLock<HashSet<String>>>,
303) -> tokio::sync::mpsc::UnboundedSender<PolicyEvent> {
304    let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<PolicyEvent>();
305
306    std::thread::Builder::new()
307        .name("sandlock-policy-fn".to_string())
308        .spawn(move || {
309            let mut ctx = PolicyContext::new(live, ceiling, pid_overrides, denied_paths);
310
311            while let Some(pe) = rx.blocking_recv() {
312                let verdict = callback(pe.event, &mut ctx);
313
314                // Signal the supervisor with the verdict.
315                // For execve, this unblocks the child.
316                if let Some(gate) = pe.gate {
317                    let _ = gate.send(verdict);
318                }
319            }
320        })
321        .expect("failed to spawn policy-fn thread");
322
323    tx
324}
325
326// ============================================================
327// Tests
328// ============================================================
329
#[cfg(test)]
mod tests {
    use super::*;

    type SharedLive = Arc<RwLock<LivePolicy>>;
    type SharedOverrides = Arc<RwLock<HashMap<u32, HashSet<IpAddr>>>>;

    /// Fully populated policy; used both as the ceiling and as a "wide open"
    /// starting state.
    fn test_live() -> LivePolicy {
        let mut allowed_ips: HashSet<IpAddr> = HashSet::new();
        for text in &["127.0.0.1", "10.0.0.1"] {
            allowed_ips.insert(text.parse().unwrap());
        }
        LivePolicy {
            allowed_ips,
            max_memory_bytes: 1024 * 1024 * 1024,
            max_processes: 64,
        }
    }

    /// Policy with nothing granted yet.
    fn blank_live() -> LivePolicy {
        LivePolicy {
            allowed_ips: HashSet::new(),
            max_memory_bytes: 0,
            max_processes: 0,
        }
    }

    /// Builds a context over `initial` with `test_live()` as the ceiling and
    /// hands back the shared handles the assertions need.
    fn build_ctx(initial: LivePolicy) -> (PolicyContext, SharedLive, SharedOverrides) {
        let live: SharedLive = Arc::new(RwLock::new(initial));
        let overrides: SharedOverrides = Arc::new(RwLock::new(HashMap::new()));
        let denied = Arc::new(RwLock::new(HashSet::new()));
        let ctx = PolicyContext::new(
            Arc::clone(&live),
            test_live(),
            Arc::clone(&overrides),
            denied,
        );
        (ctx, live, overrides)
    }

    #[test]
    fn test_grant_within_ceiling() {
        let (mut ctx, live, _overrides) = build_ctx(blank_live());

        let loopback: IpAddr = "127.0.0.1".parse().unwrap();
        ctx.grant_network(&[loopback]).unwrap();

        let snapshot = live.read().unwrap();
        assert!(snapshot.allowed_ips.contains(&loopback));
    }

    #[test]
    fn test_grant_capped_to_ceiling() {
        let (mut ctx, live, _overrides) = build_ctx(blank_live());

        // An IP outside the ceiling is accepted by the call but never added.
        let foreign: IpAddr = "8.8.8.8".parse().unwrap();
        ctx.grant_network(&[foreign]).unwrap();

        let snapshot = live.read().unwrap();
        assert!(!snapshot.allowed_ips.contains(&foreign));
    }

    #[test]
    fn test_restrict_then_grant_fails() {
        let (mut ctx, _live, _overrides) = build_ctx(test_live());

        ctx.restrict_network(&[]);

        let loopback: IpAddr = "127.0.0.1".parse().unwrap();
        let outcome = ctx.grant_network(&[loopback]);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_restrict_max_memory() {
        let (mut ctx, live, _overrides) = build_ctx(test_live());

        let limit = 256 * 1024 * 1024;
        ctx.restrict_max_memory(limit);

        assert_eq!(live.read().unwrap().max_memory_bytes, limit);
    }

    #[test]
    fn test_pid_override() {
        let (ctx, _live, overrides) = build_ctx(test_live());

        let localhost: IpAddr = "127.0.0.1".parse().unwrap();
        ctx.restrict_pid_network(1234, &[localhost]);

        let table = overrides.read().unwrap();
        let pid_ips = table.get(&1234).unwrap();
        assert!(pid_ips.contains(&localhost));
        assert_eq!(pid_ips.len(), 1);
    }

    #[test]
    fn test_clear_pid_override() {
        let (ctx, _live, overrides) = build_ctx(test_live());

        let localhost: IpAddr = "127.0.0.1".parse().unwrap();
        ctx.restrict_pid_network(1234, &[localhost]);
        ctx.clear_pid_override(1234);

        let table = overrides.read().unwrap();
        assert!(!table.contains_key(&1234));
    }

    #[test]
    fn test_event_path_contains() {
        let argv: Vec<String> = vec!["python3".into(), "-c".into(), "print(1)".into()];
        let event = SyscallEvent {
            syscall: "execve".to_string(),
            category: SyscallCategory::Process,
            pid: 1,
            parent_pid: Some(0),
            path: Some("/usr/bin/python3".to_string()),
            host: None,
            port: None,
            size: None,
            argv: Some(argv),
            denied: false,
        };

        assert!(event.argv_contains("python3"));
        assert!(event.argv_contains("-c"));
        assert!(!event.argv_contains("ruby"));
        assert_eq!(event.category, SyscallCategory::Process);
        assert!(event.path_contains("python"));
        assert!(!event.path_contains("ruby"));
    }
}
453        assert_eq!(event.category, SyscallCategory::Process);
454        assert!(event.path_contains("python"));
455        assert!(!event.path_contains("ruby"));
456    }
457}