extrasafe_multiarch/builtins/
systemio.rs

1//! Contains a [`RuleSet`] for allowing IO-related syscalls, like file opening, reading, and writing.
2
3use std::collections::{HashSet, HashMap};
4use std::fs::File;
5use std::os::unix::io::AsRawFd;
6
7#[cfg(feature = "landlock")]
8use std::path::{Path, PathBuf};
9
10use crate::syscalls::Sysno;
11
12#[cfg(feature = "landlock")]
13use crate::LandlockRule;
14#[cfg(feature = "landlock")]
15use crate::landlock::{access, AccessFs, BitFlags};
16
17use crate::{RuleSet, SeccompRule};
18use super::YesReally;
19
// Syscall groups used by the `SystemIO` builder methods below.
//
// Entries preceded by `#[cfg(enabled_arch = "...")]` are only part of the table when that
// architecture is enabled; presumably those syscalls do not exist in the syscall table of the
// other supported architectures (verify against the `Sysno` definition).

/// Syscalls that read from a file descriptor. `lseek` is included here as well as in
/// [`IO_WRITE_SYSCALLS`].
pub(crate) const IO_READ_SYSCALLS: &[Sysno] = &[
    Sysno::read,
    Sysno::readv,
    Sysno::preadv,
    Sysno::preadv2,
    Sysno::pread64,
    Sysno::lseek,
];
/// Syscalls that write to a file descriptor or flush it (`fsync`, `fdatasync`). `lseek` is
/// included here as well as in [`IO_READ_SYSCALLS`].
pub(crate) const IO_WRITE_SYSCALLS: &[Sysno] = &[
    Sysno::write,
    Sysno::writev,
    Sysno::pwritev,
    Sysno::pwritev2,
    Sysno::pwrite64,
    Sysno::fsync,
    Sysno::fdatasync,
    Sysno::lseek,
];
/// Syscalls that open a path and return a new file descriptor.
pub(crate) const IO_OPEN_SYSCALLS: &[Sysno] = &[
    // `open` is only listed when x86_64 is enabled
    #[cfg(enabled_arch = "x86_64")]
    Sysno::open,
    Sysno::openat,
    Sysno::openat2
];
/// The `ioctl` and `fcntl` syscalls.
pub(crate) const IO_IOCTL_SYSCALLS: &[Sysno] = &[Sysno::ioctl, Sysno::fcntl];
// TODO: may want to separate fd-based and filename-based?
/// Syscalls that query file or directory metadata: the `stat` family, directory listing
/// (`getdents*`) and `getcwd`. This mixes fd-based and path-based syscalls.
pub(crate) const IO_METADATA_SYSCALLS: &[Sysno] = &[
    #[cfg(enabled_arch = "x86_64")]
    Sysno::stat,
    Sysno::fstat,
    // the "fstatat" syscall is named `newfstatat` on x86_64 and `fstatat` on aarch64/riscv64
    #[cfg(enabled_arch = "x86_64")]
    Sysno::newfstatat,
    #[cfg(any(enabled_arch = "aarch64", enabled_arch = "riscv64"))]
    Sysno::fstatat,
    #[cfg(enabled_arch = "x86_64")]
    Sysno::lstat,
    Sysno::statx,
    #[cfg(enabled_arch = "x86_64")]
    Sysno::getdents,
    Sysno::getdents64,
    Sysno::getcwd,
];
/// Syscalls that close file descriptors.
pub(crate) const IO_CLOSE_SYSCALLS: &[Sysno] = &[Sysno::close, Sysno::close_range];
/// Syscalls that remove a name from the filesystem.
pub(crate) const IO_UNLINK_SYSCALLS: &[Sysno] = &[
    // `unlink` is only listed when x86_64 is enabled
    #[cfg(enabled_arch = "x86_64")]
    Sysno::unlink,
    Sysno::unlinkat
];
68
69// TODO: split into SystemIO, SystemIOLandlock, SystemIOSeccompRestricted so that you can't call a
70// landlock function after using a seccomp argument filter function (or vice versa). You can still
71// do it in separate .enable() calls so it doesn't make that big a difference but it would be nice
72// to have.
73
/// A [`RuleSet`] representing syscalls that perform IO - open/close/read/write/seek/stat.
///
/// Configurable to allow subsets of IO syscalls and specific fds.
///
/// Values are built with [`SystemIO::nothing`] or [`SystemIO::everything`] followed by the
/// chained `allow_*` methods, each of which consumes and returns the `SystemIO`.
#[must_use]
pub struct SystemIO {
    /// Syscalls that are allowed unconditionally (reported by `simple_rules`)
    allowed: HashSet<Sysno>,
    /// Syscalls that are allowed with custom rules, e.g. only allow to specific fds
    /// (reported by `conditional_rules`)
    custom: HashMap<Sysno, Vec<SeccompRule>>,
    #[cfg(feature = "landlock")]
    /// Landlock rules, keyed by the path they apply to so that repeated calls for the same
    /// path merge their access flags into one rule
    landlock_rules: HashMap<PathBuf, LandlockRule>,
}
87
88impl SystemIO {
89    /// By default, allow no IO syscalls.
90    pub fn nothing() -> SystemIO {
91        SystemIO {
92            allowed: HashSet::new(),
93            custom: HashMap::new(),
94            #[cfg(feature = "landlock")]
95            landlock_rules: HashMap::new()
96        }
97    }
98
99    /// Allow all IO syscalls.
100    pub fn everything() -> SystemIO {
101        SystemIO::nothing()
102            .allow_read()
103            .allow_write()
104            .allow_open().yes_really()
105            .allow_metadata()
106            .allow_unlink()
107            .allow_close()
108    }
109
110    /// Allow `read` syscalls.
111    pub fn allow_read(mut self) -> SystemIO {
112        self.allowed.extend(IO_READ_SYSCALLS);
113
114        self
115    }
116
117    /// Allow `write` syscalls.
118    pub fn allow_write(mut self) -> SystemIO {
119        self.allowed.extend(IO_WRITE_SYSCALLS);
120
121        self
122    }
123
124    /// Allow `unlink` syscalls.
125    pub fn allow_unlink(mut self) -> SystemIO {
126        self.allowed.extend(IO_UNLINK_SYSCALLS);
127
128        self
129    }
130
131    /// Allow `open` syscalls.
132    ///
133    /// # Security
134    ///
135    /// The reason this function returns a [`YesReally`] is because it's easy to accidentally combine
136    /// it with another ruleset that allows `write` - for example the Network ruleset - even if you
137    /// only want to read files. Consider using `allow_open_directory()` or `allow_open_file()`.
138    pub fn allow_open(mut self) -> YesReally<SystemIO> {
139        self.allowed.extend(IO_OPEN_SYSCALLS);
140
141        YesReally::new(self)
142    }
143
144    /// Allow `open` syscalls but not with write flags.
145    ///
146    /// Note that the `openat2` syscall (which is not exposed by glibc anyway according to the
147    /// syscall manpage, and so probably isn't very common) is not supported here because it has a
148    /// separate configuration struct instead of a flag bitset.
149    pub fn allow_open_readonly(mut self) -> SystemIO {
150        const O_WRONLY: u64 = libc::O_WRONLY as u64;
151        const O_RDWR: u64 = libc::O_RDWR as u64;
152        const O_APPEND: u64 = libc::O_APPEND as u64;
153        const O_CREAT: u64 = libc::O_CREAT as u64;
154        const O_EXCL: u64 = libc::O_EXCL as u64;
155        // TMPFILE causes problems because it's defined as __O_TMPFILE | O_DIRECTORY
156        // i.e. just the tmpfile bit or the o_directory bit. O_DIRECTORY by itself is fine because
157        // it just causse the open to fail if it's a directory. however the manpage states that
158        // WRONLY or RDWR is required for O_TMPFILE so we're fine to leave it out anyway.
159        // const O_TMPFILE: u64 = libc::O_TMPFILE as u64;
160
161        const WRITECREATE: u64 = O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_EXCL;// | O_TMPFILE;
162
163        // flags are the second argument for open but the third for openat
164        #[cfg(enabled_arch = "x86_64")]
165        {
166            let rule = SeccompRule::new(Sysno::open)
167                .and_condition(seccomp_arg_filter!(arg1 & WRITECREATE == 0));
168            self.custom.entry(Sysno::open)
169                .or_insert_with(Vec::new)
170                .push(rule);
171        }
172
173        let rule = SeccompRule::new(Sysno::openat)
174            .and_condition(seccomp_arg_filter!(arg2 & WRITECREATE == 0));
175        self.custom.entry(Sysno::openat)
176            .or_insert_with(Vec::new)
177            .push(rule);
178
179        self
180    }
181
182    /// Allow `stat` syscalls.
183    pub fn allow_metadata(mut self) -> SystemIO {
184        self.allowed.extend(IO_METADATA_SYSCALLS);
185
186        self
187    }
188
189    /// Allow `ioctl` and `fcntl` syscalls.
190    pub fn allow_ioctl(mut self) -> SystemIO {
191        self.allowed.extend(IO_IOCTL_SYSCALLS);
192
193        self
194    }
195
196    /// Allow `close` syscalls.
197    pub fn allow_close(mut self) -> SystemIO {
198        self.allowed.extend(IO_CLOSE_SYSCALLS);
199
200        self
201    }
202
203    /// Allow reading from stdin
204    pub fn allow_stdin(mut self) -> SystemIO {
205        let rule = SeccompRule::new(Sysno::read)
206            .and_condition(seccomp_arg_filter!(arg0 == 0));
207        self.custom.entry(Sysno::read)
208            .or_insert_with(Vec::new)
209            .push(rule);
210
211        self
212    }
213
214    /// Allow writing to stdout
215    pub fn allow_stdout(mut self) -> SystemIO {
216        let rule = SeccompRule::new(Sysno::write)
217            .and_condition(seccomp_arg_filter!(arg0 == 1));
218        self.custom.entry(Sysno::write)
219            .or_insert_with(Vec::new)
220            .push(rule);
221
222        self
223    }
224
225    /// Allow writing to stderr
226    pub fn allow_stderr(mut self) -> SystemIO {
227        let rule = SeccompRule::new(Sysno::write)
228            .and_condition(seccomp_arg_filter!(arg0 == 2));
229        self.custom.entry(Sysno::write)
230            .or_insert_with(Vec::new)
231            .push(rule);
232
233        self
234    }
235
236    /// Allow reading a given open [File]. Note that with just this function, you will not be able
237    /// to close the file under this context.
238    ///
239    /// # Security considerations
240    ///
241    /// If another file or socket is opened after the file provided to this function is closed,
242    /// it's possible that the fd will be reused and therefore may be read from.
243    #[allow(clippy::missing_panics_doc)]
244    pub fn allow_file_read(mut self, file: &File) -> SystemIO {
245        let fd = file.as_raw_fd().try_into().expect("provided fd was negative");
246        for &syscall in IO_READ_SYSCALLS {
247            let rule = SeccompRule::new(syscall)
248                .and_condition(seccomp_arg_filter!(arg0 == fd));
249            self.custom.entry(syscall)
250                .or_insert_with(Vec::new)
251                .push(rule);
252        }
253        for &syscall in IO_METADATA_SYSCALLS {
254            let rule = SeccompRule::new(syscall)
255                .and_condition(seccomp_arg_filter!(arg0 == fd));
256            self.custom.entry(syscall)
257                .or_insert_with(Vec::new)
258                .push(rule);
259        }
260
261        self
262    }
263
264    /// Allow writing to a given open [File]. Note that with just this, you will not be able to
265    /// close the file under this context.
266    ///
267    /// # Security considerations
268    ///
269    /// If another file or socket is opened after the file provided to this function is closed,
270    /// it's possible that the fd will be reused and therefore may be written to.
271    #[allow(clippy::missing_panics_doc)]
272    pub fn allow_file_write(mut self, file: &File) -> SystemIO {
273        let fd = file.as_raw_fd().try_into().expect("provided fd was negative");
274        let rule = SeccompRule::new(Sysno::write)
275            .and_condition(seccomp_arg_filter!(arg0 == fd));
276        self.custom.entry(Sysno::write)
277            .or_insert_with(Vec::new)
278            .push(rule);
279
280        self
281    }
282}
283
284impl RuleSet for SystemIO {
285    fn simple_rules(&self) -> Vec<crate::syscalls::Sysno> {
286        self.allowed.iter().copied().collect()
287    }
288
289    fn conditional_rules(&self) -> HashMap<crate::syscalls::Sysno, Vec<SeccompRule>> {
290        self.custom.clone()
291    }
292
293    #[cfg(feature = "landlock")]
294    fn landlock_rules(&self) -> Vec<LandlockRule> {
295        self.landlock_rules.values().cloned().collect()
296    }
297
298    fn name(&self) -> &'static str {
299        "SystemIO"
300    }
301}
302
303// landlock impls for SystemIO
304
305#[cfg(feature = "landlock")]
306impl SystemIO {
307    fn insert_flags<P: AsRef<Path>>(&mut self, path: P, new_flags: BitFlags<AccessFs>) {
308        let path = path.as_ref().to_path_buf();
309        let _flag = self.landlock_rules.entry(path.clone())
310            .and_modify(|existing_flags| existing_flags.access_rules.insert(new_flags))
311            .or_insert_with(|| LandlockRule::new(&path, new_flags));
312    }
313
314    /// Use Landlock to allow only files within the specified directory, or the specific file, to
315    /// be read. If this function is called multiple times, all directories and files passed will
316    /// be allowed.
317    ///
318    /// Note that if this is used with [`allow_open_readonly`] or other syscall-argument restricting
319    /// methods, applying the `SafetyContext` will fail.
320    pub fn allow_read_path<P: AsRef<Path>>(mut self, path: P) -> SystemIO {
321        let new_flags = access::read_path();
322        self.insert_flags(path, new_flags);
323
324        // allow relevant syscalls as well
325        self.allow_close()
326            .allow_read()
327            .allow_metadata()
328            .allow_open().yes_really()
329    }
330
331    /// Use Landlock to allow only the specified file to be written to. If this function is called
332    /// multiple times, all files passed will be allowed.
333    ///
334    /// Note that if this is used with [`allow_open_readonly`] or other syscall-argument restricting
335    /// methods, applying the `SafetyContext` will fail.
336    pub fn allow_write_file<P: AsRef<Path>>(mut self, path: P) -> SystemIO {
337        let new_flags = access::write_file();
338        self.insert_flags(path, new_flags);
339
340        // allow relevant syscalls as well
341        self.allow_close()
342            .allow_write()
343            .allow_metadata()
344            .allow_open().yes_really()
345    }
346
347    /// Use Landlock to allow files to be created in the given directory. If this function is called
348    /// multiple times, all directories passed will be allowed.
349    ///
350    /// Note that if this is used with [`allow_open_readonly`] or other syscall-argument restricting
351    /// methods, applying the `SafetyContext` will fail.
352    pub fn allow_create_in_dir<P: AsRef<Path>>(mut self, path: P) -> SystemIO {
353        // write file here allows us to create files, but in order to actually write to them, you'd
354        // need to enable the write syscall.
355        let new_flags = access::create_file() | access::write_file();
356        self.insert_flags(path, new_flags);
357
358        // allow relevant syscalls as well
359        self.allowed.extend(&[Sysno::creat]);
360        self.allow_open().yes_really()
361    }
362
363    /// Use Landlock to allow listing the contents of the given directory. If this function is
364    /// called multiple times, all directories passed will be allowed.
365    pub fn allow_list_dir<P: AsRef<Path>>(mut self, path: P) -> SystemIO {
366        let new_flags = access::list_dir();
367        self.insert_flags(path, new_flags);
368
369        // allow relevant syscalls as well
370        self.allow_metadata()
371            .allow_close()
372            .allow_ioctl()
373            .allow_open().yes_really()
374    }
375
376    /// Use Landlock to allow creating directories. If this function is called multiple times, all
377    /// directories passed will be allowed.
378    pub fn allow_create_dir<P: AsRef<Path>>(mut self, path: P) -> SystemIO {
379        let new_flags = access::create_dir();
380        self.insert_flags(path, new_flags);
381
382        // allow relevant syscalls as well
383        self.allowed.extend(&[Sysno::mkdir, Sysno::mkdirat]);
384        self
385    }
386
387    /// Use Landlock to allow deleting files. If this function is called multiple times, all files
388    /// passed will be allowed.
389    pub fn allow_remove_file<P: AsRef<Path>>(mut self, path: P) -> SystemIO {
390        let new_flags = access::delete_file();
391        self.insert_flags(path, new_flags);
392
393        // allow relevant syscalls as well
394        self.allowed.extend(&[Sysno::unlink, Sysno::unlinkat]);
395        self
396    }
397
398    /// Use Landlock to allow deleting directories. If this function is called multiple times, all
399    /// directories passed will be allowed.
400    ///
401    /// Note that this allows you to delete the contents of the *subdirectories* of this directory,
402    /// not the directory itself.
403    ///
404    /// Also recall that that in order to delete a directory with `unlink` or `rmdir` it must be
405    /// empty.
406    pub fn allow_remove_dir<P: AsRef<Path>>(mut self, path: P) -> SystemIO {
407        let new_flags = access::delete_dir();
408        self.insert_flags(path, new_flags);
409
410        // allow relevant syscalls as well
411        // unlinkat may be be used to remove directories as well so we include it here, since files
412        // will be protected by landlock anyway.
413        self.allowed.extend(&[Sysno::rmdir, Sysno::unlinkat]);
414        self
415    }
416}
417
418// TODO: figure out a good way to put this into the Networking Ruleset?
419// the biggest issue is that stuff like allow_close, allow_read is defined here and there's not a
420// great way to compose different parts from different RuleSets. It might be best to directly
421// expose the internal allowed, conditional_rules, landlock_rules as &mut pointers (and then also
422// keep the gather_rules) so that you can basically mix and match from different rulesets in a
423// single function
424#[cfg(feature = "landlock")]
425impl SystemIO {
426    /// Use Landlock to allow access to SSL certificates in /etc/ssl, /etc/ca-certificates, etc
427    ///
428    /// Note that crates using rustls and webpki-roots you actually don't need these because the
429    /// certificates are embedded in the output binary.
430    pub fn allow_ssl_files(mut self) -> SystemIO {
431        let new_flags = access::read_path() | access::list_dir();
432        for path in &["/etc/ssl/certs", "/etc/ca-certificates"] {
433            self.insert_flags(path, new_flags);
434        }
435        // I'm not 100% sure why openssl is checking localtime but it appears to be doing so
436        self.insert_flags("/etc/localtime", access::read_path());
437
438        // allow relevant syscalls as well
439        self.allow_close()
440            .allow_read()
441            .allow_metadata()
442            .allow_open().yes_really()
443    }
444
445    /// Use Landlock to allow access to DNS files, like /etc/resolv.conf
446    pub fn allow_dns_files(mut self) -> SystemIO {
447        let new_flags = access::read_path();
448        // TODO: libnss exec perms?
449        for path in &["/etc/resolv.conf", "/etc/hosts", "/etc/host.conf", "/etc/nsswitch.conf", "/etc/gai.conf"] {
450            self.insert_flags(path, new_flags);
451        }
452        // allow relevant syscalls as well
453        self.allow_close()
454            .allow_read()
455            .allow_metadata()
456            .allow_open().yes_really()
457    }
458}