Skip to main content

secure_exec_kernel/
device_layer.rs

1use crate::vfs::{
2    VfsError, VfsResult, VirtualDirEntry, VirtualFileSystem, VirtualStat, VirtualUtimeSpec,
3};
4use getrandom::getrandom;
5use std::time::{SystemTime, UNIX_EPOCH};
6
/// Absolute paths of the emulated character devices served by this layer.
const DEVICE_PATHS: &[&str] = &[
    "/dev/null",
    "/dev/zero",
    "/dev/stdin",
    "/dev/stdout",
    "/dev/stderr",
    "/dev/urandom",
];

/// Virtual directories below `/dev` that this layer reports as existing.
const DEVICE_DIRS: &[&str] = &["/dev/fd", "/dev/pts"];
/// Byte count requested from a stream device when a whole-file read is issued.
const DEFAULT_STREAM_DEVICE_READ_BYTES: usize = 4096;
/// Entries reported when `/dev` is listed, as `(name, is_directory)` pairs.
///
/// Every path in `DEVICE_PATHS` and every directory in `DEVICE_DIRS` has an
/// entry here, so a listing of `/dev` agrees with what existence and stat
/// queries report (`pts` was previously missing even though `/dev/pts` is a
/// device directory).
const DEV_DIR_ENTRIES: &[(&str, bool)] = &[
    ("null", false),
    ("zero", false),
    ("stdin", false),
    ("stdout", false),
    ("stderr", false),
    ("urandom", false),
    ("fd", true),
    ("pts", true),
];
27
/// Wrapper around a value of type `V`.
///
/// When `V` is a `VirtualFileSystem`, this type is one as well: its trait
/// implementation answers operations on the emulated `/dev` tree and passes
/// everything else to the wrapped value.
#[derive(Clone, Debug)]
pub struct DeviceLayer<V> {
    /// The wrapped value that receives every operation not handled here.
    inner: V,
}
32
33pub fn create_device_layer<V>(vfs: V) -> DeviceLayer<V> {
34    DeviceLayer { inner: vfs }
35}
36
37impl<V> DeviceLayer<V> {
38    pub fn into_inner(self) -> V {
39        self.inner
40    }
41
42    pub fn inner(&self) -> &V {
43        &self.inner
44    }
45
46    pub fn inner_mut(&mut self) -> &mut V {
47        &mut self.inner
48    }
49}
50
51impl<V: VirtualFileSystem> VirtualFileSystem for DeviceLayer<V> {
52    fn read_file(&mut self, path: &str) -> VfsResult<Vec<u8>> {
53        if let Some(bytes) = read_stream_device(path, DEFAULT_STREAM_DEVICE_READ_BYTES) {
54            return bytes;
55        }
56
57        self.inner.read_file(path)
58    }
59
60    fn read_dir(&mut self, path: &str) -> VfsResult<Vec<String>> {
61        if path == "/dev" {
62            return Ok(DEV_DIR_ENTRIES
63                .iter()
64                .map(|(name, _)| String::from(*name))
65                .collect());
66        }
67        if DEVICE_DIRS.contains(&path) {
68            return Ok(Vec::new());
69        }
70        self.inner.read_dir(path)
71    }
72
73    fn read_dir_limited(&mut self, path: &str, max_entries: usize) -> VfsResult<Vec<String>> {
74        if path == "/dev" {
75            let entries = DEV_DIR_ENTRIES
76                .iter()
77                .map(|(name, _)| String::from(*name))
78                .collect::<Vec<_>>();
79            if entries.len() > max_entries {
80                return Err(VfsError::new(
81                    "ENOMEM",
82                    format!(
83                        "directory listing for '{path}' exceeds configured limit of {max_entries} entries"
84                    ),
85                ));
86            }
87            return Ok(entries);
88        }
89        if DEVICE_DIRS.contains(&path) {
90            return Ok(Vec::new());
91        }
92        self.inner.read_dir_limited(path, max_entries)
93    }
94
95    fn read_dir_with_types(&mut self, path: &str) -> VfsResult<Vec<VirtualDirEntry>> {
96        if path == "/dev" {
97            return Ok(DEV_DIR_ENTRIES
98                .iter()
99                .map(|(name, is_directory)| VirtualDirEntry {
100                    name: String::from(*name),
101                    is_directory: *is_directory,
102                    is_symbolic_link: false,
103                })
104                .collect());
105        }
106        if DEVICE_DIRS.contains(&path) {
107            return Ok(Vec::new());
108        }
109        self.inner.read_dir_with_types(path)
110    }
111
112    fn write_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
113        if is_sink_device_path(path) {
114            let _ = content.into();
115            return Ok(());
116        }
117        self.inner.write_file(path, content)
118    }
119
120    fn create_file_exclusive(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
121        if is_device_path(path) || is_device_dir(path) {
122            let _ = content.into();
123            return Err(VfsError::new(
124                "EEXIST",
125                format!("file already exists, open '{path}'"),
126            ));
127        }
128        self.inner.create_file_exclusive(path, content)
129    }
130
131    fn append_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<u64> {
132        if is_sink_device_path(path) {
133            return Ok(content.into().len() as u64);
134        }
135        self.inner.append_file(path, content)
136    }
137
138    fn create_dir(&mut self, path: &str) -> VfsResult<()> {
139        if is_device_dir(path) {
140            return Ok(());
141        }
142        self.inner.create_dir(path)
143    }
144
145    fn mkdir(&mut self, path: &str, recursive: bool) -> VfsResult<()> {
146        if is_device_dir(path) {
147            return Ok(());
148        }
149        self.inner.mkdir(path, recursive)
150    }
151
152    fn exists(&self, path: &str) -> bool {
153        if is_device_path(path) || is_device_dir(path) {
154            return true;
155        }
156        self.inner.exists(path)
157    }
158
159    fn stat(&mut self, path: &str) -> VfsResult<VirtualStat> {
160        if is_device_path(path) {
161            return Ok(device_stat(path));
162        }
163        if is_device_dir(path) {
164            return Ok(device_dir_stat(path));
165        }
166        self.inner.stat(path)
167    }
168
169    fn remove_file(&mut self, path: &str) -> VfsResult<()> {
170        if is_device_path(path) {
171            return Err(VfsError::permission_denied("unlink", path));
172        }
173        self.inner.remove_file(path)
174    }
175
176    fn remove_dir(&mut self, path: &str) -> VfsResult<()> {
177        if is_device_dir(path) {
178            return Err(VfsError::permission_denied("rmdir", path));
179        }
180        self.inner.remove_dir(path)
181    }
182
183    fn rename(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
184        if is_device_path(old_path) || is_device_path(new_path) {
185            return Err(VfsError::permission_denied("rename", old_path));
186        }
187        self.inner.rename(old_path, new_path)
188    }
189
190    fn realpath(&self, path: &str) -> VfsResult<String> {
191        if is_device_path(path) || is_device_dir(path) {
192            return Ok(String::from(path));
193        }
194        self.inner.realpath(path)
195    }
196
197    fn symlink(&mut self, target: &str, link_path: &str) -> VfsResult<()> {
198        self.inner.symlink(target, link_path)
199    }
200
201    fn read_link(&self, path: &str) -> VfsResult<String> {
202        self.inner.read_link(path)
203    }
204
205    fn lstat(&self, path: &str) -> VfsResult<VirtualStat> {
206        if is_device_path(path) {
207            return Ok(device_stat(path));
208        }
209        if is_device_dir(path) {
210            return Ok(device_dir_stat(path));
211        }
212        self.inner.lstat(path)
213    }
214
215    fn link(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
216        if is_device_path(old_path) {
217            return Err(VfsError::permission_denied("link", old_path));
218        }
219        self.inner.link(old_path, new_path)
220    }
221
222    fn chmod(&mut self, path: &str, mode: u32) -> VfsResult<()> {
223        if is_device_path(path) {
224            return Ok(());
225        }
226        self.inner.chmod(path, mode)
227    }
228
229    fn chown(&mut self, path: &str, uid: u32, gid: u32) -> VfsResult<()> {
230        if is_device_path(path) {
231            return Ok(());
232        }
233        self.inner.chown(path, uid, gid)
234    }
235
236    fn utimes(&mut self, path: &str, atime_ms: u64, mtime_ms: u64) -> VfsResult<()> {
237        if is_device_path(path) {
238            return Ok(());
239        }
240        self.inner.utimes(path, atime_ms, mtime_ms)
241    }
242
243    fn utimes_spec(
244        &mut self,
245        path: &str,
246        atime: VirtualUtimeSpec,
247        mtime: VirtualUtimeSpec,
248        follow_symlinks: bool,
249    ) -> VfsResult<()> {
250        if is_device_path(path) {
251            return Ok(());
252        }
253        self.inner.utimes_spec(path, atime, mtime, follow_symlinks)
254    }
255
256    fn truncate(&mut self, path: &str, length: u64) -> VfsResult<()> {
257        if is_sink_device_path(path) {
258            let _ = length;
259            return Ok(());
260        }
261        self.inner.truncate(path, length)
262    }
263
264    fn pread(&mut self, path: &str, offset: u64, length: usize) -> VfsResult<Vec<u8>> {
265        if let Some(bytes) = read_stream_device(path, length) {
266            return bytes;
267        }
268
269        self.inner.pread(path, offset, length)
270    }
271}
272
273fn is_device_path(path: &str) -> bool {
274    DEVICE_PATHS.contains(&path) || path.starts_with("/dev/fd/") || path.starts_with("/dev/pts/")
275}
276
277/// Standard emulated character devices (`/dev/null`, `/dev/zero`, `/dev/urandom`,
278/// `/dev/std{in,out,err}`). On Linux these are world-readable/writable and have no
279/// host backing — they are pure kernel emulations whose semantics this device layer
280/// already enforces (read/write for the stream devices, `EPERM` for unlink/rename).
281/// The permission layer therefore treats them as always accessible, so guest fs ops
282/// (`readFileSync`/`existsSync`/redirects on `/dev/null`, …) behave like native Linux
283/// regardless of the VM file-permission policy. Excludes `/dev/fd` and `/dev/pts`,
284/// which carry process-specific semantics the policy may legitimately govern.
285pub fn is_standard_device_path(path: &str) -> bool {
286    DEVICE_PATHS.contains(&path)
287}
288
/// True for the devices the layer treats as write sinks: `/dev/null`,
/// `/dev/zero`, `/dev/stdout`, `/dev/stderr` and `/dev/urandom`.
/// `/dev/stdin` is not a sink.
fn is_sink_device_path(path: &str) -> bool {
    const SINK_DEVICES: [&str; 5] = [
        "/dev/null",
        "/dev/zero",
        "/dev/stdout",
        "/dev/stderr",
        "/dev/urandom",
    ];
    SINK_DEVICES.contains(&path)
}
295
296fn is_device_dir(path: &str) -> bool {
297    path == "/dev" || DEVICE_DIRS.contains(&path)
298}
299
300fn device_stat(path: &str) -> VirtualStat {
301    let now = now_ms();
302    VirtualStat {
303        mode: 0o666,
304        size: 0,
305        blocks: 0,
306        dev: 2,
307        rdev: device_rdev(path),
308        is_directory: false,
309        is_symbolic_link: false,
310        atime_ms: now,
311        atime_nsec: 0,
312        mtime_ms: now,
313        mtime_nsec: 0,
314        ctime_ms: now,
315        ctime_nsec: 0,
316        birthtime_ms: now,
317        ino: device_ino(path),
318        nlink: 1,
319        uid: 0,
320        gid: 0,
321    }
322}
323
324fn device_dir_stat(path: &str) -> VirtualStat {
325    let now = now_ms();
326    VirtualStat {
327        mode: 0o755,
328        size: 0,
329        blocks: 0,
330        dev: 2,
331        rdev: 0,
332        is_directory: true,
333        is_symbolic_link: false,
334        atime_ms: now,
335        atime_nsec: 0,
336        mtime_ms: now,
337        mtime_nsec: 0,
338        ctime_ms: now,
339        ctime_nsec: 0,
340        birthtime_ms: now,
341        ino: device_ino(path),
342        nlink: 2,
343        uid: 0,
344        gid: 0,
345    }
346}
347
/// Returns the fixed inode number reported for an emulated path.
///
/// Each standard device and each emulated directory (`/dev`, `/dev/fd`,
/// `/dev/pts`) has its own number. The directories must not share one: they
/// are reported with the same `dev` value, and a parent and child directory
/// with identical (dev, ino) look like a filesystem loop to tree walkers.
/// Any other path (for example entries below `/dev/fd/`) gets the shared
/// fallback `0xffff_0000`.
fn device_ino(path: &str) -> u64 {
    match path {
        "/dev/null" => 0xffff_0001,
        "/dev/zero" => 0xffff_0002,
        "/dev/stdin" => 0xffff_0003,
        "/dev/stdout" => 0xffff_0004,
        "/dev/stderr" => 0xffff_0005,
        "/dev/urandom" => 0xffff_0006,
        "/dev" => 0xffff_0007,
        "/dev/fd" => 0xffff_0008,
        "/dev/pts" => 0xffff_0009,
        _ => 0xffff_0000,
    }
}
359
360fn device_rdev(path: &str) -> u64 {
361    match path {
362        "/dev/null" => encode_device_id(1, 3),
363        "/dev/zero" => encode_device_id(1, 5),
364        "/dev/stdin" => encode_device_id(5, 0),
365        "/dev/stdout" => encode_device_id(5, 1),
366        "/dev/stderr" => encode_device_id(5, 2),
367        "/dev/urandom" => encode_device_id(1, 9),
368        _ => 0,
369    }
370}
371
/// Packs a (major, minor) pair into a single device id using the Linux
/// `makedev` bit layout.
///
/// Layout of the result: minor bits 0..8 occupy bits 0..8, major bits 0..12
/// occupy bits 8..20, the remaining minor bits occupy bits 20..44 and the
/// remaining major bits occupy bits 44..64. Only the low 32 bits of each
/// argument are used.
///
/// For `major < 4096` and `minor < 256` this equals `(major << 8) | minor`.
/// Unlike that plain shift, larger minors no longer spill into the major
/// field, so distinct pairs never produce the same id.
fn encode_device_id(major: u64, minor: u64) -> u64 {
    let major_low = (major & 0x0000_0fff) << 8;
    let major_high = (major & 0xffff_f000) << 32;
    let minor_low = minor & 0x0000_00ff;
    let minor_high = (minor & 0xffff_ff00) << 12;
    major_high | minor_high | major_low | minor_low
}
375
376fn random_bytes(length: usize) -> VfsResult<Vec<u8>> {
377    let mut buffer = vec![0; length];
378    getrandom(&mut buffer)
379        .map_err(|error| VfsError::io(format!("failed to read system random bytes: {error}")))?;
380    Ok(buffer)
381}
382
383fn read_stream_device(path: &str, length: usize) -> Option<VfsResult<Vec<u8>>> {
384    match path {
385        "/dev/null" => Some(Ok(Vec::new())),
386        "/dev/zero" => Some(Ok(vec![0; length])),
387        "/dev/urandom" => Some(random_bytes(length)),
388        _ => None,
389    }
390}
391
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields 0 when the system clock reads earlier than the epoch.
fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as u64,
        Err(_) => 0,
    }
}