Skip to main content

secure_exec_kernel/
root_fs.rs

1use crate::overlay_fs::{OverlayFileSystem, OverlayMode};
2use crate::resource_accounting::{
3    ResourceLimits, DEFAULT_MAX_FILESYSTEM_BYTES, DEFAULT_MAX_INODE_COUNT,
4};
5use crate::vfs::{
6    normalize_path, MemoryFileSystem, VfsError, VfsResult, VirtualFileSystem, VirtualUtimeSpec,
7    MAX_PATH_LENGTH,
8};
9use base64::Engine;
10use serde::Deserialize;
11use std::collections::BTreeSet;
12
// The base filesystem fixture is staged into OUT_DIR by build.rs: copied from
// the canonical `packages/secure-exec-core/fixtures/base-filesystem.json`
// during in-tree builds, or from the vendored `assets/base-filesystem.json`
// copy bundled in the published crate.
const BUNDLED_BASE_FILESYSTEM_JSON: &str =
    include_str!(concat!(env!("OUT_DIR"), "/base-filesystem.json"));
/// Format tag written into every snapshot produced by `encode_snapshot`.
pub const ROOT_FILESYSTEM_SNAPSHOT_FORMAT: &str = "secure_exec_filesystem_snapshot_v1";
// Older format tag that `is_supported_root_filesystem_snapshot_format` still
// accepts when decoding; it is never written by this module.
const LEGACY_AGENT_OS_ROOT_FILESYSTEM_SNAPSHOT_FORMAT: &str = "agent_os_filesystem_snapshot_v1";
// Flat allowance added once by `encoded_snapshot_limit` on top of the
// content and per-entry budgets.
const ROOT_FILESYSTEM_SNAPSHOT_FIXED_OVERHEAD_BYTES: usize = 4 * 1024;
// Per-entry allowance used by `encoded_snapshot_limit`: a maximum-length path
// plus 1 KiB for the remaining serialized fields.
const ROOT_FILESYSTEM_SNAPSHOT_ENTRY_OVERHEAD_BYTES: usize = MAX_PATH_LENGTH + 1024;
// Directories that `minimal_root_snapshot` creates when the default base
// layer is disabled and no explicit lower layer was supplied.
const DEFAULT_ROOT_DIRECTORIES: &[&str] = &[
    "/",
    "/dev",
    "/proc",
    "/tmp",
    "/bin",
    "/lib",
    "/sbin",
    "/boot",
    "/etc",
    "/root",
    "/run",
    "/srv",
    "/sys",
    "/opt",
    "/mnt",
    "/media",
    "/home",
    "/usr",
    "/usr/bin",
    "/usr/games",
    "/usr/include",
    "/usr/lib",
    "/usr/libexec",
    "/usr/man",
    "/usr/local",
    "/usr/local/bin",
    "/usr/sbin",
    "/usr/share",
    "/usr/share/man",
    "/var",
    "/var/cache",
    "/var/empty",
    "/var/lib",
    "/var/lock",
    "/var/log",
    "/var/run",
    "/var/spool",
    "/var/tmp",
    "/etc/agentos",
];
// Bootstrap entries at or below these paths are silently skipped by
// `RootFileSystem::apply_bootstrap_entries`.
const KERNEL_RESERVED_BOOTSTRAP_PATH_PREFIXES: &[&str] = &["/dev", "/proc", "/sys"];
65
/// Error produced by root filesystem construction, import, and snapshot
/// operations. It carries a single human-readable message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RootFilesystemError {
    message: String,
}

impl RootFilesystemError {
    /// Builds an error from anything convertible into a `String`.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl std::fmt::Display for RootFilesystemError {
    /// Writes the stored message verbatim; width and fill flags are ignored.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{}", self.message)
    }
}

impl std::error::Error for RootFilesystemError {}
86
87impl From<VfsError> for RootFilesystemError {
88    fn from(error: VfsError) -> Self {
89        Self::new(error.to_string())
90    }
91}
92
/// The kind of node a [`FilesystemEntry`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilesystemEntryKind {
    File,
    Directory,
    Symlink,
}

/// One node of a filesystem snapshot: its path, kind, ownership, permission
/// bits, and an optional payload (`content` for files, `target` for symlinks).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilesystemEntry {
    pub path: String,
    pub kind: FilesystemEntryKind,
    pub mode: u32,
    pub uid: u32,
    pub gid: u32,
    pub content: Option<Vec<u8>>,
    pub target: Option<String>,
}

impl FilesystemEntry {
    /// Common starting point for the public constructors: an entry owned by
    /// uid 0 / gid 0 with no payload attached yet.
    fn root_owned(path: String, kind: FilesystemEntryKind, mode: u32) -> Self {
        Self {
            path,
            kind,
            mode,
            uid: 0,
            gid: 0,
            content: None,
            target: None,
        }
    }

    /// Directory entry with mode `0o755`, owned by uid 0 / gid 0.
    pub fn directory(path: impl Into<String>) -> Self {
        Self::root_owned(path.into(), FilesystemEntryKind::Directory, 0o755)
    }

    /// Regular file entry with mode `0o644`, owned by uid 0 / gid 0, holding
    /// `content`.
    pub fn file(path: impl Into<String>, content: impl Into<Vec<u8>>) -> Self {
        let mut entry = Self::root_owned(path.into(), FilesystemEntryKind::File, 0o644);
        entry.content = Some(content.into());
        entry
    }

    /// Symlink entry with mode `0o777`, owned by uid 0 / gid 0, pointing at
    /// `target`.
    pub fn symlink(path: impl Into<String>, target: impl Into<String>) -> Self {
        let mut entry = Self::root_owned(path.into(), FilesystemEntryKind::Symlink, 0o777);
        entry.target = Some(target.into());
        entry
    }
}
148
/// A flat list of filesystem entries. It is used both as a lower layer when
/// building a `RootFileSystem` and as the result of `RootFileSystem::snapshot`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RootFilesystemSnapshot {
    pub entries: Vec<FilesystemEntry>,
}
153
154#[derive(Debug, Clone, Copy, PartialEq, Eq)]
155pub struct RootFilesystemImportLimits {
156    pub max_encoded_snapshot_bytes: Option<usize>,
157    pub max_filesystem_bytes: Option<u64>,
158    pub max_inode_count: Option<usize>,
159}
160
161impl RootFilesystemImportLimits {
162    pub fn from_resource_limits(limits: &ResourceLimits) -> Self {
163        Self {
164            max_encoded_snapshot_bytes: encoded_snapshot_limit(
165                limits.max_filesystem_bytes,
166                limits.max_inode_count,
167            ),
168            max_filesystem_bytes: limits.max_filesystem_bytes,
169            max_inode_count: limits.max_inode_count,
170        }
171    }
172}
173
174impl Default for RootFilesystemImportLimits {
175    fn default() -> Self {
176        Self {
177            max_encoded_snapshot_bytes: encoded_snapshot_limit(
178                Some(DEFAULT_MAX_FILESYSTEM_BYTES),
179                Some(DEFAULT_MAX_INODE_COUNT),
180            ),
181            max_filesystem_bytes: Some(DEFAULT_MAX_FILESYSTEM_BYTES),
182            max_inode_count: Some(DEFAULT_MAX_INODE_COUNT),
183        }
184    }
185}
186
/// Write policy applied to a `RootFileSystem` once bootstrap is finished.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RootFilesystemMode {
    /// `finish_bootstrap` takes no extra action for this mode.
    Ephemeral,
    /// `finish_bootstrap` calls `lock_writes` on the overlay for this mode.
    ReadOnly,
}
192
193#[derive(Debug, Clone, PartialEq, Eq)]
194pub struct RootFilesystemDescriptor {
195    pub mode: RootFilesystemMode,
196    pub disable_default_base_layer: bool,
197    pub lowers: Vec<RootFilesystemSnapshot>,
198    pub bootstrap_entries: Vec<FilesystemEntry>,
199}
200
201impl Default for RootFilesystemDescriptor {
202    fn default() -> Self {
203        Self {
204            mode: RootFilesystemMode::Ephemeral,
205            disable_default_base_layer: false,
206            lowers: Vec::new(),
207            bootstrap_entries: Vec::new(),
208        }
209    }
210}
211
212#[derive(Debug)]
213pub struct RootFileSystem {
214    overlay: OverlayFileSystem,
215    mode: RootFilesystemMode,
216    bootstrap_finished: bool,
217}
218
219impl RootFileSystem {
220    pub fn from_descriptor(
221        descriptor: RootFilesystemDescriptor,
222    ) -> Result<Self, RootFilesystemError> {
223        Self::from_descriptor_with_import_limits(descriptor, &RootFilesystemImportLimits::default())
224    }
225
226    pub fn from_descriptor_with_import_limits(
227        descriptor: RootFilesystemDescriptor,
228        limits: &RootFilesystemImportLimits,
229    ) -> Result<Self, RootFilesystemError> {
230        let mut lower_snapshots = descriptor.lowers.clone();
231        if !descriptor.disable_default_base_layer {
232            lower_snapshots.push(load_bundled_base_snapshot_with_limits(limits)?);
233        } else if lower_snapshots.is_empty() {
234            lower_snapshots.push(minimal_root_snapshot());
235        }
236        validate_descriptor_import_limits(
237            &lower_snapshots,
238            &descriptor.bootstrap_entries,
239            limits,
240            "root filesystem descriptor",
241        )?;
242
243        let lowers = lower_snapshots
244            .iter()
245            .map(snapshot_to_memory_filesystem)
246            .collect::<Result<Vec<_>, _>>()?;
247
248        let mut root = Self {
249            overlay: OverlayFileSystem::new(lowers, OverlayMode::Ephemeral),
250            mode: descriptor.mode,
251            bootstrap_finished: false,
252        };
253        root.apply_bootstrap_entries(&descriptor.bootstrap_entries)?;
254        Ok(root)
255    }
256
257    pub fn apply_bootstrap_entries(
258        &mut self,
259        entries: &[FilesystemEntry],
260    ) -> Result<(), RootFilesystemError> {
261        if self.bootstrap_finished {
262            return Err(RootFilesystemError::new(
263                "root filesystem bootstrap is already finished",
264            ));
265        }
266
267        for entry in sort_entries(entries.to_vec()) {
268            if is_kernel_reserved_bootstrap_path(&entry.path) {
269                continue;
270            }
271            apply_entry(&mut self.overlay, &entry)?;
272        }
273        Ok(())
274    }
275
276    pub fn finish_bootstrap(&mut self) {
277        if self.bootstrap_finished {
278            return;
279        }
280        self.bootstrap_finished = true;
281        if self.mode == RootFilesystemMode::ReadOnly {
282            self.overlay.lock_writes();
283        }
284    }
285
286    pub fn snapshot(&mut self) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
287        Ok(RootFilesystemSnapshot {
288            entries: snapshot_virtual_filesystem(&mut self.overlay, "/")?,
289        })
290    }
291
292    pub fn check_rename_copy_up_limits(
293        &mut self,
294        old_path: &str,
295        new_path: &str,
296        max_bytes: Option<u64>,
297        max_inodes: Option<usize>,
298    ) -> VfsResult<()> {
299        self.overlay
300            .check_rename_copy_up_limits(old_path, new_path, max_bytes, max_inodes)
301    }
302}
303
// Every method forwards unchanged to the inner overlay; `RootFileSystem`
// adds no path rewriting or permission logic of its own here.
// NOTE(review): write blocking after `finish_bootstrap` in `ReadOnly` mode is
// presumably enforced inside the overlay via `lock_writes` — confirm there.
impl VirtualFileSystem for RootFileSystem {
    fn read_file(&mut self, path: &str) -> VfsResult<Vec<u8>> {
        self.overlay.read_file(path)
    }

    fn read_dir(&mut self, path: &str) -> VfsResult<Vec<String>> {
        self.overlay.read_dir(path)
    }

    fn read_dir_limited(&mut self, path: &str, max_entries: usize) -> VfsResult<Vec<String>> {
        self.overlay.read_dir_limited(path, max_entries)
    }

    fn read_dir_with_types(&mut self, path: &str) -> VfsResult<Vec<crate::vfs::VirtualDirEntry>> {
        self.overlay.read_dir_with_types(path)
    }

    // The three content-taking methods convert `content` to an owned
    // `Vec<u8>` before forwarding.
    fn write_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
        self.overlay.write_file(path, content.into())
    }

    fn create_file_exclusive(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
        self.overlay.create_file_exclusive(path, content.into())
    }

    fn append_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<u64> {
        self.overlay.append_file(path, content.into())
    }

    fn create_dir(&mut self, path: &str) -> VfsResult<()> {
        self.overlay.create_dir(path)
    }

    fn mkdir(&mut self, path: &str, recursive: bool) -> VfsResult<()> {
        self.overlay.mkdir(path, recursive)
    }

    fn exists(&self, path: &str) -> bool {
        self.overlay.exists(path)
    }

    fn stat(&mut self, path: &str) -> VfsResult<crate::vfs::VirtualStat> {
        self.overlay.stat(path)
    }

    fn remove_file(&mut self, path: &str) -> VfsResult<()> {
        self.overlay.remove_file(path)
    }

    fn remove_dir(&mut self, path: &str) -> VfsResult<()> {
        self.overlay.remove_dir(path)
    }

    fn rename(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
        self.overlay.rename(old_path, new_path)
    }

    fn realpath(&self, path: &str) -> VfsResult<String> {
        self.overlay.realpath(path)
    }

    fn symlink(&mut self, target: &str, link_path: &str) -> VfsResult<()> {
        self.overlay.symlink(target, link_path)
    }

    fn read_link(&self, path: &str) -> VfsResult<String> {
        self.overlay.read_link(path)
    }

    fn lstat(&self, path: &str) -> VfsResult<crate::vfs::VirtualStat> {
        self.overlay.lstat(path)
    }

    fn link(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
        self.overlay.link(old_path, new_path)
    }

    fn chmod(&mut self, path: &str, mode: u32) -> VfsResult<()> {
        self.overlay.chmod(path, mode)
    }

    fn chown(&mut self, path: &str, uid: u32, gid: u32) -> VfsResult<()> {
        self.overlay.chown(path, uid, gid)
    }

    fn utimes(&mut self, path: &str, atime_ms: u64, mtime_ms: u64) -> VfsResult<()> {
        self.overlay.utimes(path, atime_ms, mtime_ms)
    }

    fn utimes_spec(
        &mut self,
        path: &str,
        atime: VirtualUtimeSpec,
        mtime: VirtualUtimeSpec,
        follow_symlinks: bool,
    ) -> VfsResult<()> {
        self.overlay
            .utimes_spec(path, atime, mtime, follow_symlinks)
    }

    fn truncate(&mut self, path: &str, length: u64) -> VfsResult<()> {
        self.overlay.truncate(path, length)
    }

    fn pread(&mut self, path: &str, offset: u64, length: usize) -> VfsResult<Vec<u8>> {
        self.overlay.pread(path, offset, length)
    }
}
412
// JSON shape of the bundled base filesystem fixture: a `filesystem` object
// with no `format` tag.
#[derive(Debug, Deserialize)]
struct RawBaseFilesystemSnapshot {
    filesystem: RawFilesystemEntries,
}

// Wrapper object holding the entry list.
#[derive(Debug, Deserialize)]
struct RawFilesystemEntries {
    entries: Vec<RawFilesystemEntry>,
}

// One entry as it appears in JSON, before `convert_raw_entry` decodes it.
#[derive(Debug, Deserialize)]
struct RawFilesystemEntry {
    path: String,
    // Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    kind: RawFilesystemEntryKind,
    // Permission bits as an octal string; parsed with radix 8.
    mode: String,
    uid: u32,
    gid: u32,
    // Payload text; interpreted according to `encoding`.
    #[serde(default)]
    content: Option<String>,
    // `"base64"`, `"utf8"`, or absent (treated like `"utf8"`).
    #[serde(default)]
    encoding: Option<String>,
    // Symlink target, when present.
    #[serde(default)]
    target: Option<String>,
}

// Entry kind as spelled in JSON (`file`, `directory`, `symlink`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
enum RawFilesystemEntryKind {
    File,
    Directory,
    Symlink,
}

// JSON shape of an exported snapshot: a `format` tag plus the entry list.
#[derive(Debug, Deserialize)]
struct RawSnapshotExport {
    format: String,
    filesystem: RawFilesystemEntries,
}
452
// Top-level JSON document written by `encode_snapshot`.
#[derive(Debug, serde::Serialize)]
struct SnapshotExport<'a> {
    format: &'static str,
    filesystem: SnapshotFilesystem<'a>,
}

// Wrapper object holding the serialized entry list.
#[derive(Debug, serde::Serialize)]
struct SnapshotFilesystem<'a> {
    entries: Vec<SerializedFilesystemEntry<'a>>,
}

// Borrowed, serialization-ready view of a `FilesystemEntry`.
#[derive(Debug, serde::Serialize)]
struct SerializedFilesystemEntry<'a> {
    path: &'a str,
    // Emitted under the JSON key `type`.
    #[serde(rename = "type")]
    kind: &'static str,
    // Permission bits rendered as an octal string.
    mode: String,
    uid: u32,
    gid: u32,
    // The three optional fields are omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    encoding: Option<&'static str>,
    #[serde(skip_serializing_if = "Option::is_none")]
    target: Option<&'a str>,
}
479
480pub fn encode_snapshot(snapshot: &RootFilesystemSnapshot) -> Result<Vec<u8>, RootFilesystemError> {
481    let serialized_entries = snapshot
482        .entries
483        .iter()
484        .map(|entry| SerializedFilesystemEntry {
485            path: &entry.path,
486            kind: match entry.kind {
487                FilesystemEntryKind::File => "file",
488                FilesystemEntryKind::Directory => "directory",
489                FilesystemEntryKind::Symlink => "symlink",
490            },
491            mode: format!("{:o}", entry.mode),
492            uid: entry.uid,
493            gid: entry.gid,
494            content: entry
495                .content
496                .as_ref()
497                .map(|bytes| base64::engine::general_purpose::STANDARD.encode(bytes)),
498            encoding: entry.content.as_ref().map(|_| "base64"),
499            target: entry.target.as_deref(),
500        })
501        .collect::<Vec<_>>();
502
503    serde_json::to_vec(&SnapshotExport {
504        format: ROOT_FILESYSTEM_SNAPSHOT_FORMAT,
505        filesystem: SnapshotFilesystem {
506            entries: serialized_entries,
507        },
508    })
509    .map_err(|error| RootFilesystemError::new(format!("serialize root snapshot: {error}")))
510}
511
512pub fn decode_snapshot(bytes: &[u8]) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
513    decode_snapshot_with_import_limits(bytes, &RootFilesystemImportLimits::default())
514}
515
516pub fn decode_snapshot_with_import_limits(
517    bytes: &[u8],
518    limits: &RootFilesystemImportLimits,
519) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
520    validate_encoded_snapshot_size(bytes, limits, "root snapshot")?;
521    let raw: RawSnapshotExport = serde_json::from_slice(bytes)
522        .map_err(|error| RootFilesystemError::new(format!("parse root snapshot: {error}")))?;
523    if !is_supported_root_filesystem_snapshot_format(&raw.format) {
524        return Err(RootFilesystemError::new(format!(
525            "unsupported root snapshot format: {}",
526            raw.format
527        )));
528    }
529    raw_entries_to_snapshot(raw.filesystem.entries, limits, "root snapshot")
530}
531
532pub fn is_supported_root_filesystem_snapshot_format(format: &str) -> bool {
533    format == ROOT_FILESYSTEM_SNAPSHOT_FORMAT
534        || format == LEGACY_AGENT_OS_ROOT_FILESYSTEM_SNAPSHOT_FORMAT
535}
536
537fn load_bundled_base_snapshot_with_limits(
538    limits: &RootFilesystemImportLimits,
539) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
540    validate_encoded_snapshot_size(
541        BUNDLED_BASE_FILESYSTEM_JSON.as_bytes(),
542        limits,
543        "bundled base filesystem",
544    )?;
545    let raw: RawBaseFilesystemSnapshot = serde_json::from_str(BUNDLED_BASE_FILESYSTEM_JSON)
546        .map_err(|error| {
547            RootFilesystemError::new(format!("parse bundled base filesystem: {error}"))
548        })?;
549    raw_entries_to_snapshot(raw.filesystem.entries, limits, "bundled base filesystem")
550}
551
552fn minimal_root_snapshot() -> RootFilesystemSnapshot {
553    let mut entries = DEFAULT_ROOT_DIRECTORIES
554        .iter()
555        .map(|path| FilesystemEntry::directory(*path))
556        .collect::<Vec<_>>();
557    entries.push(FilesystemEntry::file("/usr/bin/env", Vec::new()));
558    RootFilesystemSnapshot { entries }
559}
560
561fn convert_raw_entry(raw: RawFilesystemEntry) -> Result<FilesystemEntry, RootFilesystemError> {
562    let content = match raw.content {
563        Some(content) => match raw.encoding.as_deref() {
564            Some("base64") => Some(
565                base64::engine::general_purpose::STANDARD
566                    .decode(content)
567                    .map_err(|error| {
568                        RootFilesystemError::new(format!(
569                            "decode base64 content for {}: {error}",
570                            raw.path
571                        ))
572                    })?,
573            ),
574            Some("utf8") | None => Some(content.into_bytes()),
575            Some(other) => {
576                return Err(RootFilesystemError::new(format!(
577                    "unsupported content encoding for {}: {other}",
578                    raw.path
579                )));
580            }
581        },
582        None => None,
583    };
584
585    Ok(FilesystemEntry {
586        path: raw.path,
587        kind: match raw.kind {
588            RawFilesystemEntryKind::File => FilesystemEntryKind::File,
589            RawFilesystemEntryKind::Directory => FilesystemEntryKind::Directory,
590            RawFilesystemEntryKind::Symlink => FilesystemEntryKind::Symlink,
591        },
592        mode: u32::from_str_radix(&raw.mode, 8).map_err(|error| {
593            RootFilesystemError::new(format!("parse mode {}: {error}", raw.mode))
594        })?,
595        uid: raw.uid,
596        gid: raw.gid,
597        content,
598        target: raw.target,
599    })
600}
601
602fn raw_entries_to_snapshot(
603    raw_entries: Vec<RawFilesystemEntry>,
604    limits: &RootFilesystemImportLimits,
605    context: &str,
606) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
607    if let Some(limit) = limits.max_inode_count {
608        if raw_entries.len() > limit {
609            return Err(RootFilesystemError::new(format!(
610                "{context} contains {} entries, exceeding limit {limit}",
611                raw_entries.len()
612            )));
613        }
614    }
615
616    let entries = raw_entries
617        .into_iter()
618        .map(convert_raw_entry)
619        .collect::<Result<Vec<_>, _>>()?;
620    validate_entry_import_limits(&entries, limits, context)?;
621    Ok(RootFilesystemSnapshot { entries })
622}
623
624pub fn validate_snapshot_import_limits(
625    snapshot: &RootFilesystemSnapshot,
626    limits: &RootFilesystemImportLimits,
627    context: &str,
628) -> Result<(), RootFilesystemError> {
629    validate_entry_import_limits(&snapshot.entries, limits, context)
630}
631
632fn validate_descriptor_import_limits(
633    lowers: &[RootFilesystemSnapshot],
634    bootstrap_entries: &[FilesystemEntry],
635    limits: &RootFilesystemImportLimits,
636    context: &str,
637) -> Result<(), RootFilesystemError> {
638    let explicit_entry_count = lowers
639        .iter()
640        .map(|snapshot| snapshot.entries.len())
641        .sum::<usize>()
642        .saturating_add(bootstrap_entries.len());
643    let mut inode_paths = BTreeSet::new();
644    for snapshot in lowers {
645        collect_materialized_entry_paths(&snapshot.entries, &mut inode_paths);
646    }
647    collect_materialized_entry_paths(bootstrap_entries, &mut inode_paths);
648    let inode_count = inode_paths.len();
649    if let Some(limit) = limits.max_inode_count {
650        if explicit_entry_count > limit {
651            return Err(RootFilesystemError::new(format!(
652                "{context} contains {explicit_entry_count} entries, exceeding limit {limit}"
653            )));
654        }
655
656        if inode_count > limit {
657            return Err(RootFilesystemError::new(format!(
658                "{context} contains {inode_count} entries, exceeding limit {limit}"
659            )));
660        }
661    }
662
663    let mut bytes = 0_u64;
664    for snapshot in lowers {
665        bytes = bytes.saturating_add(entry_content_bytes(&snapshot.entries));
666    }
667    bytes = bytes.saturating_add(entry_content_bytes(bootstrap_entries));
668    if let Some(limit) = limits.max_filesystem_bytes {
669        if bytes > limit {
670            return Err(RootFilesystemError::new(format!(
671                "{context} contains {bytes} bytes, exceeding limit {limit}"
672            )));
673        }
674    }
675    Ok(())
676}
677
678fn validate_entry_import_limits(
679    entries: &[FilesystemEntry],
680    limits: &RootFilesystemImportLimits,
681    context: &str,
682) -> Result<(), RootFilesystemError> {
683    if let Some(limit) = limits.max_inode_count {
684        if entries.len() > limit {
685            return Err(RootFilesystemError::new(format!(
686                "{context} contains {} entries, exceeding limit {limit}",
687                entries.len()
688            )));
689        }
690
691        let inode_count = materialized_entry_inode_count(entries);
692        if inode_count > limit {
693            return Err(RootFilesystemError::new(format!(
694                "{context} contains {inode_count} entries, exceeding limit {limit}"
695            )));
696        }
697    }
698
699    let bytes = entry_content_bytes(entries);
700    if let Some(limit) = limits.max_filesystem_bytes {
701        if bytes > limit {
702            return Err(RootFilesystemError::new(format!(
703                "{context} contains {bytes} bytes, exceeding limit {limit}"
704            )));
705        }
706    }
707    Ok(())
708}
709
710fn validate_encoded_snapshot_size(
711    bytes: &[u8],
712    limits: &RootFilesystemImportLimits,
713    context: &str,
714) -> Result<(), RootFilesystemError> {
715    if let Some(limit) = limits.max_encoded_snapshot_bytes {
716        if bytes.len() > limit {
717            return Err(RootFilesystemError::new(format!(
718                "{context} contains {} encoded bytes, exceeding limit {limit}",
719                bytes.len()
720            )));
721        }
722    }
723    Ok(())
724}
725
726fn entry_content_bytes(entries: &[FilesystemEntry]) -> u64 {
727    entries.iter().fold(0_u64, |total, entry| {
728        total.saturating_add(match entry.kind {
729            FilesystemEntryKind::File => entry
730                .content
731                .as_ref()
732                .map(|content| usize_to_u64(content.len()))
733                .unwrap_or(0),
734            FilesystemEntryKind::Directory => 0,
735            FilesystemEntryKind::Symlink => entry
736                .target
737                .as_ref()
738                .map(|target| usize_to_u64(target.len()))
739                .unwrap_or(0),
740        })
741    })
742}
743
744fn materialized_entry_inode_count(entries: &[FilesystemEntry]) -> usize {
745    let mut paths = BTreeSet::new();
746    collect_materialized_entry_paths(entries, &mut paths);
747    paths.len()
748}
749
750fn collect_materialized_entry_paths(entries: &[FilesystemEntry], paths: &mut BTreeSet<String>) {
751    for entry in entries {
752        collect_materialized_path(&entry.path, paths);
753    }
754}
755
756fn collect_materialized_path(path: &str, paths: &mut BTreeSet<String>) {
757    let normalized = normalize_path(path);
758    paths.insert(normalized.clone());
759
760    let mut parent = String::new();
761    let segments = normalized
762        .split('/')
763        .filter(|segment| !segment.is_empty())
764        .collect::<Vec<_>>();
765    for segment in segments.iter().take(segments.len().saturating_sub(1)) {
766        parent.push('/');
767        parent.push_str(segment);
768        paths.insert(parent.clone());
769    }
770}
771
/// Widens `value` to `u64`, clamping to `u64::MAX` if it does not fit.
fn usize_to_u64(value: usize) -> u64 {
    match u64::try_from(value) {
        Ok(widened) => widened,
        Err(_) => u64::MAX,
    }
}
775
/// Narrows `value` to `usize`, clamping to `usize::MAX` when it is too large
/// for the target's pointer width.
const fn u64_limit_to_usize(value: u64) -> usize {
    const USIZE_CEILING: u64 = usize::MAX as u64;
    if value <= USIZE_CEILING {
        value as usize
    } else {
        usize::MAX
    }
}
783
784const fn encoded_snapshot_limit(
785    max_filesystem_bytes: Option<u64>,
786    max_inode_count: Option<usize>,
787) -> Option<usize> {
788    let Some(max_filesystem_bytes) = max_filesystem_bytes else {
789        return None;
790    };
791
792    Some(
793        u64_limit_to_usize(max_filesystem_bytes)
794            .saturating_mul(2)
795            .saturating_add(match max_inode_count {
796                Some(max_inode_count) => {
797                    max_inode_count.saturating_mul(ROOT_FILESYSTEM_SNAPSHOT_ENTRY_OVERHEAD_BYTES)
798                }
799                None => 0,
800            })
801            .saturating_add(ROOT_FILESYSTEM_SNAPSHOT_FIXED_OVERHEAD_BYTES),
802    )
803}
804
805fn snapshot_to_memory_filesystem(
806    snapshot: &RootFilesystemSnapshot,
807) -> Result<MemoryFileSystem, RootFilesystemError> {
808    let mut filesystem = MemoryFileSystem::new();
809    for entry in sort_entries(snapshot.entries.clone()) {
810        apply_entry_to_memory_filesystem(&mut filesystem, &entry)?;
811    }
812    Ok(filesystem)
813}
814
815fn apply_entry_to_memory_filesystem(
816    filesystem: &mut MemoryFileSystem,
817    entry: &FilesystemEntry,
818) -> Result<(), RootFilesystemError> {
819    ensure_parent_directories(filesystem, &entry.path)?;
820
821    match entry.kind {
822        FilesystemEntryKind::Directory => {
823            filesystem.mkdir(&entry.path, true)?;
824            filesystem.chmod(&entry.path, entry.mode)?;
825            filesystem.chown(&entry.path, entry.uid, entry.gid)?;
826        }
827        FilesystemEntryKind::File => {
828            filesystem.write_file(&entry.path, entry.content.clone().unwrap_or_default())?;
829            filesystem.chmod(&entry.path, entry.mode)?;
830            filesystem.chown(&entry.path, entry.uid, entry.gid)?;
831        }
832        FilesystemEntryKind::Symlink => {
833            let Some(target) = entry.target.as_deref() else {
834                return Err(RootFilesystemError::new(format!(
835                    "missing symlink target for {}",
836                    entry.path
837                )));
838            };
839            filesystem.symlink_with_metadata(
840                target,
841                &entry.path,
842                entry.mode,
843                entry.uid,
844                entry.gid,
845            )?;
846        }
847    }
848
849    Ok(())
850}
851
852fn apply_entry(
853    filesystem: &mut impl VirtualFileSystem,
854    entry: &FilesystemEntry,
855) -> Result<(), RootFilesystemError> {
856    ensure_parent_directories(filesystem, &entry.path)?;
857
858    match entry.kind {
859        FilesystemEntryKind::Directory => {
860            filesystem.mkdir(&entry.path, true)?;
861            filesystem.chmod(&entry.path, entry.mode)?;
862            filesystem.chown(&entry.path, entry.uid, entry.gid)?;
863        }
864        FilesystemEntryKind::File => {
865            filesystem.write_file(&entry.path, entry.content.clone().unwrap_or_default())?;
866            filesystem.chmod(&entry.path, entry.mode)?;
867            filesystem.chown(&entry.path, entry.uid, entry.gid)?;
868        }
869        FilesystemEntryKind::Symlink => {
870            let Some(target) = entry.target.as_deref() else {
871                return Err(RootFilesystemError::new(format!(
872                    "missing symlink target for {}",
873                    entry.path
874                )));
875            };
876            filesystem.symlink(target, &entry.path)?;
877        }
878    }
879
880    Ok(())
881}
882
883fn ensure_parent_directories(
884    filesystem: &mut impl VirtualFileSystem,
885    path: &str,
886) -> Result<(), RootFilesystemError> {
887    let normalized = normalize_path(path);
888    let mut current = String::new();
889    let segments = normalized
890        .split('/')
891        .filter(|segment| !segment.is_empty())
892        .collect::<Vec<_>>();
893
894    for segment in segments.iter().take(segments.len().saturating_sub(1)) {
895        current.push('/');
896        current.push_str(segment);
897
898        if filesystem.exists(&current) {
899            continue;
900        }
901
902        filesystem.create_dir(&current)?;
903        filesystem.chmod(&current, 0o755)?;
904        filesystem.chown(&current, 0, 0)?;
905    }
906
907    Ok(())
908}
909
910fn sort_entries(mut entries: Vec<FilesystemEntry>) -> Vec<FilesystemEntry> {
911    entries.sort_by(|left, right| {
912        let depth_left = if left.path == "/" {
913            0
914        } else {
915            left.path.split('/').filter(|part| !part.is_empty()).count()
916        };
917        let depth_right = if right.path == "/" {
918            0
919        } else {
920            right
921                .path
922                .split('/')
923                .filter(|part| !part.is_empty())
924                .count()
925        };
926        depth_left
927            .cmp(&depth_right)
928            .then_with(|| left.path.cmp(&right.path))
929    });
930    entries
931}
932
933fn snapshot_virtual_filesystem(
934    filesystem: &mut impl VirtualFileSystem,
935    root_path: &str,
936) -> Result<Vec<FilesystemEntry>, RootFilesystemError> {
937    let mut entries = Vec::new();
938    snapshot_path(filesystem, root_path, &mut entries)?;
939    Ok(entries)
940}
941
942fn snapshot_path(
943    filesystem: &mut impl VirtualFileSystem,
944    path: &str,
945    entries: &mut Vec<FilesystemEntry>,
946) -> Result<(), RootFilesystemError> {
947    let stat = if path == "/" {
948        filesystem.stat(path)?
949    } else {
950        filesystem.lstat(path)?
951    };
952
953    if stat.is_symbolic_link {
954        entries.push(FilesystemEntry {
955            path: path.to_owned(),
956            kind: FilesystemEntryKind::Symlink,
957            mode: stat.mode,
958            uid: stat.uid,
959            gid: stat.gid,
960            content: None,
961            target: Some(filesystem.read_link(path)?),
962        });
963        return Ok(());
964    }
965
966    if stat.is_directory {
967        entries.push(FilesystemEntry {
968            path: path.to_owned(),
969            kind: FilesystemEntryKind::Directory,
970            mode: stat.mode,
971            uid: stat.uid,
972            gid: stat.gid,
973            content: None,
974            target: None,
975        });
976
977        let mut children = filesystem
978            .read_dir_with_types(path)?
979            .into_iter()
980            .map(|entry| entry.name)
981            .filter(|name| name != "." && name != "..")
982            .collect::<Vec<_>>();
983        children.sort();
984
985        for child in children {
986            let child_path = if path == "/" {
987                format!("/{child}")
988            } else {
989                format!("{path}/{child}")
990            };
991            snapshot_path(filesystem, &child_path, entries)?;
992        }
993        return Ok(());
994    }
995
996    entries.push(FilesystemEntry {
997        path: path.to_owned(),
998        kind: FilesystemEntryKind::File,
999        mode: stat.mode,
1000        uid: stat.uid,
1001        gid: stat.gid,
1002        content: Some(filesystem.read_file(path)?),
1003        target: None,
1004    });
1005    Ok(())
1006}
1007
1008fn is_kernel_reserved_bootstrap_path(path: &str) -> bool {
1009    let normalized = normalize_path(path);
1010    KERNEL_RESERVED_BOOTSTRAP_PATH_PREFIXES
1011        .iter()
1012        .any(|prefix| normalized == *prefix || normalized.starts_with(&format!("{prefix}/")))
1013}