Skip to main content

secure_exec_vfs_core/posix/
root_fs.rs

1use super::overlay_fs::{OverlayFileSystem, OverlayMode};
2use super::usage::{
3    RootFilesystemResourceLimits, DEFAULT_MAX_FILESYSTEM_BYTES, DEFAULT_MAX_INODE_COUNT,
4};
5use super::vfs::{
6    normalize_path, MemoryFileSystem, VfsError, VfsResult, VirtualFileSystem, VirtualStat,
7    VirtualUtimeSpec, MAX_PATH_LENGTH,
8};
9use crate::posix::vfs::VirtualDirEntry;
10use base64::Engine;
11use serde::Deserialize;
12use std::collections::BTreeSet;
13
// The base filesystem fixture is staged into OUT_DIR by build.rs: copied from
// the canonical `packages/secure-exec-core/fixtures/base-filesystem.json`
// during in-tree builds, or from the vendored `assets/base-filesystem.json`
// copy bundled in the published crate.
const BUNDLED_BASE_FILESYSTEM_JSON: &str =
    include_str!(concat!(env!("OUT_DIR"), "/base-filesystem.json"));
/// Value written into the `format` field by `encode_snapshot`.
pub const ROOT_FILESYSTEM_SNAPSHOT_FORMAT: &str = "secure_exec_filesystem_snapshot_v1";
/// Additional format tag accepted by `is_supported_root_filesystem_snapshot_format`.
const LEGACY_AGENTOS_ROOT_FILESYSTEM_SNAPSHOT_FORMAT: &str = "agentos_filesystem_snapshot_v1";
/// Flat allowance added once by `encoded_snapshot_limit` to the encoded-size budget.
const ROOT_FILESYSTEM_SNAPSHOT_FIXED_OVERHEAD_BYTES: usize = 4 * 1024;
/// Per-entry allowance that `encoded_snapshot_limit` multiplies by the inode limit.
const ROOT_FILESYSTEM_SNAPSHOT_ENTRY_OVERHEAD_BYTES: usize = MAX_PATH_LENGTH + 1024;
/// Directories that `minimal_root_snapshot` creates (mode 0o755, uid/gid 0 via
/// `FilesystemEntry::directory`).
const DEFAULT_ROOT_DIRECTORIES: &[&str] = &[
    "/",
    "/dev",
    "/proc",
    "/tmp",
    "/bin",
    "/lib",
    "/sbin",
    "/boot",
    "/etc",
    "/root",
    "/run",
    "/srv",
    "/sys",
    "/opt",
    "/mnt",
    "/media",
    "/home",
    "/usr",
    "/usr/bin",
    "/usr/games",
    "/usr/include",
    "/usr/lib",
    "/usr/libexec",
    "/usr/man",
    "/usr/local",
    "/usr/local/bin",
    "/usr/sbin",
    "/usr/share",
    "/usr/share/man",
    "/var",
    "/var/cache",
    "/var/empty",
    "/var/lib",
    "/var/lock",
    "/var/log",
    "/var/run",
    "/var/spool",
    "/var/tmp",
    "/etc/agentos",
];
/// Bootstrap entries whose normalized path equals one of these prefixes, or
/// lies beneath one, are skipped by `RootFileSystem::apply_bootstrap_entries`.
const KERNEL_RESERVED_BOOTSTRAP_PATH_PREFIXES: &[&str] = &["/dev", "/proc", "/sys"];
66
/// Error produced by root filesystem construction and snapshot handling.
///
/// It carries nothing but a human-readable message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RootFilesystemError {
    message: String,
}

impl RootFilesystemError {
    /// Builds an error from anything that converts into a `String`.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl std::fmt::Display for RootFilesystemError {
    /// Emits the stored message verbatim; width/padding flags on the outer
    /// formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.message)
    }
}

impl std::error::Error for RootFilesystemError {}
87
88impl From<VfsError> for RootFilesystemError {
89    fn from(error: VfsError) -> Self {
90        Self::new(error.to_string())
91    }
92}
93
/// The kind of node a [`FilesystemEntry`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilesystemEntryKind {
    File,
    Directory,
    Symlink,
}

/// One node of a filesystem snapshot: a path plus its kind, ownership,
/// permission bits and (depending on the kind) content or link target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilesystemEntry {
    pub path: String,
    pub kind: FilesystemEntryKind,
    pub mode: u32,
    pub uid: u32,
    pub gid: u32,
    /// File bytes; the constructors set this only for files.
    pub content: Option<Vec<u8>>,
    /// Link target; the constructors set this only for symlinks.
    pub target: Option<String>,
}

impl FilesystemEntry {
    /// Shared skeleton for the public constructors: uid/gid 0 and neither
    /// content nor target set.
    fn with_defaults(path: String, kind: FilesystemEntryKind, mode: u32) -> Self {
        Self {
            path,
            kind,
            mode,
            uid: 0,
            gid: 0,
            content: None,
            target: None,
        }
    }

    /// Directory entry with mode `0o755`, owned by uid/gid 0.
    pub fn directory(path: impl Into<String>) -> Self {
        Self::with_defaults(path.into(), FilesystemEntryKind::Directory, 0o755)
    }

    /// File entry with mode `0o644`, owned by uid/gid 0, holding `content`.
    pub fn file(path: impl Into<String>, content: impl Into<Vec<u8>>) -> Self {
        let path = path.into();
        let bytes = content.into();
        let mut entry = Self::with_defaults(path, FilesystemEntryKind::File, 0o644);
        entry.content = Some(bytes);
        entry
    }

    /// Symlink entry with mode `0o777`, owned by uid/gid 0, pointing at `target`.
    pub fn symlink(path: impl Into<String>, target: impl Into<String>) -> Self {
        let path = path.into();
        let destination = target.into();
        let mut entry = Self::with_defaults(path, FilesystemEntryKind::Symlink, 0o777);
        entry.target = Some(destination);
        entry
    }
}
149
/// A flat list of filesystem entries; produced by `RootFileSystem::snapshot`
/// and `decode_snapshot`, consumed by `encode_snapshot` and as a lower layer
/// of a `RootFilesystemDescriptor`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RootFilesystemSnapshot {
    pub entries: Vec<FilesystemEntry>,
}
154
155#[derive(Debug, Clone, Copy, PartialEq, Eq)]
156pub struct RootFilesystemImportLimits {
157    pub max_encoded_snapshot_bytes: Option<usize>,
158    pub max_filesystem_bytes: Option<u64>,
159    pub max_inode_count: Option<usize>,
160}
161
162impl RootFilesystemImportLimits {
163    pub fn from_resource_limits(limits: &impl RootFilesystemResourceLimits) -> Self {
164        Self {
165            max_encoded_snapshot_bytes: encoded_snapshot_limit(
166                limits.max_filesystem_bytes(),
167                limits.max_inode_count(),
168            ),
169            max_filesystem_bytes: limits.max_filesystem_bytes(),
170            max_inode_count: limits.max_inode_count(),
171        }
172    }
173}
174
175impl Default for RootFilesystemImportLimits {
176    fn default() -> Self {
177        Self {
178            max_encoded_snapshot_bytes: encoded_snapshot_limit(
179                Some(DEFAULT_MAX_FILESYSTEM_BYTES),
180                Some(DEFAULT_MAX_INODE_COUNT),
181            ),
182            max_filesystem_bytes: Some(DEFAULT_MAX_FILESYSTEM_BYTES),
183            max_inode_count: Some(DEFAULT_MAX_INODE_COUNT),
184        }
185    }
186}
187
/// How the root filesystem behaves once bootstrap has finished.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RootFilesystemMode {
    /// `RootFileSystem::finish_bootstrap` does not lock overlay writes.
    Ephemeral,
    /// `RootFileSystem::finish_bootstrap` calls `lock_writes` on the overlay.
    ReadOnly,
}
193
194#[derive(Debug, Clone, PartialEq, Eq)]
195pub struct RootFilesystemDescriptor {
196    pub mode: RootFilesystemMode,
197    pub disable_default_base_layer: bool,
198    pub lowers: Vec<RootFilesystemSnapshot>,
199    pub bootstrap_entries: Vec<FilesystemEntry>,
200}
201
202impl Default for RootFilesystemDescriptor {
203    fn default() -> Self {
204        Self {
205            mode: RootFilesystemMode::Ephemeral,
206            disable_default_base_layer: false,
207            lowers: Vec::new(),
208            bootstrap_entries: Vec::new(),
209        }
210    }
211}
212
/// Root filesystem backed by an overlay; supports a bootstrap phase that is
/// closed by `finish_bootstrap`.
#[derive(Debug)]
pub struct RootFileSystem {
    // Overlay that every `VirtualFileSystem` call is forwarded to.
    overlay: OverlayFileSystem,
    // Mode taken from the descriptor; consulted in `finish_bootstrap`.
    mode: RootFilesystemMode,
    // Set by `finish_bootstrap`; afterwards `apply_bootstrap_entries` errors.
    bootstrap_finished: bool,
}
219
220impl RootFileSystem {
221    pub fn from_descriptor(
222        descriptor: RootFilesystemDescriptor,
223    ) -> Result<Self, RootFilesystemError> {
224        Self::from_descriptor_with_import_limits(descriptor, &RootFilesystemImportLimits::default())
225    }
226
227    pub fn from_descriptor_with_import_limits(
228        descriptor: RootFilesystemDescriptor,
229        limits: &RootFilesystemImportLimits,
230    ) -> Result<Self, RootFilesystemError> {
231        let mut lower_snapshots = descriptor.lowers.clone();
232        if !descriptor.disable_default_base_layer {
233            lower_snapshots.push(load_bundled_base_snapshot_with_limits(limits)?);
234        } else if lower_snapshots.is_empty() {
235            lower_snapshots.push(minimal_root_snapshot());
236        }
237        validate_descriptor_import_limits(
238            &lower_snapshots,
239            &descriptor.bootstrap_entries,
240            limits,
241            "root filesystem descriptor",
242        )?;
243
244        let lowers = lower_snapshots
245            .iter()
246            .map(snapshot_to_memory_filesystem)
247            .collect::<Result<Vec<_>, _>>()?;
248
249        let mut root = Self {
250            overlay: OverlayFileSystem::new(lowers, OverlayMode::Ephemeral),
251            mode: descriptor.mode,
252            bootstrap_finished: false,
253        };
254        root.apply_bootstrap_entries(&descriptor.bootstrap_entries)?;
255        Ok(root)
256    }
257
258    pub fn apply_bootstrap_entries(
259        &mut self,
260        entries: &[FilesystemEntry],
261    ) -> Result<(), RootFilesystemError> {
262        if self.bootstrap_finished {
263            return Err(RootFilesystemError::new(
264                "root filesystem bootstrap is already finished",
265            ));
266        }
267
268        for entry in sort_entries(entries.to_vec()) {
269            if is_kernel_reserved_bootstrap_path(&entry.path) {
270                continue;
271            }
272            apply_entry(&mut self.overlay, &entry)?;
273        }
274        Ok(())
275    }
276
277    pub fn finish_bootstrap(&mut self) {
278        if self.bootstrap_finished {
279            return;
280        }
281        self.bootstrap_finished = true;
282        if self.mode == RootFilesystemMode::ReadOnly {
283            self.overlay.lock_writes();
284        }
285    }
286
287    pub fn snapshot(&mut self) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
288        Ok(RootFilesystemSnapshot {
289            entries: snapshot_virtual_filesystem(&mut self.overlay, "/")?,
290        })
291    }
292
293    pub fn check_rename_copy_up_limits(
294        &mut self,
295        old_path: &str,
296        new_path: &str,
297        max_bytes: Option<u64>,
298        max_inodes: Option<usize>,
299    ) -> VfsResult<()> {
300        self.overlay
301            .check_rename_copy_up_limits(old_path, new_path, max_bytes, max_inodes)
302    }
303}
304
// `RootFileSystem` adds no filesystem behavior of its own here: every trait
// method forwards to the same-named method on the inner overlay.
impl VirtualFileSystem for RootFileSystem {
    fn read_file(&mut self, path: &str) -> VfsResult<Vec<u8>> {
        self.overlay.read_file(path)
    }

    fn read_dir(&mut self, path: &str) -> VfsResult<Vec<String>> {
        self.overlay.read_dir(path)
    }

    fn read_dir_limited(&mut self, path: &str, max_entries: usize) -> VfsResult<Vec<String>> {
        self.overlay.read_dir_limited(path, max_entries)
    }

    fn read_dir_with_types(&mut self, path: &str) -> VfsResult<Vec<VirtualDirEntry>> {
        self.overlay.read_dir_with_types(path)
    }

    // The three content-taking methods convert to `Vec<u8>` before delegating.
    fn write_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
        self.overlay.write_file(path, content.into())
    }

    fn create_file_exclusive(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
        self.overlay.create_file_exclusive(path, content.into())
    }

    fn append_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<u64> {
        self.overlay.append_file(path, content.into())
    }

    fn create_dir(&mut self, path: &str) -> VfsResult<()> {
        self.overlay.create_dir(path)
    }

    fn mkdir(&mut self, path: &str, recursive: bool) -> VfsResult<()> {
        self.overlay.mkdir(path, recursive)
    }

    fn exists(&self, path: &str) -> bool {
        self.overlay.exists(path)
    }

    fn stat(&mut self, path: &str) -> VfsResult<VirtualStat> {
        self.overlay.stat(path)
    }

    fn remove_file(&mut self, path: &str) -> VfsResult<()> {
        self.overlay.remove_file(path)
    }

    fn remove_dir(&mut self, path: &str) -> VfsResult<()> {
        self.overlay.remove_dir(path)
    }

    fn rename(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
        self.overlay.rename(old_path, new_path)
    }

    fn realpath(&self, path: &str) -> VfsResult<String> {
        self.overlay.realpath(path)
    }

    fn symlink(&mut self, target: &str, link_path: &str) -> VfsResult<()> {
        self.overlay.symlink(target, link_path)
    }

    fn read_link(&self, path: &str) -> VfsResult<String> {
        self.overlay.read_link(path)
    }

    fn lstat(&self, path: &str) -> VfsResult<VirtualStat> {
        self.overlay.lstat(path)
    }

    fn link(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
        self.overlay.link(old_path, new_path)
    }

    fn chmod(&mut self, path: &str, mode: u32) -> VfsResult<()> {
        self.overlay.chmod(path, mode)
    }

    fn chown(&mut self, path: &str, uid: u32, gid: u32) -> VfsResult<()> {
        self.overlay.chown(path, uid, gid)
    }

    fn utimes(&mut self, path: &str, atime_ms: u64, mtime_ms: u64) -> VfsResult<()> {
        self.overlay.utimes(path, atime_ms, mtime_ms)
    }

    fn utimes_spec(
        &mut self,
        path: &str,
        atime: VirtualUtimeSpec,
        mtime: VirtualUtimeSpec,
        follow_symlinks: bool,
    ) -> VfsResult<()> {
        self.overlay
            .utimes_spec(path, atime, mtime, follow_symlinks)
    }

    fn truncate(&mut self, path: &str, length: u64) -> VfsResult<()> {
        self.overlay.truncate(path, length)
    }

    fn pread(&mut self, path: &str, offset: u64, length: usize) -> VfsResult<Vec<u8>> {
        self.overlay.pread(path, offset, length)
    }
}
413
// Top-level shape of the bundled base filesystem JSON (it is parsed without
// a `format` field, unlike `RawSnapshotExport`).
#[derive(Debug, Deserialize)]
struct RawBaseFilesystemSnapshot {
    filesystem: RawFilesystemEntries,
}

// The `filesystem` object: a list of raw entries.
#[derive(Debug, Deserialize)]
struct RawFilesystemEntries {
    entries: Vec<RawFilesystemEntry>,
}

// One entry as it appears in JSON, before `convert_raw_entry` validates it.
#[derive(Debug, Deserialize)]
struct RawFilesystemEntry {
    path: String,
    #[serde(rename = "type")]
    kind: RawFilesystemEntryKind,
    // Permission bits as text; `convert_raw_entry` parses this as octal.
    mode: String,
    uid: u32,
    gid: u32,
    #[serde(default)]
    content: Option<String>,
    // `convert_raw_entry` accepts "base64", "utf8", or an absent value.
    #[serde(default)]
    encoding: Option<String>,
    #[serde(default)]
    target: Option<String>,
}

// Entry kind; deserialized from snake_case variant names.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
enum RawFilesystemEntryKind {
    File,
    Directory,
    Symlink,
}

// Top-level shape of an exported snapshot: a format tag plus the entries.
#[derive(Debug, Deserialize)]
struct RawSnapshotExport {
    format: String,
    filesystem: RawFilesystemEntries,
}
453
// Serialization counterpart of `RawSnapshotExport`, built by `encode_snapshot`.
#[derive(Debug, serde::Serialize)]
struct SnapshotExport<'a> {
    format: &'static str,
    filesystem: SnapshotFilesystem<'a>,
}

// The `filesystem` object of an exported snapshot.
#[derive(Debug, serde::Serialize)]
struct SnapshotFilesystem<'a> {
    entries: Vec<SerializedFilesystemEntry<'a>>,
}

// One exported entry; borrows the path and target from the source entry.
#[derive(Debug, serde::Serialize)]
struct SerializedFilesystemEntry<'a> {
    path: &'a str,
    #[serde(rename = "type")]
    kind: &'static str,
    // Permission bits rendered as octal text by `encode_snapshot`.
    mode: String,
    uid: u32,
    gid: u32,
    // The optional fields below are omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    encoding: Option<&'static str>,
    #[serde(skip_serializing_if = "Option::is_none")]
    target: Option<&'a str>,
}
480
481pub fn encode_snapshot(snapshot: &RootFilesystemSnapshot) -> Result<Vec<u8>, RootFilesystemError> {
482    let serialized_entries = snapshot
483        .entries
484        .iter()
485        .map(|entry| SerializedFilesystemEntry {
486            path: &entry.path,
487            kind: match entry.kind {
488                FilesystemEntryKind::File => "file",
489                FilesystemEntryKind::Directory => "directory",
490                FilesystemEntryKind::Symlink => "symlink",
491            },
492            mode: format!("{:o}", entry.mode),
493            uid: entry.uid,
494            gid: entry.gid,
495            content: entry
496                .content
497                .as_ref()
498                .map(|bytes| base64::engine::general_purpose::STANDARD.encode(bytes)),
499            encoding: entry.content.as_ref().map(|_| "base64"),
500            target: entry.target.as_deref(),
501        })
502        .collect::<Vec<_>>();
503
504    serde_json::to_vec(&SnapshotExport {
505        format: ROOT_FILESYSTEM_SNAPSHOT_FORMAT,
506        filesystem: SnapshotFilesystem {
507            entries: serialized_entries,
508        },
509    })
510    .map_err(|error| RootFilesystemError::new(format!("serialize root snapshot: {error}")))
511}
512
/// Decodes an encoded snapshot using `RootFilesystemImportLimits::default()`.
///
/// # Errors
/// Same as `decode_snapshot_with_import_limits`.
pub fn decode_snapshot(bytes: &[u8]) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
    decode_snapshot_with_import_limits(bytes, &RootFilesystemImportLimits::default())
}
516
517pub fn decode_snapshot_with_import_limits(
518    bytes: &[u8],
519    limits: &RootFilesystemImportLimits,
520) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
521    validate_encoded_snapshot_size(bytes, limits, "root snapshot")?;
522    let raw: RawSnapshotExport = serde_json::from_slice(bytes)
523        .map_err(|error| RootFilesystemError::new(format!("parse root snapshot: {error}")))?;
524    if !is_supported_root_filesystem_snapshot_format(&raw.format) {
525        return Err(RootFilesystemError::new(format!(
526            "unsupported root snapshot format: {}",
527            raw.format
528        )));
529    }
530    raw_entries_to_snapshot(raw.filesystem.entries, limits, "root snapshot")
531}
532
533pub fn is_supported_root_filesystem_snapshot_format(format: &str) -> bool {
534    format == ROOT_FILESYSTEM_SNAPSHOT_FORMAT
535        || format == LEGACY_AGENTOS_ROOT_FILESYSTEM_SNAPSHOT_FORMAT
536}
537
538fn load_bundled_base_snapshot_with_limits(
539    limits: &RootFilesystemImportLimits,
540) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
541    validate_encoded_snapshot_size(
542        BUNDLED_BASE_FILESYSTEM_JSON.as_bytes(),
543        limits,
544        "bundled base filesystem",
545    )?;
546    let raw: RawBaseFilesystemSnapshot = serde_json::from_str(BUNDLED_BASE_FILESYSTEM_JSON)
547        .map_err(|error| {
548            RootFilesystemError::new(format!("parse bundled base filesystem: {error}"))
549        })?;
550    raw_entries_to_snapshot(raw.filesystem.entries, limits, "bundled base filesystem")
551}
552
553fn minimal_root_snapshot() -> RootFilesystemSnapshot {
554    let mut entries = DEFAULT_ROOT_DIRECTORIES
555        .iter()
556        .map(|path| FilesystemEntry::directory(*path))
557        .collect::<Vec<_>>();
558    entries.push(FilesystemEntry::file("/usr/bin/env", Vec::new()));
559    RootFilesystemSnapshot { entries }
560}
561
562fn convert_raw_entry(raw: RawFilesystemEntry) -> Result<FilesystemEntry, RootFilesystemError> {
563    let content = match raw.content {
564        Some(content) => match raw.encoding.as_deref() {
565            Some("base64") => Some(
566                base64::engine::general_purpose::STANDARD
567                    .decode(content)
568                    .map_err(|error| {
569                        RootFilesystemError::new(format!(
570                            "decode base64 content for {}: {error}",
571                            raw.path
572                        ))
573                    })?,
574            ),
575            Some("utf8") | None => Some(content.into_bytes()),
576            Some(other) => {
577                return Err(RootFilesystemError::new(format!(
578                    "unsupported content encoding for {}: {other}",
579                    raw.path
580                )));
581            }
582        },
583        None => None,
584    };
585
586    Ok(FilesystemEntry {
587        path: raw.path,
588        kind: match raw.kind {
589            RawFilesystemEntryKind::File => FilesystemEntryKind::File,
590            RawFilesystemEntryKind::Directory => FilesystemEntryKind::Directory,
591            RawFilesystemEntryKind::Symlink => FilesystemEntryKind::Symlink,
592        },
593        mode: u32::from_str_radix(&raw.mode, 8).map_err(|error| {
594            RootFilesystemError::new(format!("parse mode {}: {error}", raw.mode))
595        })?,
596        uid: raw.uid,
597        gid: raw.gid,
598        content,
599        target: raw.target,
600    })
601}
602
603fn raw_entries_to_snapshot(
604    raw_entries: Vec<RawFilesystemEntry>,
605    limits: &RootFilesystemImportLimits,
606    context: &str,
607) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
608    if let Some(limit) = limits.max_inode_count {
609        if raw_entries.len() > limit {
610            return Err(RootFilesystemError::new(format!(
611                "{context} contains {} entries, exceeding limit {limit}",
612                raw_entries.len()
613            )));
614        }
615    }
616
617    let entries = raw_entries
618        .into_iter()
619        .map(convert_raw_entry)
620        .collect::<Result<Vec<_>, _>>()?;
621    validate_entry_import_limits(&entries, limits, context)?;
622    Ok(RootFilesystemSnapshot { entries })
623}
624
/// Checks a snapshot's entries against `limits` (entry count, materialized
/// inode count, and content bytes). `context` prefixes any error message.
pub fn validate_snapshot_import_limits(
    snapshot: &RootFilesystemSnapshot,
    limits: &RootFilesystemImportLimits,
    context: &str,
) -> Result<(), RootFilesystemError> {
    validate_entry_import_limits(&snapshot.entries, limits, context)
}
632
633fn validate_descriptor_import_limits(
634    lowers: &[RootFilesystemSnapshot],
635    bootstrap_entries: &[FilesystemEntry],
636    limits: &RootFilesystemImportLimits,
637    context: &str,
638) -> Result<(), RootFilesystemError> {
639    let explicit_entry_count = lowers
640        .iter()
641        .map(|snapshot| snapshot.entries.len())
642        .sum::<usize>()
643        .saturating_add(bootstrap_entries.len());
644    let mut inode_paths = BTreeSet::new();
645    for snapshot in lowers {
646        collect_materialized_entry_paths(&snapshot.entries, &mut inode_paths);
647    }
648    collect_materialized_entry_paths(bootstrap_entries, &mut inode_paths);
649    let inode_count = inode_paths.len();
650    if let Some(limit) = limits.max_inode_count {
651        if explicit_entry_count > limit {
652            return Err(RootFilesystemError::new(format!(
653                "{context} contains {explicit_entry_count} entries, exceeding limit {limit}"
654            )));
655        }
656
657        if inode_count > limit {
658            return Err(RootFilesystemError::new(format!(
659                "{context} contains {inode_count} entries, exceeding limit {limit}"
660            )));
661        }
662    }
663
664    let mut bytes = 0_u64;
665    for snapshot in lowers {
666        bytes = bytes.saturating_add(entry_content_bytes(&snapshot.entries));
667    }
668    bytes = bytes.saturating_add(entry_content_bytes(bootstrap_entries));
669    if let Some(limit) = limits.max_filesystem_bytes {
670        if bytes > limit {
671            return Err(RootFilesystemError::new(format!(
672                "{context} contains {bytes} bytes, exceeding limit {limit}"
673            )));
674        }
675    }
676    Ok(())
677}
678
679fn validate_entry_import_limits(
680    entries: &[FilesystemEntry],
681    limits: &RootFilesystemImportLimits,
682    context: &str,
683) -> Result<(), RootFilesystemError> {
684    if let Some(limit) = limits.max_inode_count {
685        if entries.len() > limit {
686            return Err(RootFilesystemError::new(format!(
687                "{context} contains {} entries, exceeding limit {limit}",
688                entries.len()
689            )));
690        }
691
692        let inode_count = materialized_entry_inode_count(entries);
693        if inode_count > limit {
694            return Err(RootFilesystemError::new(format!(
695                "{context} contains {inode_count} entries, exceeding limit {limit}"
696            )));
697        }
698    }
699
700    let bytes = entry_content_bytes(entries);
701    if let Some(limit) = limits.max_filesystem_bytes {
702        if bytes > limit {
703            return Err(RootFilesystemError::new(format!(
704                "{context} contains {bytes} bytes, exceeding limit {limit}"
705            )));
706        }
707    }
708    Ok(())
709}
710
711fn validate_encoded_snapshot_size(
712    bytes: &[u8],
713    limits: &RootFilesystemImportLimits,
714    context: &str,
715) -> Result<(), RootFilesystemError> {
716    if let Some(limit) = limits.max_encoded_snapshot_bytes {
717        if bytes.len() > limit {
718            return Err(RootFilesystemError::new(format!(
719                "{context} contains {} encoded bytes, exceeding limit {limit}",
720                bytes.len()
721            )));
722        }
723    }
724    Ok(())
725}
726
727fn entry_content_bytes(entries: &[FilesystemEntry]) -> u64 {
728    entries.iter().fold(0_u64, |total, entry| {
729        total.saturating_add(match entry.kind {
730            FilesystemEntryKind::File => entry
731                .content
732                .as_ref()
733                .map(|content| usize_to_u64(content.len()))
734                .unwrap_or(0),
735            FilesystemEntryKind::Directory => 0,
736            FilesystemEntryKind::Symlink => entry
737                .target
738                .as_ref()
739                .map(|target| usize_to_u64(target.len()))
740                .unwrap_or(0),
741        })
742    })
743}
744
745fn materialized_entry_inode_count(entries: &[FilesystemEntry]) -> usize {
746    let mut paths = BTreeSet::new();
747    collect_materialized_entry_paths(entries, &mut paths);
748    paths.len()
749}
750
751fn collect_materialized_entry_paths(entries: &[FilesystemEntry], paths: &mut BTreeSet<String>) {
752    for entry in entries {
753        collect_materialized_path(&entry.path, paths);
754    }
755}
756
757fn collect_materialized_path(path: &str, paths: &mut BTreeSet<String>) {
758    let normalized = normalize_path(path);
759    paths.insert(normalized.clone());
760
761    let mut parent = String::new();
762    let segments = normalized
763        .split('/')
764        .filter(|segment| !segment.is_empty())
765        .collect::<Vec<_>>();
766    for segment in segments.iter().take(segments.len().saturating_sub(1)) {
767        parent.push('/');
768        parent.push_str(segment);
769        paths.insert(parent.clone());
770    }
771}
772
/// Widens a `usize` to `u64`, clamping to `u64::MAX` if it would not fit.
fn usize_to_u64(value: usize) -> u64 {
    match u64::try_from(value) {
        Ok(widened) => widened,
        Err(_) => u64::MAX,
    }
}
776
/// Narrows a `u64` limit to `usize`, clamping to `usize::MAX` when the value
/// does not fit on the target platform.
const fn u64_limit_to_usize(value: u64) -> usize {
    const CEILING: u64 = usize::MAX as u64;
    if value <= CEILING {
        value as usize
    } else {
        usize::MAX
    }
}
784
785const fn encoded_snapshot_limit(
786    max_filesystem_bytes: Option<u64>,
787    max_inode_count: Option<usize>,
788) -> Option<usize> {
789    let Some(max_filesystem_bytes) = max_filesystem_bytes else {
790        return None;
791    };
792
793    Some(
794        u64_limit_to_usize(max_filesystem_bytes)
795            .saturating_mul(2)
796            .saturating_add(match max_inode_count {
797                Some(max_inode_count) => {
798                    max_inode_count.saturating_mul(ROOT_FILESYSTEM_SNAPSHOT_ENTRY_OVERHEAD_BYTES)
799                }
800                None => 0,
801            })
802            .saturating_add(ROOT_FILESYSTEM_SNAPSHOT_FIXED_OVERHEAD_BYTES),
803    )
804}
805
806fn snapshot_to_memory_filesystem(
807    snapshot: &RootFilesystemSnapshot,
808) -> Result<MemoryFileSystem, RootFilesystemError> {
809    let mut filesystem = MemoryFileSystem::new();
810    for entry in sort_entries(snapshot.entries.clone()) {
811        apply_entry_to_memory_filesystem(&mut filesystem, &entry)?;
812    }
813    Ok(filesystem)
814}
815
816fn apply_entry_to_memory_filesystem(
817    filesystem: &mut MemoryFileSystem,
818    entry: &FilesystemEntry,
819) -> Result<(), RootFilesystemError> {
820    ensure_parent_directories(filesystem, &entry.path)?;
821
822    match entry.kind {
823        FilesystemEntryKind::Directory => {
824            filesystem.mkdir(&entry.path, true)?;
825            filesystem.chmod(&entry.path, entry.mode)?;
826            filesystem.chown(&entry.path, entry.uid, entry.gid)?;
827        }
828        FilesystemEntryKind::File => {
829            filesystem.write_file(&entry.path, entry.content.clone().unwrap_or_default())?;
830            filesystem.chmod(&entry.path, entry.mode)?;
831            filesystem.chown(&entry.path, entry.uid, entry.gid)?;
832        }
833        FilesystemEntryKind::Symlink => {
834            let Some(target) = entry.target.as_deref() else {
835                return Err(RootFilesystemError::new(format!(
836                    "missing symlink target for {}",
837                    entry.path
838                )));
839            };
840            filesystem.symlink_with_metadata(
841                target,
842                &entry.path,
843                entry.mode,
844                entry.uid,
845                entry.gid,
846            )?;
847        }
848    }
849
850    Ok(())
851}
852
853fn apply_entry(
854    filesystem: &mut impl VirtualFileSystem,
855    entry: &FilesystemEntry,
856) -> Result<(), RootFilesystemError> {
857    ensure_parent_directories(filesystem, &entry.path)?;
858
859    match entry.kind {
860        FilesystemEntryKind::Directory => {
861            filesystem.mkdir(&entry.path, true)?;
862            filesystem.chmod(&entry.path, entry.mode)?;
863            filesystem.chown(&entry.path, entry.uid, entry.gid)?;
864        }
865        FilesystemEntryKind::File => {
866            filesystem.write_file(&entry.path, entry.content.clone().unwrap_or_default())?;
867            filesystem.chmod(&entry.path, entry.mode)?;
868            filesystem.chown(&entry.path, entry.uid, entry.gid)?;
869        }
870        FilesystemEntryKind::Symlink => {
871            let Some(target) = entry.target.as_deref() else {
872                return Err(RootFilesystemError::new(format!(
873                    "missing symlink target for {}",
874                    entry.path
875                )));
876            };
877            filesystem.symlink(target, &entry.path)?;
878        }
879    }
880
881    Ok(())
882}
883
884fn ensure_parent_directories(
885    filesystem: &mut impl VirtualFileSystem,
886    path: &str,
887) -> Result<(), RootFilesystemError> {
888    let normalized = normalize_path(path);
889    let mut current = String::new();
890    let segments = normalized
891        .split('/')
892        .filter(|segment| !segment.is_empty())
893        .collect::<Vec<_>>();
894
895    for segment in segments.iter().take(segments.len().saturating_sub(1)) {
896        current.push('/');
897        current.push_str(segment);
898
899        if filesystem.exists(&current) {
900            continue;
901        }
902
903        filesystem.create_dir(&current)?;
904        filesystem.chmod(&current, 0o755)?;
905        filesystem.chown(&current, 0, 0)?;
906    }
907
908    Ok(())
909}
910
911fn sort_entries(mut entries: Vec<FilesystemEntry>) -> Vec<FilesystemEntry> {
912    entries.sort_by(|left, right| {
913        let depth_left = if left.path == "/" {
914            0
915        } else {
916            left.path.split('/').filter(|part| !part.is_empty()).count()
917        };
918        let depth_right = if right.path == "/" {
919            0
920        } else {
921            right
922                .path
923                .split('/')
924                .filter(|part| !part.is_empty())
925                .count()
926        };
927        depth_left
928            .cmp(&depth_right)
929            .then_with(|| left.path.cmp(&right.path))
930    });
931    entries
932}
933
934fn snapshot_virtual_filesystem(
935    filesystem: &mut impl VirtualFileSystem,
936    root_path: &str,
937) -> Result<Vec<FilesystemEntry>, RootFilesystemError> {
938    let mut entries = Vec::new();
939    snapshot_path(filesystem, root_path, &mut entries)?;
940    Ok(entries)
941}
942
943fn snapshot_path(
944    filesystem: &mut impl VirtualFileSystem,
945    path: &str,
946    entries: &mut Vec<FilesystemEntry>,
947) -> Result<(), RootFilesystemError> {
948    let stat = if path == "/" {
949        filesystem.stat(path)?
950    } else {
951        filesystem.lstat(path)?
952    };
953
954    if stat.is_symbolic_link {
955        entries.push(FilesystemEntry {
956            path: path.to_owned(),
957            kind: FilesystemEntryKind::Symlink,
958            mode: stat.mode,
959            uid: stat.uid,
960            gid: stat.gid,
961            content: None,
962            target: Some(filesystem.read_link(path)?),
963        });
964        return Ok(());
965    }
966
967    if stat.is_directory {
968        entries.push(FilesystemEntry {
969            path: path.to_owned(),
970            kind: FilesystemEntryKind::Directory,
971            mode: stat.mode,
972            uid: stat.uid,
973            gid: stat.gid,
974            content: None,
975            target: None,
976        });
977
978        let mut children = filesystem
979            .read_dir_with_types(path)?
980            .into_iter()
981            .map(|entry| entry.name)
982            .filter(|name| name != "." && name != "..")
983            .collect::<Vec<_>>();
984        children.sort();
985
986        for child in children {
987            let child_path = if path == "/" {
988                format!("/{child}")
989            } else {
990                format!("{path}/{child}")
991            };
992            snapshot_path(filesystem, &child_path, entries)?;
993        }
994        return Ok(());
995    }
996
997    entries.push(FilesystemEntry {
998        path: path.to_owned(),
999        kind: FilesystemEntryKind::File,
1000        mode: stat.mode,
1001        uid: stat.uid,
1002        gid: stat.gid,
1003        content: Some(filesystem.read_file(path)?),
1004        target: None,
1005    });
1006    Ok(())
1007}
1008
1009fn is_kernel_reserved_bootstrap_path(path: &str) -> bool {
1010    let normalized = normalize_path(path);
1011    KERNEL_RESERVED_BOOTSTRAP_PATH_PREFIXES
1012        .iter()
1013        .any(|prefix| normalized == *prefix || normalized.starts_with(&format!("{prefix}/")))
1014}