1use crate::overlay_fs::{OverlayFileSystem, OverlayMode};
2use crate::resource_accounting::{
3 ResourceLimits, DEFAULT_MAX_FILESYSTEM_BYTES, DEFAULT_MAX_INODE_COUNT,
4};
5use crate::vfs::{
6 normalize_path, MemoryFileSystem, VfsError, VfsResult, VirtualFileSystem, VirtualUtimeSpec,
7 MAX_PATH_LENGTH,
8};
9use base64::Engine;
10use serde::Deserialize;
11use std::collections::BTreeSet;
12
/// JSON text of the default base filesystem layer, embedded at compile time
/// from `base-filesystem.json` in the build's `OUT_DIR`.
const BUNDLED_BASE_FILESYSTEM_JSON: &str =
    include_str!(concat!(env!("OUT_DIR"), "/base-filesystem.json"));
/// Format tag written into every snapshot produced by `encode_snapshot`.
pub const ROOT_FILESYSTEM_SNAPSHOT_FORMAT: &str = "secure_exec_filesystem_snapshot_v1";
/// Older format tag that `is_supported_root_filesystem_snapshot_format` still accepts.
const LEGACY_AGENT_OS_ROOT_FILESYSTEM_SNAPSHOT_FORMAT: &str = "agent_os_filesystem_snapshot_v1";
/// Byte allowance added once when `encoded_snapshot_limit` derives the encoded size cap.
const ROOT_FILESYSTEM_SNAPSHOT_FIXED_OVERHEAD_BYTES: usize = 4 * 1024;
/// Byte allowance multiplied by the inode limit when `encoded_snapshot_limit`
/// derives the encoded size cap.
const ROOT_FILESYSTEM_SNAPSHOT_ENTRY_OVERHEAD_BYTES: usize = MAX_PATH_LENGTH + 1024;
/// Directories that `minimal_root_snapshot` creates; that snapshot is used when
/// the default base layer is disabled and no lower snapshots were supplied.
const DEFAULT_ROOT_DIRECTORIES: &[&str] = &[
    "/",
    "/dev",
    "/proc",
    "/tmp",
    "/bin",
    "/lib",
    "/sbin",
    "/boot",
    "/etc",
    "/root",
    "/run",
    "/srv",
    "/sys",
    "/opt",
    "/mnt",
    "/media",
    "/home",
    "/usr",
    "/usr/bin",
    "/usr/games",
    "/usr/include",
    "/usr/lib",
    "/usr/libexec",
    "/usr/man",
    "/usr/local",
    "/usr/local/bin",
    "/usr/sbin",
    "/usr/share",
    "/usr/share/man",
    "/var",
    "/var/cache",
    "/var/empty",
    "/var/lib",
    "/var/lock",
    "/var/log",
    "/var/run",
    "/var/spool",
    "/var/tmp",
    "/etc/agentos",
];
/// Path prefixes whose bootstrap entries are skipped by
/// `RootFileSystem::apply_bootstrap_entries` (the prefix itself and anything below it).
const KERNEL_RESERVED_BOOTSTRAP_PATH_PREFIXES: &[&str] = &["/dev", "/proc", "/sys"];
65
/// Error produced while building, importing, or exporting a root filesystem.
///
/// It carries only a human-readable message, which `Display` prints verbatim.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RootFilesystemError {
    message: String,
}

impl RootFilesystemError {
    /// Builds an error from anything convertible into a `String`.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl std::fmt::Display for RootFilesystemError {
    /// Writes the stored message without any decoration.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{}", self.message)
    }
}

impl std::error::Error for RootFilesystemError {}
86
87impl From<VfsError> for RootFilesystemError {
88 fn from(error: VfsError) -> Self {
89 Self::new(error.to_string())
90 }
91}
92
/// Kind of node a [`FilesystemEntry`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilesystemEntryKind {
    File,
    Directory,
    Symlink,
}

/// One node of a filesystem snapshot: path, kind, permission bits, ownership,
/// and — depending on the kind — file content or a symlink target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilesystemEntry {
    pub path: String,
    pub kind: FilesystemEntryKind,
    pub mode: u32,
    pub uid: u32,
    pub gid: u32,
    pub content: Option<Vec<u8>>,
    pub target: Option<String>,
}

impl FilesystemEntry {
    /// Shared constructor: the public helpers all produce entries owned by
    /// uid/gid 0 and differ only in kind, mode, and payload.
    fn root_owned(
        path: String,
        kind: FilesystemEntryKind,
        mode: u32,
        content: Option<Vec<u8>>,
        target: Option<String>,
    ) -> Self {
        Self {
            path,
            kind,
            mode,
            uid: 0,
            gid: 0,
            content,
            target,
        }
    }

    /// Directory entry with mode `0o755`.
    pub fn directory(path: impl Into<String>) -> Self {
        Self::root_owned(
            path.into(),
            FilesystemEntryKind::Directory,
            0o755,
            None,
            None,
        )
    }

    /// Regular-file entry with mode `0o644` holding `content`.
    pub fn file(path: impl Into<String>, content: impl Into<Vec<u8>>) -> Self {
        Self::root_owned(
            path.into(),
            FilesystemEntryKind::File,
            0o644,
            Some(content.into()),
            None,
        )
    }

    /// Symlink entry with mode `0o777` pointing at `target`.
    pub fn symlink(path: impl Into<String>, target: impl Into<String>) -> Self {
        Self::root_owned(
            path.into(),
            FilesystemEntryKind::Symlink,
            0o777,
            None,
            Some(target.into()),
        )
    }
}
148
/// A flat list of filesystem entries describing a whole root filesystem tree.
///
/// Produced by `RootFileSystem::snapshot` and `decode_snapshot`, and consumed
/// as a lower layer by `RootFilesystemDescriptor`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RootFilesystemSnapshot {
    pub entries: Vec<FilesystemEntry>,
}
153
154#[derive(Debug, Clone, Copy, PartialEq, Eq)]
155pub struct RootFilesystemImportLimits {
156 pub max_encoded_snapshot_bytes: Option<usize>,
157 pub max_filesystem_bytes: Option<u64>,
158 pub max_inode_count: Option<usize>,
159}
160
161impl RootFilesystemImportLimits {
162 pub fn from_resource_limits(limits: &ResourceLimits) -> Self {
163 Self {
164 max_encoded_snapshot_bytes: encoded_snapshot_limit(
165 limits.max_filesystem_bytes,
166 limits.max_inode_count,
167 ),
168 max_filesystem_bytes: limits.max_filesystem_bytes,
169 max_inode_count: limits.max_inode_count,
170 }
171 }
172}
173
174impl Default for RootFilesystemImportLimits {
175 fn default() -> Self {
176 Self {
177 max_encoded_snapshot_bytes: encoded_snapshot_limit(
178 Some(DEFAULT_MAX_FILESYSTEM_BYTES),
179 Some(DEFAULT_MAX_INODE_COUNT),
180 ),
181 max_filesystem_bytes: Some(DEFAULT_MAX_FILESYSTEM_BYTES),
182 max_inode_count: Some(DEFAULT_MAX_INODE_COUNT),
183 }
184 }
185}
186
/// Selects what `RootFileSystem::finish_bootstrap` does to the overlay.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RootFilesystemMode {
    /// `finish_bootstrap` leaves the overlay unlocked.
    Ephemeral,
    /// `finish_bootstrap` calls `lock_writes` on the overlay.
    ReadOnly,
}
192
193#[derive(Debug, Clone, PartialEq, Eq)]
194pub struct RootFilesystemDescriptor {
195 pub mode: RootFilesystemMode,
196 pub disable_default_base_layer: bool,
197 pub lowers: Vec<RootFilesystemSnapshot>,
198 pub bootstrap_entries: Vec<FilesystemEntry>,
199}
200
201impl Default for RootFilesystemDescriptor {
202 fn default() -> Self {
203 Self {
204 mode: RootFilesystemMode::Ephemeral,
205 disable_default_base_layer: false,
206 lowers: Vec::new(),
207 bootstrap_entries: Vec::new(),
208 }
209 }
210}
211
212#[derive(Debug)]
213pub struct RootFileSystem {
214 overlay: OverlayFileSystem,
215 mode: RootFilesystemMode,
216 bootstrap_finished: bool,
217}
218
219impl RootFileSystem {
220 pub fn from_descriptor(
221 descriptor: RootFilesystemDescriptor,
222 ) -> Result<Self, RootFilesystemError> {
223 Self::from_descriptor_with_import_limits(descriptor, &RootFilesystemImportLimits::default())
224 }
225
226 pub fn from_descriptor_with_import_limits(
227 descriptor: RootFilesystemDescriptor,
228 limits: &RootFilesystemImportLimits,
229 ) -> Result<Self, RootFilesystemError> {
230 let mut lower_snapshots = descriptor.lowers.clone();
231 if !descriptor.disable_default_base_layer {
232 lower_snapshots.push(load_bundled_base_snapshot_with_limits(limits)?);
233 } else if lower_snapshots.is_empty() {
234 lower_snapshots.push(minimal_root_snapshot());
235 }
236 validate_descriptor_import_limits(
237 &lower_snapshots,
238 &descriptor.bootstrap_entries,
239 limits,
240 "root filesystem descriptor",
241 )?;
242
243 let lowers = lower_snapshots
244 .iter()
245 .map(snapshot_to_memory_filesystem)
246 .collect::<Result<Vec<_>, _>>()?;
247
248 let mut root = Self {
249 overlay: OverlayFileSystem::new(lowers, OverlayMode::Ephemeral),
250 mode: descriptor.mode,
251 bootstrap_finished: false,
252 };
253 root.apply_bootstrap_entries(&descriptor.bootstrap_entries)?;
254 Ok(root)
255 }
256
257 pub fn apply_bootstrap_entries(
258 &mut self,
259 entries: &[FilesystemEntry],
260 ) -> Result<(), RootFilesystemError> {
261 if self.bootstrap_finished {
262 return Err(RootFilesystemError::new(
263 "root filesystem bootstrap is already finished",
264 ));
265 }
266
267 for entry in sort_entries(entries.to_vec()) {
268 if is_kernel_reserved_bootstrap_path(&entry.path) {
269 continue;
270 }
271 apply_entry(&mut self.overlay, &entry)?;
272 }
273 Ok(())
274 }
275
276 pub fn finish_bootstrap(&mut self) {
277 if self.bootstrap_finished {
278 return;
279 }
280 self.bootstrap_finished = true;
281 if self.mode == RootFilesystemMode::ReadOnly {
282 self.overlay.lock_writes();
283 }
284 }
285
286 pub fn snapshot(&mut self) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
287 Ok(RootFilesystemSnapshot {
288 entries: snapshot_virtual_filesystem(&mut self.overlay, "/")?,
289 })
290 }
291
292 pub fn check_rename_copy_up_limits(
293 &mut self,
294 old_path: &str,
295 new_path: &str,
296 max_bytes: Option<u64>,
297 max_inodes: Option<usize>,
298 ) -> VfsResult<()> {
299 self.overlay
300 .check_rename_copy_up_limits(old_path, new_path, max_bytes, max_inodes)
301 }
302}
303
/// `VirtualFileSystem` implementation that forwards every operation to the
/// inner overlay. Generic `impl Into<Vec<u8>>` content is converted to
/// `Vec<u8>` before it is forwarded.
impl VirtualFileSystem for RootFileSystem {
    fn read_file(&mut self, path: &str) -> VfsResult<Vec<u8>> {
        self.overlay.read_file(path)
    }

    fn read_dir(&mut self, path: &str) -> VfsResult<Vec<String>> {
        self.overlay.read_dir(path)
    }

    fn read_dir_limited(&mut self, path: &str, max_entries: usize) -> VfsResult<Vec<String>> {
        self.overlay.read_dir_limited(path, max_entries)
    }

    fn read_dir_with_types(&mut self, path: &str) -> VfsResult<Vec<crate::vfs::VirtualDirEntry>> {
        self.overlay.read_dir_with_types(path)
    }

    fn write_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
        self.overlay.write_file(path, content.into())
    }

    fn create_file_exclusive(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
        self.overlay.create_file_exclusive(path, content.into())
    }

    fn append_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<u64> {
        self.overlay.append_file(path, content.into())
    }

    fn create_dir(&mut self, path: &str) -> VfsResult<()> {
        self.overlay.create_dir(path)
    }

    fn mkdir(&mut self, path: &str, recursive: bool) -> VfsResult<()> {
        self.overlay.mkdir(path, recursive)
    }

    fn exists(&self, path: &str) -> bool {
        self.overlay.exists(path)
    }

    fn stat(&mut self, path: &str) -> VfsResult<crate::vfs::VirtualStat> {
        self.overlay.stat(path)
    }

    fn remove_file(&mut self, path: &str) -> VfsResult<()> {
        self.overlay.remove_file(path)
    }

    fn remove_dir(&mut self, path: &str) -> VfsResult<()> {
        self.overlay.remove_dir(path)
    }

    fn rename(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
        self.overlay.rename(old_path, new_path)
    }

    fn realpath(&self, path: &str) -> VfsResult<String> {
        self.overlay.realpath(path)
    }

    fn symlink(&mut self, target: &str, link_path: &str) -> VfsResult<()> {
        self.overlay.symlink(target, link_path)
    }

    fn read_link(&self, path: &str) -> VfsResult<String> {
        self.overlay.read_link(path)
    }

    fn lstat(&self, path: &str) -> VfsResult<crate::vfs::VirtualStat> {
        self.overlay.lstat(path)
    }

    fn link(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
        self.overlay.link(old_path, new_path)
    }

    fn chmod(&mut self, path: &str, mode: u32) -> VfsResult<()> {
        self.overlay.chmod(path, mode)
    }

    fn chown(&mut self, path: &str, uid: u32, gid: u32) -> VfsResult<()> {
        self.overlay.chown(path, uid, gid)
    }

    fn utimes(&mut self, path: &str, atime_ms: u64, mtime_ms: u64) -> VfsResult<()> {
        self.overlay.utimes(path, atime_ms, mtime_ms)
    }

    fn utimes_spec(
        &mut self,
        path: &str,
        atime: VirtualUtimeSpec,
        mtime: VirtualUtimeSpec,
        follow_symlinks: bool,
    ) -> VfsResult<()> {
        self.overlay
            .utimes_spec(path, atime, mtime, follow_symlinks)
    }

    fn truncate(&mut self, path: &str, length: u64) -> VfsResult<()> {
        self.overlay.truncate(path, length)
    }

    fn pread(&mut self, path: &str, offset: u64, length: usize) -> VfsResult<Vec<u8>> {
        self.overlay.pread(path, offset, length)
    }
}
412
/// Deserialized shape of the bundled base filesystem JSON (no `format` tag).
#[derive(Debug, Deserialize)]
struct RawBaseFilesystemSnapshot {
    filesystem: RawFilesystemEntries,
}

/// The `filesystem` object shared by both JSON layouts.
#[derive(Debug, Deserialize)]
struct RawFilesystemEntries {
    entries: Vec<RawFilesystemEntry>,
}

/// One entry exactly as it appears in JSON, before validation and decoding.
#[derive(Debug, Deserialize)]
struct RawFilesystemEntry {
    path: String,
    // Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    kind: RawFilesystemEntryKind,
    // Octal digits as text; parsed with radix 8 in `convert_raw_entry`.
    mode: String,
    uid: u32,
    gid: u32,
    #[serde(default)]
    content: Option<String>,
    // `convert_raw_entry` accepts "base64", "utf8", or absent (content taken as-is).
    #[serde(default)]
    encoding: Option<String>,
    #[serde(default)]
    target: Option<String>,
}

/// Entry kind as spelled in JSON (`file`, `directory`, `symlink`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
enum RawFilesystemEntryKind {
    File,
    Directory,
    Symlink,
}

/// Deserialized shape of an exported snapshot: a `format` tag plus entries.
#[derive(Debug, Deserialize)]
struct RawSnapshotExport {
    format: String,
    filesystem: RawFilesystemEntries,
}
452
/// Top-level JSON object written by `encode_snapshot`.
#[derive(Debug, serde::Serialize)]
struct SnapshotExport<'a> {
    format: &'static str,
    filesystem: SnapshotFilesystem<'a>,
}

/// The `filesystem` object of an exported snapshot.
#[derive(Debug, serde::Serialize)]
struct SnapshotFilesystem<'a> {
    entries: Vec<SerializedFilesystemEntry<'a>>,
}

/// One exported entry; it borrows the path and target from the snapshot being
/// encoded. Optional fields that are `None` are omitted from the JSON.
#[derive(Debug, serde::Serialize)]
struct SerializedFilesystemEntry<'a> {
    path: &'a str,
    // Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    kind: &'static str,
    // Octal digits as text, produced with `{:o}` in `encode_snapshot`.
    mode: String,
    uid: u32,
    gid: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    encoding: Option<&'static str>,
    #[serde(skip_serializing_if = "Option::is_none")]
    target: Option<&'a str>,
}
479
480pub fn encode_snapshot(snapshot: &RootFilesystemSnapshot) -> Result<Vec<u8>, RootFilesystemError> {
481 let serialized_entries = snapshot
482 .entries
483 .iter()
484 .map(|entry| SerializedFilesystemEntry {
485 path: &entry.path,
486 kind: match entry.kind {
487 FilesystemEntryKind::File => "file",
488 FilesystemEntryKind::Directory => "directory",
489 FilesystemEntryKind::Symlink => "symlink",
490 },
491 mode: format!("{:o}", entry.mode),
492 uid: entry.uid,
493 gid: entry.gid,
494 content: entry
495 .content
496 .as_ref()
497 .map(|bytes| base64::engine::general_purpose::STANDARD.encode(bytes)),
498 encoding: entry.content.as_ref().map(|_| "base64"),
499 target: entry.target.as_deref(),
500 })
501 .collect::<Vec<_>>();
502
503 serde_json::to_vec(&SnapshotExport {
504 format: ROOT_FILESYSTEM_SNAPSHOT_FORMAT,
505 filesystem: SnapshotFilesystem {
506 entries: serialized_entries,
507 },
508 })
509 .map_err(|error| RootFilesystemError::new(format!("serialize root snapshot: {error}")))
510}
511
512pub fn decode_snapshot(bytes: &[u8]) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
513 decode_snapshot_with_import_limits(bytes, &RootFilesystemImportLimits::default())
514}
515
516pub fn decode_snapshot_with_import_limits(
517 bytes: &[u8],
518 limits: &RootFilesystemImportLimits,
519) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
520 validate_encoded_snapshot_size(bytes, limits, "root snapshot")?;
521 let raw: RawSnapshotExport = serde_json::from_slice(bytes)
522 .map_err(|error| RootFilesystemError::new(format!("parse root snapshot: {error}")))?;
523 if !is_supported_root_filesystem_snapshot_format(&raw.format) {
524 return Err(RootFilesystemError::new(format!(
525 "unsupported root snapshot format: {}",
526 raw.format
527 )));
528 }
529 raw_entries_to_snapshot(raw.filesystem.entries, limits, "root snapshot")
530}
531
532pub fn is_supported_root_filesystem_snapshot_format(format: &str) -> bool {
533 format == ROOT_FILESYSTEM_SNAPSHOT_FORMAT
534 || format == LEGACY_AGENT_OS_ROOT_FILESYSTEM_SNAPSHOT_FORMAT
535}
536
537fn load_bundled_base_snapshot_with_limits(
538 limits: &RootFilesystemImportLimits,
539) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
540 validate_encoded_snapshot_size(
541 BUNDLED_BASE_FILESYSTEM_JSON.as_bytes(),
542 limits,
543 "bundled base filesystem",
544 )?;
545 let raw: RawBaseFilesystemSnapshot = serde_json::from_str(BUNDLED_BASE_FILESYSTEM_JSON)
546 .map_err(|error| {
547 RootFilesystemError::new(format!("parse bundled base filesystem: {error}"))
548 })?;
549 raw_entries_to_snapshot(raw.filesystem.entries, limits, "bundled base filesystem")
550}
551
552fn minimal_root_snapshot() -> RootFilesystemSnapshot {
553 let mut entries = DEFAULT_ROOT_DIRECTORIES
554 .iter()
555 .map(|path| FilesystemEntry::directory(*path))
556 .collect::<Vec<_>>();
557 entries.push(FilesystemEntry::file("/usr/bin/env", Vec::new()));
558 RootFilesystemSnapshot { entries }
559}
560
561fn convert_raw_entry(raw: RawFilesystemEntry) -> Result<FilesystemEntry, RootFilesystemError> {
562 let content = match raw.content {
563 Some(content) => match raw.encoding.as_deref() {
564 Some("base64") => Some(
565 base64::engine::general_purpose::STANDARD
566 .decode(content)
567 .map_err(|error| {
568 RootFilesystemError::new(format!(
569 "decode base64 content for {}: {error}",
570 raw.path
571 ))
572 })?,
573 ),
574 Some("utf8") | None => Some(content.into_bytes()),
575 Some(other) => {
576 return Err(RootFilesystemError::new(format!(
577 "unsupported content encoding for {}: {other}",
578 raw.path
579 )));
580 }
581 },
582 None => None,
583 };
584
585 Ok(FilesystemEntry {
586 path: raw.path,
587 kind: match raw.kind {
588 RawFilesystemEntryKind::File => FilesystemEntryKind::File,
589 RawFilesystemEntryKind::Directory => FilesystemEntryKind::Directory,
590 RawFilesystemEntryKind::Symlink => FilesystemEntryKind::Symlink,
591 },
592 mode: u32::from_str_radix(&raw.mode, 8).map_err(|error| {
593 RootFilesystemError::new(format!("parse mode {}: {error}", raw.mode))
594 })?,
595 uid: raw.uid,
596 gid: raw.gid,
597 content,
598 target: raw.target,
599 })
600}
601
602fn raw_entries_to_snapshot(
603 raw_entries: Vec<RawFilesystemEntry>,
604 limits: &RootFilesystemImportLimits,
605 context: &str,
606) -> Result<RootFilesystemSnapshot, RootFilesystemError> {
607 if let Some(limit) = limits.max_inode_count {
608 if raw_entries.len() > limit {
609 return Err(RootFilesystemError::new(format!(
610 "{context} contains {} entries, exceeding limit {limit}",
611 raw_entries.len()
612 )));
613 }
614 }
615
616 let entries = raw_entries
617 .into_iter()
618 .map(convert_raw_entry)
619 .collect::<Result<Vec<_>, _>>()?;
620 validate_entry_import_limits(&entries, limits, context)?;
621 Ok(RootFilesystemSnapshot { entries })
622}
623
624pub fn validate_snapshot_import_limits(
625 snapshot: &RootFilesystemSnapshot,
626 limits: &RootFilesystemImportLimits,
627 context: &str,
628) -> Result<(), RootFilesystemError> {
629 validate_entry_import_limits(&snapshot.entries, limits, context)
630}
631
632fn validate_descriptor_import_limits(
633 lowers: &[RootFilesystemSnapshot],
634 bootstrap_entries: &[FilesystemEntry],
635 limits: &RootFilesystemImportLimits,
636 context: &str,
637) -> Result<(), RootFilesystemError> {
638 let explicit_entry_count = lowers
639 .iter()
640 .map(|snapshot| snapshot.entries.len())
641 .sum::<usize>()
642 .saturating_add(bootstrap_entries.len());
643 let mut inode_paths = BTreeSet::new();
644 for snapshot in lowers {
645 collect_materialized_entry_paths(&snapshot.entries, &mut inode_paths);
646 }
647 collect_materialized_entry_paths(bootstrap_entries, &mut inode_paths);
648 let inode_count = inode_paths.len();
649 if let Some(limit) = limits.max_inode_count {
650 if explicit_entry_count > limit {
651 return Err(RootFilesystemError::new(format!(
652 "{context} contains {explicit_entry_count} entries, exceeding limit {limit}"
653 )));
654 }
655
656 if inode_count > limit {
657 return Err(RootFilesystemError::new(format!(
658 "{context} contains {inode_count} entries, exceeding limit {limit}"
659 )));
660 }
661 }
662
663 let mut bytes = 0_u64;
664 for snapshot in lowers {
665 bytes = bytes.saturating_add(entry_content_bytes(&snapshot.entries));
666 }
667 bytes = bytes.saturating_add(entry_content_bytes(bootstrap_entries));
668 if let Some(limit) = limits.max_filesystem_bytes {
669 if bytes > limit {
670 return Err(RootFilesystemError::new(format!(
671 "{context} contains {bytes} bytes, exceeding limit {limit}"
672 )));
673 }
674 }
675 Ok(())
676}
677
678fn validate_entry_import_limits(
679 entries: &[FilesystemEntry],
680 limits: &RootFilesystemImportLimits,
681 context: &str,
682) -> Result<(), RootFilesystemError> {
683 if let Some(limit) = limits.max_inode_count {
684 if entries.len() > limit {
685 return Err(RootFilesystemError::new(format!(
686 "{context} contains {} entries, exceeding limit {limit}",
687 entries.len()
688 )));
689 }
690
691 let inode_count = materialized_entry_inode_count(entries);
692 if inode_count > limit {
693 return Err(RootFilesystemError::new(format!(
694 "{context} contains {inode_count} entries, exceeding limit {limit}"
695 )));
696 }
697 }
698
699 let bytes = entry_content_bytes(entries);
700 if let Some(limit) = limits.max_filesystem_bytes {
701 if bytes > limit {
702 return Err(RootFilesystemError::new(format!(
703 "{context} contains {bytes} bytes, exceeding limit {limit}"
704 )));
705 }
706 }
707 Ok(())
708}
709
710fn validate_encoded_snapshot_size(
711 bytes: &[u8],
712 limits: &RootFilesystemImportLimits,
713 context: &str,
714) -> Result<(), RootFilesystemError> {
715 if let Some(limit) = limits.max_encoded_snapshot_bytes {
716 if bytes.len() > limit {
717 return Err(RootFilesystemError::new(format!(
718 "{context} contains {} encoded bytes, exceeding limit {limit}",
719 bytes.len()
720 )));
721 }
722 }
723 Ok(())
724}
725
726fn entry_content_bytes(entries: &[FilesystemEntry]) -> u64 {
727 entries.iter().fold(0_u64, |total, entry| {
728 total.saturating_add(match entry.kind {
729 FilesystemEntryKind::File => entry
730 .content
731 .as_ref()
732 .map(|content| usize_to_u64(content.len()))
733 .unwrap_or(0),
734 FilesystemEntryKind::Directory => 0,
735 FilesystemEntryKind::Symlink => entry
736 .target
737 .as_ref()
738 .map(|target| usize_to_u64(target.len()))
739 .unwrap_or(0),
740 })
741 })
742}
743
744fn materialized_entry_inode_count(entries: &[FilesystemEntry]) -> usize {
745 let mut paths = BTreeSet::new();
746 collect_materialized_entry_paths(entries, &mut paths);
747 paths.len()
748}
749
750fn collect_materialized_entry_paths(entries: &[FilesystemEntry], paths: &mut BTreeSet<String>) {
751 for entry in entries {
752 collect_materialized_path(&entry.path, paths);
753 }
754}
755
756fn collect_materialized_path(path: &str, paths: &mut BTreeSet<String>) {
757 let normalized = normalize_path(path);
758 paths.insert(normalized.clone());
759
760 let mut parent = String::new();
761 let segments = normalized
762 .split('/')
763 .filter(|segment| !segment.is_empty())
764 .collect::<Vec<_>>();
765 for segment in segments.iter().take(segments.len().saturating_sub(1)) {
766 parent.push('/');
767 parent.push_str(segment);
768 paths.insert(parent.clone());
769 }
770}
771
/// Widens `value` to `u64`, clamping to `u64::MAX` if it does not fit.
fn usize_to_u64(value: usize) -> u64 {
    match u64::try_from(value) {
        Ok(widened) => widened,
        Err(_) => u64::MAX,
    }
}
775
/// Narrows a `u64` limit to `usize`, clamping to `usize::MAX` when it is too
/// large for the platform. Usable in `const` contexts.
const fn u64_limit_to_usize(value: u64) -> usize {
    let platform_max = usize::MAX as u64;
    if value <= platform_max {
        value as usize
    } else {
        usize::MAX
    }
}
783
784const fn encoded_snapshot_limit(
785 max_filesystem_bytes: Option<u64>,
786 max_inode_count: Option<usize>,
787) -> Option<usize> {
788 let Some(max_filesystem_bytes) = max_filesystem_bytes else {
789 return None;
790 };
791
792 Some(
793 u64_limit_to_usize(max_filesystem_bytes)
794 .saturating_mul(2)
795 .saturating_add(match max_inode_count {
796 Some(max_inode_count) => {
797 max_inode_count.saturating_mul(ROOT_FILESYSTEM_SNAPSHOT_ENTRY_OVERHEAD_BYTES)
798 }
799 None => 0,
800 })
801 .saturating_add(ROOT_FILESYSTEM_SNAPSHOT_FIXED_OVERHEAD_BYTES),
802 )
803}
804
805fn snapshot_to_memory_filesystem(
806 snapshot: &RootFilesystemSnapshot,
807) -> Result<MemoryFileSystem, RootFilesystemError> {
808 let mut filesystem = MemoryFileSystem::new();
809 for entry in sort_entries(snapshot.entries.clone()) {
810 apply_entry_to_memory_filesystem(&mut filesystem, &entry)?;
811 }
812 Ok(filesystem)
813}
814
815fn apply_entry_to_memory_filesystem(
816 filesystem: &mut MemoryFileSystem,
817 entry: &FilesystemEntry,
818) -> Result<(), RootFilesystemError> {
819 ensure_parent_directories(filesystem, &entry.path)?;
820
821 match entry.kind {
822 FilesystemEntryKind::Directory => {
823 filesystem.mkdir(&entry.path, true)?;
824 filesystem.chmod(&entry.path, entry.mode)?;
825 filesystem.chown(&entry.path, entry.uid, entry.gid)?;
826 }
827 FilesystemEntryKind::File => {
828 filesystem.write_file(&entry.path, entry.content.clone().unwrap_or_default())?;
829 filesystem.chmod(&entry.path, entry.mode)?;
830 filesystem.chown(&entry.path, entry.uid, entry.gid)?;
831 }
832 FilesystemEntryKind::Symlink => {
833 let Some(target) = entry.target.as_deref() else {
834 return Err(RootFilesystemError::new(format!(
835 "missing symlink target for {}",
836 entry.path
837 )));
838 };
839 filesystem.symlink_with_metadata(
840 target,
841 &entry.path,
842 entry.mode,
843 entry.uid,
844 entry.gid,
845 )?;
846 }
847 }
848
849 Ok(())
850}
851
852fn apply_entry(
853 filesystem: &mut impl VirtualFileSystem,
854 entry: &FilesystemEntry,
855) -> Result<(), RootFilesystemError> {
856 ensure_parent_directories(filesystem, &entry.path)?;
857
858 match entry.kind {
859 FilesystemEntryKind::Directory => {
860 filesystem.mkdir(&entry.path, true)?;
861 filesystem.chmod(&entry.path, entry.mode)?;
862 filesystem.chown(&entry.path, entry.uid, entry.gid)?;
863 }
864 FilesystemEntryKind::File => {
865 filesystem.write_file(&entry.path, entry.content.clone().unwrap_or_default())?;
866 filesystem.chmod(&entry.path, entry.mode)?;
867 filesystem.chown(&entry.path, entry.uid, entry.gid)?;
868 }
869 FilesystemEntryKind::Symlink => {
870 let Some(target) = entry.target.as_deref() else {
871 return Err(RootFilesystemError::new(format!(
872 "missing symlink target for {}",
873 entry.path
874 )));
875 };
876 filesystem.symlink(target, &entry.path)?;
877 }
878 }
879
880 Ok(())
881}
882
883fn ensure_parent_directories(
884 filesystem: &mut impl VirtualFileSystem,
885 path: &str,
886) -> Result<(), RootFilesystemError> {
887 let normalized = normalize_path(path);
888 let mut current = String::new();
889 let segments = normalized
890 .split('/')
891 .filter(|segment| !segment.is_empty())
892 .collect::<Vec<_>>();
893
894 for segment in segments.iter().take(segments.len().saturating_sub(1)) {
895 current.push('/');
896 current.push_str(segment);
897
898 if filesystem.exists(¤t) {
899 continue;
900 }
901
902 filesystem.create_dir(¤t)?;
903 filesystem.chmod(¤t, 0o755)?;
904 filesystem.chown(¤t, 0, 0)?;
905 }
906
907 Ok(())
908}
909
910fn sort_entries(mut entries: Vec<FilesystemEntry>) -> Vec<FilesystemEntry> {
911 entries.sort_by(|left, right| {
912 let depth_left = if left.path == "/" {
913 0
914 } else {
915 left.path.split('/').filter(|part| !part.is_empty()).count()
916 };
917 let depth_right = if right.path == "/" {
918 0
919 } else {
920 right
921 .path
922 .split('/')
923 .filter(|part| !part.is_empty())
924 .count()
925 };
926 depth_left
927 .cmp(&depth_right)
928 .then_with(|| left.path.cmp(&right.path))
929 });
930 entries
931}
932
933fn snapshot_virtual_filesystem(
934 filesystem: &mut impl VirtualFileSystem,
935 root_path: &str,
936) -> Result<Vec<FilesystemEntry>, RootFilesystemError> {
937 let mut entries = Vec::new();
938 snapshot_path(filesystem, root_path, &mut entries)?;
939 Ok(entries)
940}
941
942fn snapshot_path(
943 filesystem: &mut impl VirtualFileSystem,
944 path: &str,
945 entries: &mut Vec<FilesystemEntry>,
946) -> Result<(), RootFilesystemError> {
947 let stat = if path == "/" {
948 filesystem.stat(path)?
949 } else {
950 filesystem.lstat(path)?
951 };
952
953 if stat.is_symbolic_link {
954 entries.push(FilesystemEntry {
955 path: path.to_owned(),
956 kind: FilesystemEntryKind::Symlink,
957 mode: stat.mode,
958 uid: stat.uid,
959 gid: stat.gid,
960 content: None,
961 target: Some(filesystem.read_link(path)?),
962 });
963 return Ok(());
964 }
965
966 if stat.is_directory {
967 entries.push(FilesystemEntry {
968 path: path.to_owned(),
969 kind: FilesystemEntryKind::Directory,
970 mode: stat.mode,
971 uid: stat.uid,
972 gid: stat.gid,
973 content: None,
974 target: None,
975 });
976
977 let mut children = filesystem
978 .read_dir_with_types(path)?
979 .into_iter()
980 .map(|entry| entry.name)
981 .filter(|name| name != "." && name != "..")
982 .collect::<Vec<_>>();
983 children.sort();
984
985 for child in children {
986 let child_path = if path == "/" {
987 format!("/{child}")
988 } else {
989 format!("{path}/{child}")
990 };
991 snapshot_path(filesystem, &child_path, entries)?;
992 }
993 return Ok(());
994 }
995
996 entries.push(FilesystemEntry {
997 path: path.to_owned(),
998 kind: FilesystemEntryKind::File,
999 mode: stat.mode,
1000 uid: stat.uid,
1001 gid: stat.gid,
1002 content: Some(filesystem.read_file(path)?),
1003 target: None,
1004 });
1005 Ok(())
1006}
1007
1008fn is_kernel_reserved_bootstrap_path(path: &str) -> bool {
1009 let normalized = normalize_path(path);
1010 KERNEL_RESERVED_BOOTSTRAP_PATH_PREFIXES
1011 .iter()
1012 .any(|prefix| normalized == *prefix || normalized.starts_with(&format!("{prefix}/")))
1013}