1use super::vfs::{
2 normalize_path, MemoryFileSystem, VfsError, VfsResult, VirtualDirEntry, VirtualFileSystem,
3 VirtualStat, VirtualUtimeSpec,
4};
5use base64::Engine;
6use std::collections::BTreeSet;
7
/// Upper bound used both for symlink-resolution recursion and for the depth of
/// snapshot / copy-up tree walks in this file.
const MAX_SNAPSHOT_DEPTH: usize = 1024;
/// Root of the overlay's private bookkeeping tree; this path and everything
/// beneath it is treated as internal metadata.
const OVERLAY_METADATA_ROOT: &str = "/.secure-exec-overlay";
/// Directory that holds one marker file per whited-out path.
const OVERLAY_WHITEOUT_DIR: &str = "/.secure-exec-overlay/whiteouts";
/// Directory that holds one marker file per opaque directory.
const OVERLAY_OPAQUE_DIR: &str = "/.secure-exec-overlay/opaque";
12
/// Selects how [`OverlayFileSystem::new`] sets up the writable layer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OverlayMode {
    /// A fresh in-memory upper layer is created and writes are allowed.
    Ephemeral,
    /// No upper layer is created and writes are locked from the start.
    ReadOnly,
}
18
/// A stack of in-memory lower layers with an optional upper layer on top.
#[derive(Debug)]
pub struct OverlayFileSystem {
    /// Lower layers; lookups such as `find_lower_by_entry` return the first
    /// layer (lowest index) that has the entry. Never empty after construction.
    lowers: Vec<MemoryFileSystem>,
    /// Upper layer that receives copy-ups and overlay markers; `None` when the
    /// overlay was built in `OverlayMode::ReadOnly`.
    upper: Option<MemoryFileSystem>,
    /// When set, `writable_upper` refuses access with `EROFS`.
    writes_locked: bool,
}
25
/// Kind of bookkeeping marker stored under the overlay metadata root.
#[derive(Debug, Clone, Copy)]
enum OverlayMarkerKind {
    /// The path is reported as missing by the merged lookups in this file.
    Whiteout,
    /// Directory listings for the path skip the lower layers.
    Opaque,
}
31
/// Payload captured for one entry while snapshotting a subtree.
#[derive(Debug)]
enum OverlaySnapshotKind {
    /// A directory; its children are recorded as separate entries.
    Directory,
    /// A regular file together with its full contents.
    File(Vec<u8>),
    /// A symbolic link together with its target string.
    Symlink(String),
}
38
/// One entry of a subtree snapshot taken from the merged view.
#[derive(Debug)]
struct OverlaySnapshotEntry {
    /// Path of the entry as it was visited during the snapshot walk.
    path: String,
    /// `lstat` result from the merged view at snapshot time.
    stat: VirtualStat,
    /// Entry type plus the data needed to recreate it.
    kind: OverlaySnapshotKind,
}
45
/// Running totals used when checking copy-up limits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
struct OverlayCopyUpUsage {
    /// Accumulated byte count (saturating).
    total_bytes: u64,
    /// Accumulated number of inodes (saturating).
    inode_count: usize,
}
51
52impl OverlayFileSystem {
53 pub fn new(lowers: Vec<MemoryFileSystem>, mode: OverlayMode) -> Self {
54 let mut effective_lowers = lowers;
55 if effective_lowers.is_empty() {
56 effective_lowers.push(MemoryFileSystem::new());
57 }
58
59 let mut upper = match mode {
60 OverlayMode::Ephemeral => Some(MemoryFileSystem::new()),
61 OverlayMode::ReadOnly => None,
62 };
63 if let Some(upper_filesystem) = upper.as_mut() {
64 sync_upper_root_metadata(upper_filesystem, &effective_lowers);
65 }
66
67 Self {
68 lowers: effective_lowers,
69 upper,
70 writes_locked: matches!(mode, OverlayMode::ReadOnly),
71 }
72 }
73
74 pub fn with_upper(lowers: Vec<MemoryFileSystem>, upper: MemoryFileSystem) -> Self {
75 let mut effective_lowers = lowers;
76 if effective_lowers.is_empty() {
77 effective_lowers.push(MemoryFileSystem::new());
78 }
79
80 Self {
81 lowers: effective_lowers,
82 upper: Some(upper),
83 writes_locked: false,
84 }
85 }
86
/// Locks the overlay against writes; afterwards `writable_upper` fails with `EROFS`.
pub fn lock_writes(&mut self) {
    self.writes_locked = true;
}
90
/// Returns `path` run through the VFS `normalize_path` helper.
fn normalized(path: &str) -> String {
    normalize_path(path)
}
94
95 fn parent_path(path: &str) -> String {
96 let normalized = Self::normalized(path);
97 if normalized == "/" {
98 return String::from("/");
99 }
100
101 match normalized.rsplit_once('/') {
102 Some(("", _)) | None => String::from("/"),
103 Some((parent, _)) => String::from(parent),
104 }
105 }
106
107 fn basename(path: &str) -> String {
108 let normalized = Self::normalized(path);
109 if normalized == "/" {
110 return String::from("/");
111 }
112 normalized
113 .rsplit('/')
114 .find(|component| !component.is_empty())
115 .unwrap_or("")
116 .to_owned()
117 }
118
119 fn validate_destination_parent(&mut self, path: &str) -> VfsResult<()> {
120 let parent = Self::parent_path(path);
121 let resolved_parent = self.resolve_merged_path(&parent, true, 0)?;
122 let stat = self.merged_lstat(&resolved_parent)?;
123 if !stat.is_directory {
124 return Err(Self::not_directory(&parent));
125 }
126 Ok(())
127 }
128
129 fn resolved_destination_path(&self, path: &str) -> VfsResult<String> {
130 let parent = Self::parent_path(path);
131 let resolved_parent = self.resolve_merged_path(&parent, true, 0)?;
132 Ok(Self::join_path(&resolved_parent, &Self::basename(path)))
133 }
134
135 fn resolve_merged_path(
136 &self,
137 path: &str,
138 follow_final_symlink: bool,
139 depth: usize,
140 ) -> VfsResult<String> {
141 if depth > MAX_SNAPSHOT_DEPTH {
142 return Err(VfsError::new(
143 "ELOOP",
144 format!("too many symbolic links while resolving '{path}'"),
145 ));
146 }
147
148 let normalized = Self::normalized(path);
149 if normalized == "/" {
150 return Ok(normalized);
151 }
152
153 let components: Vec<&str> = normalized
154 .split('/')
155 .filter(|component| !component.is_empty())
156 .collect();
157 let mut current = String::from("/");
158
159 for (index, component) in components.iter().enumerate() {
160 let candidate = Self::join_path(¤t, component);
161 let is_final = index + 1 == components.len();
162 let should_follow = !is_final || follow_final_symlink;
163
164 if should_follow {
165 if let Ok(stat) = self.merged_lstat(&candidate) {
166 if stat.is_symbolic_link {
167 let target = self.read_link_inner(&candidate)?;
168 let target_path = if target.starts_with('/') {
169 Self::normalized(&target)
170 } else {
171 Self::normalized(&Self::join_path(
172 &Self::parent_path(&candidate),
173 &target,
174 ))
175 };
176 let remainder = components[index + 1..].join("/");
177 let next_path = if remainder.is_empty() {
178 target_path
179 } else {
180 Self::normalized(&Self::join_path(&target_path, &remainder))
181 };
182 return self.resolve_merged_path(
183 &next_path,
184 follow_final_symlink,
185 depth + 1,
186 );
187 }
188
189 if !is_final && !stat.is_directory {
190 return Err(Self::not_directory(&candidate));
191 }
192 }
193 } else if let Ok(stat) = self.merged_lstat(&candidate) {
194 if !is_final && !stat.is_directory {
195 return Err(Self::not_directory(&candidate));
196 }
197 }
198
199 current = candidate;
200 }
201
202 Ok(current)
203 }
204
205 fn destination_parent_copy_up_paths(&self, path: &str) -> VfsResult<Vec<String>> {
206 let parent = Self::parent_path(path);
207 let mut paths = Vec::new();
208 let mut seen = BTreeSet::new();
209 self.collect_destination_parent_copy_up_paths(&parent, &mut paths, &mut seen, 0)?;
210 Ok(paths)
211 }
212
213 fn collect_destination_parent_copy_up_paths(
214 &self,
215 parent: &str,
216 paths: &mut Vec<String>,
217 seen: &mut BTreeSet<String>,
218 depth: usize,
219 ) -> VfsResult<()> {
220 if depth > MAX_SNAPSHOT_DEPTH {
221 return Err(VfsError::new(
222 "ELOOP",
223 format!("too many symbolic links while resolving '{parent}'"),
224 ));
225 }
226
227 let normalized = Self::normalized(parent);
228 if normalized == "/" {
229 return Ok(());
230 }
231
232 let components: Vec<&str> = normalized
233 .split('/')
234 .filter(|component| !component.is_empty())
235 .collect();
236 let mut current = String::from("/");
237 for (index, component) in components.iter().enumerate() {
238 current = Self::join_path(¤t, component);
239 let stat = self.merged_lstat(¤t)?;
240
241 if stat.is_symbolic_link {
242 if !self.has_entry_in_upper(¤t) && seen.insert(current.clone()) {
243 paths.push(current.clone());
244 }
245
246 let target = self.read_link_inner(¤t)?;
247 let target_path = if target.starts_with('/') {
248 Self::normalized(&target)
249 } else {
250 Self::normalized(&Self::join_path(&Self::parent_path(¤t), &target))
251 };
252 let remainder = components[index + 1..].join("/");
253 let next_parent = if remainder.is_empty() {
254 target_path
255 } else {
256 Self::normalized(&Self::join_path(&target_path, &remainder))
257 };
258 return self.collect_destination_parent_copy_up_paths(
259 &next_parent,
260 paths,
261 seen,
262 depth + 1,
263 );
264 }
265
266 if self.find_lower_by_entry(¤t).is_some()
267 && !self.has_entry_in_upper(¤t)
268 && seen.insert(current.clone())
269 {
270 paths.push(current.clone());
271 }
272 }
273
274 Ok(())
275 }
276
/// Encodes `path` as unpadded URL-safe base64 so it can be used as a single
/// marker file name.
fn encode_marker_path(path: &str) -> String {
    base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(path)
}
280
/// Returns the metadata directory that stores markers of the given `kind`.
fn marker_directory(kind: OverlayMarkerKind) -> &'static str {
    match kind {
        OverlayMarkerKind::Whiteout => OVERLAY_WHITEOUT_DIR,
        OverlayMarkerKind::Opaque => OVERLAY_OPAQUE_DIR,
    }
}
287
288 fn marker_path(kind: OverlayMarkerKind, path: &str) -> String {
289 format!(
290 "{}/{}",
291 Self::marker_directory(kind),
292 Self::encode_marker_path(&Self::normalized(path))
293 )
294 }
295
296 fn is_internal_metadata_path(path: &str) -> bool {
297 let normalized = Self::normalized(path);
298 normalized == OVERLAY_METADATA_ROOT
299 || normalized.starts_with(&(String::from(OVERLAY_METADATA_ROOT) + "/"))
300 }
301
302 fn touches_internal_metadata(&self, path: &str) -> bool {
313 if Self::is_internal_metadata_path(path) {
314 return true;
315 }
316 if let Ok(resolved) = self.resolve_merged_path(path, true, 0) {
317 if Self::is_internal_metadata_path(&resolved) {
318 return true;
319 }
320 }
321 if let Ok(resolved) = self.resolved_destination_path(path) {
322 if Self::is_internal_metadata_path(&resolved) {
323 return true;
324 }
325 }
326 false
327 }
328
/// Name of the root-level directory entry that is hidden from listings
/// (the metadata root without its leading slash).
fn hidden_root_entry_name() -> &'static str {
    ".secure-exec-overlay"
}
332
333 fn should_hide_directory_entry(path: &str, entry: &str) -> bool {
334 let normalized = Self::normalized(path);
335 normalized == "/" && entry == Self::hidden_root_entry_name()
336 }
337
338 fn should_ignore_raw_directory_entry(
339 upper: Option<&MemoryFileSystem>,
340 path: &str,
341 entry: &str,
342 ) -> bool {
343 if entry == "." || entry == ".." || Self::should_hide_directory_entry(path, entry) {
344 return true;
345 }
346
347 let entry_path = Self::join_path(path, entry);
348 Self::marker_exists_in_upper(upper, OverlayMarkerKind::Whiteout, &entry_path)
349 }
350
351 fn check_copy_up_usage_limits(
352 usage: &OverlayCopyUpUsage,
353 max_bytes: Option<u64>,
354 max_inodes: Option<usize>,
355 ) -> VfsResult<()> {
356 if let Some(limit) = max_bytes {
357 if usage.total_bytes > limit {
358 return Err(VfsError::new(
359 "ENOSPC",
360 format!(
361 "overlay rename copy-up bytes {} exceed configured limit {}",
362 usage.total_bytes, limit
363 ),
364 ));
365 }
366 }
367
368 if let Some(limit) = max_inodes {
369 if usage.inode_count > limit {
370 return Err(VfsError::new(
371 "ENOSPC",
372 format!(
373 "overlay rename copy-up inodes {} exceed configured limit {}",
374 usage.inode_count, limit
375 ),
376 ));
377 }
378 }
379
380 Ok(())
381 }
382
/// Adds `bytes` and `inodes` to `usage` (saturating) and then re-checks the
/// limits, so the caller fails as soon as a limit is crossed.
///
/// # Errors
/// `ENOSPC` from `check_copy_up_usage_limits`; `usage` has already been
/// updated when the error is returned.
fn add_copy_up_usage(
    usage: &mut OverlayCopyUpUsage,
    bytes: u64,
    inodes: usize,
    max_bytes: Option<u64>,
    max_inodes: Option<usize>,
) -> VfsResult<()> {
    usage.total_bytes = usage.total_bytes.saturating_add(bytes);
    usage.inode_count = usage.inode_count.saturating_add(inodes);
    Self::check_copy_up_usage_limits(usage, max_bytes, max_inodes)
}
394
395 fn remaining_inode_budget(
396 usage: &OverlayCopyUpUsage,
397 max_inodes: Option<usize>,
398 ) -> Option<usize> {
399 max_inodes.map(|limit| limit.saturating_sub(usage.inode_count))
400 }
401
402 fn copy_up_directory_entries_limited(
403 &mut self,
404 path: &str,
405 max_entries: Option<usize>,
406 ) -> VfsResult<Vec<String>> {
407 let Some(max_entries) = max_entries else {
408 return self.read_dir(path);
409 };
410
411 match self.read_dir_limited(path, max_entries) {
412 Ok(entries) => Ok(entries),
413 Err(error) if error.code() == "ENOMEM" => Err(VfsError::new(
414 "ENOSPC",
415 format!("overlay rename copy-up directory '{path}' exceeds configured inode limit"),
416 )),
417 Err(error) => Err(error),
418 }
419 }
420
421 fn directory_has_visible_entries_limited(&mut self, path: &str) -> VfsResult<bool> {
422 match self.read_dir_limited(path, 1) {
423 Ok(entries) => Ok(!entries.is_empty()),
424 Err(error) if error.code() == "ENOMEM" => Ok(true),
425 Err(error) => Err(error),
426 }
427 }
428
429 fn memory_subtree_usage_limited(
430 filesystem: &mut MemoryFileSystem,
431 path: &str,
432 max_bytes: Option<u64>,
433 max_inodes: Option<usize>,
434 ) -> VfsResult<OverlayCopyUpUsage> {
435 let mut usage = OverlayCopyUpUsage::default();
436 let mut visited = BTreeSet::new();
437 let mut pending = vec![Self::normalized(path)];
438 while let Some(current_path) = pending.pop() {
439 let stat = filesystem.lstat(¤t_path)?;
440 if visited.insert(stat.ino) {
441 let bytes = if stat.is_directory && !stat.is_symbolic_link {
442 0
443 } else {
444 stat.size
445 };
446 Self::add_copy_up_usage(&mut usage, bytes, 1, max_bytes, max_inodes)?;
447 }
448
449 if stat.is_directory && !stat.is_symbolic_link {
450 let remaining = Self::remaining_inode_budget(&usage, max_inodes);
451 let children = if let Some(max_entries) = remaining {
452 filesystem.read_dir_limited(¤t_path, max_entries)?
453 } else {
454 filesystem.read_dir(¤t_path)?
455 };
456 for entry in children.into_iter().rev() {
457 if matches!(entry.as_str(), "." | "..") {
458 continue;
459 }
460 if Self::should_hide_directory_entry(¤t_path, &entry) {
461 continue;
462 }
463 pending.push(Self::join_path(¤t_path, &entry));
464 }
465 }
466 }
467
468 Ok(usage)
469 }
470
471 fn memory_subtree_released_usage(
472 filesystem: &mut MemoryFileSystem,
473 path: &str,
474 ) -> VfsResult<OverlayCopyUpUsage> {
475 let mut usage = OverlayCopyUpUsage::default();
476 let mut visited = BTreeSet::new();
477 let mut pending = vec![Self::normalized(path)];
478 while let Some(current_path) = pending.pop() {
479 let stat = filesystem.lstat(¤t_path)?;
480 if visited.insert(stat.ino) {
481 let subtree_links = filesystem.link_count_in_subtree(stat.ino, path) as u64;
482 if stat.is_directory || stat.nlink <= subtree_links {
483 let bytes = if stat.is_directory && !stat.is_symbolic_link {
484 0
485 } else {
486 stat.size
487 };
488 Self::add_copy_up_usage(&mut usage, bytes, 1, None, None)?;
489 }
490 }
491
492 if stat.is_directory && !stat.is_symbolic_link {
493 for entry in filesystem.read_dir(¤t_path)?.into_iter().rev() {
494 if matches!(entry.as_str(), "." | "..") {
495 continue;
496 }
497 if Self::should_hide_directory_entry(¤t_path, &entry) {
498 continue;
499 }
500 pending.push(Self::join_path(¤t_path, &entry));
501 }
502 }
503 }
504
505 Ok(usage)
506 }
507
508 fn upper_usage_limited(
509 &mut self,
510 max_bytes: Option<u64>,
511 max_inodes: Option<usize>,
512 ) -> VfsResult<OverlayCopyUpUsage> {
513 let Some(upper) = self.upper.as_mut() else {
514 return Ok(OverlayCopyUpUsage::default());
515 };
516
517 Self::memory_subtree_usage_limited(upper, "/", max_bytes, max_inodes)
518 }
519
520 fn upper_subtree_released_usage(&mut self, path: &str) -> VfsResult<OverlayCopyUpUsage> {
521 let Some(upper) = self.upper.as_mut() else {
522 return Ok(OverlayCopyUpUsage::default());
523 };
524
525 if !upper.exists(path) {
526 return Ok(OverlayCopyUpUsage::default());
527 }
528
529 Self::memory_subtree_released_usage(upper, path)
530 }
531
532 fn collect_copy_up_usage_limited(
533 &mut self,
534 path: &str,
535 usage: &mut OverlayCopyUpUsage,
536 max_bytes: Option<u64>,
537 max_inodes: Option<usize>,
538 ) -> VfsResult<()> {
539 let mut pending = vec![(Self::normalized(path), 0usize)];
540 while let Some((current_path, depth)) = pending.pop() {
541 if depth > MAX_SNAPSHOT_DEPTH {
542 return Err(VfsError::new(
543 "EINVAL",
544 format!("overlay snapshot depth limit exceeded at '{current_path}'"),
545 ));
546 }
547
548 let stat = self.merged_lstat(¤t_path)?;
549 if !self.has_entry_in_upper(¤t_path) {
550 let bytes = if stat.is_symbolic_link {
551 self.read_link_inner(¤t_path)?.len() as u64
552 } else if stat.is_directory {
553 0
554 } else {
555 stat.size
556 };
557 Self::add_copy_up_usage(usage, bytes, 1, max_bytes, max_inodes)?;
558 }
559
560 if stat.is_directory && !stat.is_symbolic_link {
561 let children = self.copy_up_directory_entries_limited(¤t_path, max_inodes)?;
562 for entry in children.into_iter().rev() {
563 pending.push((Self::join_path(¤t_path, &entry), depth + 1));
564 }
565 }
566 }
567
568 Ok(())
569 }
570
571 fn collect_single_copy_up_usage_limited(
572 &mut self,
573 path: &str,
574 usage: &mut OverlayCopyUpUsage,
575 max_bytes: Option<u64>,
576 max_inodes: Option<usize>,
577 ) -> VfsResult<()> {
578 if self.has_entry_in_upper(path) {
579 return Ok(());
580 }
581
582 let stat = self.merged_lstat(path)?;
583 let bytes = if stat.is_symbolic_link {
584 self.read_link_inner(path)?.len() as u64
585 } else if stat.is_directory {
586 0
587 } else {
588 stat.size
589 };
590 Self::add_copy_up_usage(usage, bytes, 1, max_bytes, max_inodes)
591 }
592
/// Dry-run check that renaming `old_path` to `new_path` would keep the upper
/// layer within `max_bytes` / `max_inodes`.
///
/// Nothing is copied or renamed here; only the usage the rename would cause
/// is computed.
///
/// # Errors
/// * permission denied when either path is overlay metadata or `old_path` is the root;
/// * lookup errors for a missing source or an invalid destination parent;
/// * `EROFS` when writes are locked;
/// * `EINVAL` when a directory would be moved into its own descendant;
/// * `ENOTEMPTY` when the destination is a non-empty directory;
/// * `ENOSPC` when a limit would be exceeded.
pub fn check_rename_copy_up_limits(
    &mut self,
    old_path: &str,
    new_path: &str,
    max_bytes: Option<u64>,
    max_inodes: Option<usize>,
) -> VfsResult<()> {
    let old_normalized = Self::normalized(old_path);
    let new_normalized = Self::normalized(new_path);
    // The error names `old_path` even when `new_path` is the offending one.
    if Self::is_internal_metadata_path(&old_normalized)
        || Self::is_internal_metadata_path(&new_normalized)
    {
        return Err(VfsError::permission_denied("rename", old_path));
    }

    if old_normalized == "/" {
        return Err(VfsError::permission_denied("rename", old_path));
    }

    // Renaming a path onto itself needs no copy-up at all.
    if old_normalized == new_normalized {
        return Ok(());
    }

    let source_stat = self.merged_lstat(old_path)?;
    // `writable_upper` always fails while writes are locked; called only for its error.
    if self.writes_locked {
        self.writable_upper(&old_normalized)?;
    }
    self.validate_destination_parent(&new_normalized)?;
    let resolved_new_normalized = self.resolved_destination_path(&new_normalized)?;

    // Same check again after symlinks in the destination parent are resolved.
    if old_normalized == resolved_new_normalized {
        return Ok(());
    }

    if source_stat.is_directory
        && resolved_new_normalized.starts_with(&(old_normalized.clone() + "/"))
    {
        return Err(VfsError::new(
            "EINVAL",
            format!(
                "cannot move '{}' into its own descendant '{}'",
                old_path, new_path
            ),
        ));
    }

    let destination_parent_copy_up_paths =
        self.destination_parent_copy_up_paths(&new_normalized)?;

    // A missing destination is fine; only a non-empty directory blocks the rename.
    if let Ok(destination_stat) = self.merged_lstat(&resolved_new_normalized) {
        if destination_stat.is_directory
            && !destination_stat.is_symbolic_link
            && self.directory_has_visible_entries_limited(&resolved_new_normalized)?
        {
            return Err(Self::not_empty(&resolved_new_normalized));
        }
    }

    // Start from current upper usage, measured without limits so the
    // destination's released usage can be subtracted before the first check.
    let mut usage = self.upper_usage_limited(None, None)?;
    if self.has_entry_in_upper(&resolved_new_normalized) {
        let destination_usage = self.upper_subtree_released_usage(&resolved_new_normalized)?;
        usage.total_bytes = usage
            .total_bytes
            .saturating_sub(destination_usage.total_bytes);
        usage.inode_count = usage
            .inode_count
            .saturating_sub(destination_usage.inode_count);
    }
    Self::check_copy_up_usage_limits(&usage, max_bytes, max_inodes)?;
    // Add destination ancestors that need copy-up, then the source subtree.
    for path in destination_parent_copy_up_paths {
        self.collect_single_copy_up_usage_limited(&path, &mut usage, max_bytes, max_inodes)?;
    }
    self.collect_copy_up_usage_limited(&old_normalized, &mut usage, max_bytes, max_inodes)?;

    Self::check_copy_up_usage_limits(&usage, max_bytes, max_inodes)
}
669
/// Returns `true` when this overlay's upper layer holds a `kind` marker for `path`.
fn marker_exists(&self, kind: OverlayMarkerKind, path: &str) -> bool {
    Self::marker_exists_in_upper(self.upper.as_ref(), kind, path)
}
673
674 fn marker_exists_in_upper(
675 upper: Option<&MemoryFileSystem>,
676 kind: OverlayMarkerKind,
677 path: &str,
678 ) -> bool {
679 upper.is_some_and(|filesystem| filesystem.exists(&Self::marker_path(kind, path)))
680 }
681
/// Returns `true` when a whiteout marker exists for exactly `path`
/// (ancestors are not consulted here).
fn is_whited_out(&self, path: &str) -> bool {
    self.marker_exists(OverlayMarkerKind::Whiteout, path)
}
685
686 fn ensure_metadata_directories_in_upper(&mut self, path: &str) -> VfsResult<()> {
687 let upper = self.writable_upper(path)?;
688 upper.mkdir(OVERLAY_METADATA_ROOT, true)?;
689 upper.mkdir(OVERLAY_WHITEOUT_DIR, true)?;
690 upper.mkdir(OVERLAY_OPAQUE_DIR, true)?;
691 Ok(())
692 }
693
694 fn set_marker(&mut self, kind: OverlayMarkerKind, path: &str, present: bool) -> VfsResult<()> {
695 let marker_path = Self::marker_path(kind, path);
696 if present {
697 self.ensure_metadata_directories_in_upper(path)?;
698 self.writable_upper(path)?
699 .write_file(&marker_path, Self::normalized(path).into_bytes())?;
700 return Ok(());
701 }
702
703 if self
704 .upper
705 .as_ref()
706 .is_some_and(|upper| upper.exists(&marker_path))
707 {
708 self.writable_upper(path)?.remove_file(&marker_path)?;
709 }
710 Ok(())
711 }
712
/// Records a whiteout marker for `path` in the upper layer.
fn add_whiteout(&mut self, path: &str) -> VfsResult<()> {
    self.set_marker(OverlayMarkerKind::Whiteout, path, true)
}
716
/// Removes the whiteout marker for `path`, if one exists.
fn remove_whiteout(&mut self, path: &str) -> VfsResult<()> {
    self.set_marker(OverlayMarkerKind::Whiteout, path, false)
}
720
/// Records an opaque marker for the directory at `path` in the upper layer.
fn mark_opaque_directory(&mut self, path: &str) -> VfsResult<()> {
    self.set_marker(OverlayMarkerKind::Opaque, path, true)
}
724
/// Removes the opaque marker for `path`, if one exists.
fn clear_opaque_directory(&mut self, path: &str) -> VfsResult<()> {
    self.set_marker(OverlayMarkerKind::Opaque, path, false)
}
728
/// Removes both the whiteout and the opaque marker for `path`.
/// The whiteout is removed first; a failure there leaves the opaque marker untouched.
fn clear_path_metadata(&mut self, path: &str) -> VfsResult<()> {
    self.remove_whiteout(path)?;
    self.clear_opaque_directory(path)
}
733
/// Appends `name` to `base` with exactly one separator; a `base` of `/` does
/// not produce a doubled slash. No normalization is performed.
fn join_path(base: &str, name: &str) -> String {
    match base {
        "/" => format!("/{name}"),
        _ => format!("{base}/{name}"),
    }
}
741
/// Rewrites `path`, which lies in the subtree rooted at `old_root`, so that it
/// lies at the same relative position under `new_root`.
///
/// * `path == old_root` yields `new_root`.
/// * A root of `/` on either side is handled without dropping or doubling
///   the separator.
/// * A `path` that is not inside `old_root` (including a sibling that merely
///   shares a textual prefix, such as `/ab` vs `/a`) is returned unchanged
///   instead of being sliced at an arbitrary byte offset.
fn rebase_path(path: &str, old_root: &str, new_root: &str) -> String {
    if path == old_root {
        return String::from(new_root);
    }

    // Part of `path` below `old_root`, without a leading separator.
    let relative = match path.strip_prefix(old_root) {
        // `old_root` already ends with the separator (it is "/").
        Some(rest) if old_root.ends_with('/') => rest,
        Some(rest) => match rest.strip_prefix('/') {
            Some(below) => below,
            // Shared textual prefix only; not actually inside the subtree.
            None => return String::from(path),
        },
        None => return String::from(path),
    };

    if new_root.ends_with('/') {
        format!("{new_root}{relative}")
    } else {
        format!("{new_root}/{relative}")
    }
}
749
/// Builds the `EROFS` error returned when a write is attempted on a
/// non-writable overlay.
fn read_only_error(path: &str) -> VfsError {
    VfsError::new("EROFS", format!("read-only filesystem: {path}"))
}
753
/// Builds the `ENOENT` error for a missing entry.
fn entry_not_found(path: &str) -> VfsError {
    VfsError::new("ENOENT", format!("no such file: {path}"))
}
757
/// Builds the `ENOENT` error for a missing directory.
fn directory_not_found(path: &str) -> VfsError {
    VfsError::new("ENOENT", format!("no such directory: {path}"))
}
761
/// Builds the `EEXIST` error for a path that is already present.
fn already_exists(path: &str) -> VfsError {
    VfsError::new("EEXIST", format!("file exists: {path}"))
}
765
/// Builds the `ENOTDIR` error for a path component that is not a directory.
fn not_directory(path: &str) -> VfsError {
    VfsError::new("ENOTDIR", format!("not a directory: {path}"))
}
769
770 fn writable_upper(&mut self, path: &str) -> VfsResult<&mut MemoryFileSystem> {
771 if self.writes_locked {
772 return Err(Self::read_only_error(path));
773 }
774 self.upper
775 .as_mut()
776 .ok_or_else(|| Self::read_only_error(path))
777 }
778
/// Returns the layer's own `exists` answer for `path`.
// NOTE(review): presumably `exists` follows symlinks while `lstat` does not — confirm in `MemoryFileSystem`.
fn path_exists_in_filesystem(filesystem: &MemoryFileSystem, path: &str) -> bool {
    filesystem.exists(path)
}
782
/// Returns `true` when `lstat` succeeds for `path` in the given layer.
fn has_entry_in_filesystem(filesystem: &MemoryFileSystem, path: &str) -> bool {
    filesystem.lstat(path).is_ok()
}
786
/// Returns `true` when an upper layer is present and its `exists` check
/// succeeds for `path`.
fn exists_in_upper(&self, path: &str) -> bool {
    self.upper
        .as_ref()
        .is_some_and(|upper| Self::path_exists_in_filesystem(upper, path))
}
792
/// Returns `true` when an upper layer is present and `lstat` succeeds for
/// `path` in it.
fn has_entry_in_upper(&self, path: &str) -> bool {
    self.upper
        .as_ref()
        .is_some_and(|upper| Self::has_entry_in_filesystem(upper, path))
}
798
/// Returns the index of the first lower layer whose `exists` check succeeds
/// for `path`. Whiteout markers are not consulted.
fn find_lower_by_exists(&self, path: &str) -> Option<usize> {
    self.lowers
        .iter()
        .position(|lower| Self::path_exists_in_filesystem(lower, path))
}
804
805 fn find_lower_by_entry(&self, path: &str) -> Option<(usize, VirtualStat)> {
806 self.lowers
807 .iter()
808 .enumerate()
809 .find_map(|(index, lower)| lower.lstat(path).ok().map(|stat| (index, stat)))
810 }
811
812 fn merged_lstat(&self, path: &str) -> VfsResult<VirtualStat> {
813 if Self::is_internal_metadata_path(path) {
814 return Err(Self::entry_not_found(path));
815 }
816 if self.is_whited_out(path) {
817 return Err(Self::entry_not_found(path));
818 }
819 if self.has_entry_in_upper(path) {
820 return self
821 .upper
822 .as_ref()
823 .expect("upper must exist when entry exists")
824 .lstat(path);
825 }
826 self.find_lower_by_entry(path)
827 .map(|(_, stat)| stat)
828 .ok_or_else(|| Self::entry_not_found(path))
829 }
830
831 fn read_link_inner(&self, path: &str) -> VfsResult<String> {
837 if Self::is_internal_metadata_path(path) {
838 return Err(Self::entry_not_found(path));
839 }
840 if self.is_whited_out(path) {
841 return Err(Self::entry_not_found(path));
842 }
843 if self.has_entry_in_upper(path) {
844 return self
845 .upper
846 .as_ref()
847 .expect("upper must exist when path exists")
848 .read_link(path);
849 }
850 let Some((index, _)) = self.find_lower_by_entry(path) else {
851 return Err(Self::entry_not_found(path));
852 };
853 self.lowers[index].read_link(path)
854 }
855
856 fn ensure_ancestor_directories_in_upper(&mut self, path: &str) -> VfsResult<()> {
857 if Self::is_internal_metadata_path(path) {
858 return Err(VfsError::permission_denied("mkdir", path));
859 }
860 let normalized = Self::normalized(path);
861 let parts = normalized
862 .split('/')
863 .filter(|part| !part.is_empty())
864 .collect::<Vec<_>>();
865
866 let mut current = String::new();
867 for part in parts.iter().take(parts.len().saturating_sub(1)) {
868 current.push('/');
869 current.push_str(part);
870
871 if self.exists_in_upper(¤t) {
872 continue;
873 }
874
875 if let Some(index) = self.find_lower_by_exists(¤t) {
876 let stat = self.lowers[index].stat(¤t)?;
877 if !stat.is_directory {
878 return Err(Self::not_directory(¤t));
879 }
880
881 let upper = self.writable_upper(¤t)?;
882 upper.mkdir(¤t, false)?;
883 upper.chmod(¤t, stat.mode)?;
884 upper.chown(¤t, stat.uid, stat.gid)?;
885 continue;
886 }
887
888 let upper = self.writable_upper(¤t)?;
889 upper.mkdir(¤t, false)?;
890 }
891
892 Ok(())
893 }
894
895 fn copy_up_path(&mut self, path: &str) -> VfsResult<()> {
896 if self.has_entry_in_upper(path) {
897 return Ok(());
898 }
899
900 self.ensure_ancestor_directories_in_upper(path)?;
901
902 let (lower_index, stat) = self
903 .find_lower_by_entry(path)
904 .ok_or_else(|| Self::entry_not_found(path))?;
905
906 if stat.is_symbolic_link {
907 let target = self.lowers[lower_index].read_link(path)?;
908 let upper = self.writable_upper(path)?;
909 upper.symlink(&target, path)?;
910 return Ok(());
911 }
912
913 if stat.is_directory {
914 let upper = self.writable_upper(path)?;
915 upper.mkdir(path, false)?;
916 upper.chmod(path, stat.mode)?;
917 upper.chown(path, stat.uid, stat.gid)?;
918 self.mark_opaque_directory(path)?;
919 return Ok(());
920 }
921
922 let data = self.lowers[lower_index].read_file(path)?;
923 let upper = self.writable_upper(path)?;
924 upper.write_file(path, data)?;
925 upper.chmod(path, stat.mode)?;
926 upper.chown(path, stat.uid, stat.gid)?;
927 Ok(())
928 }
929
930 fn materialize_destination_parent_in_upper(&mut self, path: &str) -> VfsResult<()> {
931 if self.has_entry_in_upper(path) {
932 return Ok(());
933 }
934
935 if self
936 .merged_lstat(path)
937 .is_ok_and(|stat| stat.is_symbolic_link)
938 {
939 return self.copy_up_path(path);
940 }
941
942 self.ensure_ancestor_directories_in_upper(path)?;
943 let stat = self.merged_lstat(path)?;
944 if !stat.is_directory || stat.is_symbolic_link {
945 return Err(Self::not_directory(path));
946 }
947
948 let upper = self.writable_upper(path)?;
949 upper.create_dir(path)?;
950 upper.chmod(path, stat.mode)?;
951 upper.chown(path, stat.uid, stat.gid)?;
952 Ok(())
953 }
954
955 fn path_exists_in_merged_view(&self, path: &str) -> bool {
956 if self.is_whited_out(path) {
957 return false;
958 }
959 if self.has_entry_in_upper(path) {
960 return true;
961 }
962 self.find_lower_by_entry(path).is_some()
963 }
964
/// Builds the `ENOTEMPTY` error for a directory that still has entries.
fn not_empty(path: &str) -> VfsError {
    VfsError::new("ENOTEMPTY", format!("directory not empty, rmdir '{path}'"))
}
968
969 fn collect_snapshot_entries(
970 &mut self,
971 path: &str,
972 entries: &mut Vec<OverlaySnapshotEntry>,
973 ) -> VfsResult<()> {
974 let mut pending = vec![(Self::normalized(path), 0usize)];
975 while let Some((current_path, depth)) = pending.pop() {
976 if depth > MAX_SNAPSHOT_DEPTH {
977 return Err(VfsError::new(
978 "EINVAL",
979 format!("overlay snapshot depth limit exceeded at '{current_path}'"),
980 ));
981 }
982
983 let stat = self.merged_lstat(¤t_path)?;
984
985 if stat.is_symbolic_link {
986 entries.push(OverlaySnapshotEntry {
987 path: current_path.clone(),
988 stat,
989 kind: OverlaySnapshotKind::Symlink(self.read_link_inner(¤t_path)?),
990 });
991 continue;
992 }
993
994 if stat.is_directory {
995 entries.push(OverlaySnapshotEntry {
996 path: current_path.clone(),
997 stat,
998 kind: OverlaySnapshotKind::Directory,
999 });
1000
1001 let children = self.read_dir_with_types_inner(¤t_path)?;
1002 for entry in children.into_iter().rev() {
1003 pending.push((Self::join_path(¤t_path, &entry.name), depth + 1));
1004 }
1005 continue;
1006 }
1007
1008 entries.push(OverlaySnapshotEntry {
1009 path: current_path.clone(),
1010 stat,
1011 kind: OverlaySnapshotKind::File(self.read_file(¤t_path)?),
1012 });
1013 }
1014 Ok(())
1015 }
1016
1017 fn remove_snapshot_entries(&mut self, entries: &[OverlaySnapshotEntry]) -> VfsResult<()> {
1018 for entry in entries.iter().rev() {
1019 if self.has_entry_in_upper(&entry.path) {
1020 match entry.kind {
1021 OverlaySnapshotKind::Directory => {
1022 self.writable_upper(&entry.path)?.remove_dir(&entry.path)?;
1023 }
1024 OverlaySnapshotKind::File(_) | OverlaySnapshotKind::Symlink(_) => {
1025 self.writable_upper(&entry.path)?.remove_file(&entry.path)?;
1026 }
1027 }
1028 }
1029
1030 if self.find_lower_by_entry(&entry.path).is_some() {
1031 self.clear_opaque_directory(&entry.path)?;
1032 self.add_whiteout(&entry.path)?;
1033 } else {
1034 self.clear_path_metadata(&entry.path)?;
1035 }
1036 }
1037
1038 Ok(())
1039 }
1040
/// Reports whether any layer lists a child of `path` that is not ignored.
///
/// Ignored children are `.`/`..`, the hidden metadata directory and children
/// with a whiteout marker. The upper layer is checked first, then the lowers.
///
/// # Errors
/// `ENOENT` when no layer can list `path` as a directory.
// NOTE(review): the opaque marker for `path` is not consulted here, unlike in
// `read_dir_with_types_inner` — confirm this is the intended "raw" behavior.
fn directory_has_raw_children(&mut self, path: &str) -> VfsResult<bool> {
    let normalized = Self::normalized(path);
    let mut directory_exists = false;

    if let Some(upper) = self.upper.as_mut() {
        // A listing failure in one layer means "not a directory in this layer".
        if let Ok(entries) = upper.read_dir(&normalized) {
            directory_exists = true;
            if entries.into_iter().any(|entry| {
                !Self::should_ignore_raw_directory_entry(Some(&*upper), &normalized, &entry)
            }) {
                return Ok(true);
            }
        }
    }

    let upper = self.upper.as_ref();
    for lower in self.lowers.iter_mut().rev() {
        if let Ok(entries) = lower.read_dir(&normalized) {
            directory_exists = true;
            if entries.into_iter().any(|entry| {
                !Self::should_ignore_raw_directory_entry(upper, &normalized, &entry)
            }) {
                return Ok(true);
            }
        }
    }

    if !directory_exists {
        return Err(Self::directory_not_found(path));
    }

    Ok(false)
}
1074
1075 fn read_dir_with_types_inner(&mut self, path: &str) -> VfsResult<Vec<VirtualDirEntry>> {
1076 if self.is_whited_out(path) {
1077 return Err(Self::directory_not_found(path));
1078 }
1079
1080 let normalized = Self::normalized(path);
1081 let mut directory_exists = false;
1082 let mut entries = Vec::<VirtualDirEntry>::new();
1083 let mut seen = BTreeSet::<String>::new();
1084 let upper = self.upper.as_ref();
1085 let include_lowers = !Self::marker_exists_in_upper(upper, OverlayMarkerKind::Opaque, path);
1086
1087 if include_lowers {
1088 for lower in self.lowers.iter_mut().rev() {
1089 if let Ok(lower_entries) = lower.read_dir_with_types(path) {
1090 directory_exists = true;
1091 for entry in lower_entries {
1092 if entry.name == "."
1093 || entry.name == ".."
1094 || Self::should_hide_directory_entry(path, &entry.name)
1095 {
1096 continue;
1097 }
1098 let child_path = if normalized == "/" {
1099 format!("/{}", entry.name)
1100 } else {
1101 format!("{normalized}/{}", entry.name)
1102 };
1103 if Self::marker_exists_in_upper(
1104 upper,
1105 OverlayMarkerKind::Whiteout,
1106 &child_path,
1107 ) || seen.contains(&entry.name)
1108 {
1109 continue;
1110 }
1111 seen.insert(entry.name.clone());
1112 entries.push(entry);
1113 }
1114 }
1115 }
1116 }
1117
1118 if let Some(upper) = self.upper.as_mut() {
1119 if let Ok(upper_entries) = upper.read_dir_with_types(path) {
1120 directory_exists = true;
1121 for entry in upper_entries {
1122 if entry.name == "."
1123 || entry.name == ".."
1124 || Self::should_hide_directory_entry(path, &entry.name)
1125 {
1126 continue;
1127 }
1128 if let Some(index) = entries
1129 .iter()
1130 .position(|existing| existing.name == entry.name)
1131 {
1132 entries[index] = entry;
1133 } else {
1134 seen.insert(entry.name.clone());
1135 entries.push(entry);
1136 }
1137 }
1138 }
1139 }
1140
1141 if !directory_exists {
1142 return Err(Self::directory_not_found(path));
1143 }
1144
1145 Ok(entries)
1146 }
1147
1148 fn marker_paths_in_upper(&mut self, kind: OverlayMarkerKind) -> VfsResult<Vec<String>> {
1149 let Some(upper) = self.upper.as_mut() else {
1150 return Ok(Vec::new());
1151 };
1152
1153 let marker_dir = Self::marker_directory(kind);
1154 let entries = match upper.read_dir(marker_dir) {
1155 Ok(entries) => entries,
1156 Err(error) if error.code() == "ENOENT" => return Ok(Vec::new()),
1157 Err(error) => return Err(error),
1158 };
1159
1160 let mut marker_paths = Vec::new();
1161 for entry in entries {
1162 if entry == "." || entry == ".." {
1163 continue;
1164 }
1165
1166 let marker_file = Self::join_path(marker_dir, &entry);
1167 let marker_path =
1168 String::from_utf8(upper.read_file(&marker_file).map_err(|_| {
1169 VfsError::io(format!("invalid overlay marker '{marker_file}'"))
1170 })?)
1171 .map_err(|_| VfsError::io(format!("invalid overlay marker '{marker_file}'")))?;
1172 marker_paths.push(Self::normalized(&marker_path));
1173 }
1174
1175 Ok(marker_paths)
1176 }
1177
/// Returns `true` when `path` is `root` itself or lies somewhere below it.
///
/// The comparison is component-aware: `/ab` is not inside `/a`. The
/// filesystem root (`/`) contains every absolute path; the previous
/// prefix test built `"//"` for that case and therefore never matched any
/// descendant of the root. No allocation is performed.
fn path_in_subtree(path: &str, root: &str) -> bool {
    if root == "/" {
        return path.starts_with('/');
    }
    match path.strip_prefix(root) {
        // Either an exact match, or the remainder starts a new path component.
        Some(rest) => rest.is_empty() || rest.starts_with('/'),
        None => false,
    }
}
1181
1182 fn clear_subtree_metadata(&mut self, path: &str) -> VfsResult<()> {
1183 let normalized = Self::normalized(path);
1184 for kind in [OverlayMarkerKind::Whiteout, OverlayMarkerKind::Opaque] {
1185 for marker_path in self.marker_paths_in_upper(kind)? {
1186 if Self::path_in_subtree(&marker_path, &normalized) {
1187 self.set_marker(kind, &marker_path, false)?;
1188 }
1189 }
1190 }
1191 Ok(())
1192 }
1193
1194 fn copy_subtree_metadata(&mut self, old_root: &str, new_root: &str) -> VfsResult<()> {
1195 let old_normalized = Self::normalized(old_root);
1196 let new_normalized = Self::normalized(new_root);
1197
1198 for kind in [OverlayMarkerKind::Whiteout, OverlayMarkerKind::Opaque] {
1199 for marker_path in self.marker_paths_in_upper(kind)? {
1200 if Self::path_in_subtree(&marker_path, &old_normalized) {
1201 let destination =
1202 Self::rebase_path(&marker_path, &old_normalized, &new_normalized);
1203 self.set_marker(kind, &destination, true)?;
1204 }
1205 }
1206 }
1207
1208 Ok(())
1209 }
1210
1211 fn stage_snapshot_entries_in_upper(
1212 &mut self,
1213 entries: &[OverlaySnapshotEntry],
1214 ) -> VfsResult<()> {
1215 for entry in entries {
1216 match &entry.kind {
1217 OverlaySnapshotKind::Directory => {
1218 if !self.has_entry_in_upper(&entry.path) {
1219 self.ensure_ancestor_directories_in_upper(&entry.path)?;
1220 self.writable_upper(&entry.path)?.create_dir(&entry.path)?;
1221 }
1222 self.writable_upper(&entry.path)?
1223 .chmod(&entry.path, entry.stat.mode)?;
1224 self.writable_upper(&entry.path)?.chown(
1225 &entry.path,
1226 entry.stat.uid,
1227 entry.stat.gid,
1228 )?;
1229 self.mark_opaque_directory(&entry.path)?;
1230 }
1231 OverlaySnapshotKind::File(data) => {
1232 if self.has_entry_in_upper(&entry.path) {
1233 continue;
1234 }
1235 self.ensure_ancestor_directories_in_upper(&entry.path)?;
1236 self.writable_upper(&entry.path)?
1237 .write_file(&entry.path, data.clone())?;
1238 self.writable_upper(&entry.path)?
1239 .chmod(&entry.path, entry.stat.mode)?;
1240 self.writable_upper(&entry.path)?.chown(
1241 &entry.path,
1242 entry.stat.uid,
1243 entry.stat.gid,
1244 )?;
1245 }
1246 OverlaySnapshotKind::Symlink(target) => {
1247 if self.has_entry_in_upper(&entry.path) {
1248 continue;
1249 }
1250 self.ensure_ancestor_directories_in_upper(&entry.path)?;
1251 self.writable_upper(&entry.path)?
1252 .symlink(target, &entry.path)?;
1253 }
1254 }
1255 }
1256
1257 Ok(())
1258 }
1259}
1260
1261fn sync_upper_root_metadata(upper: &mut MemoryFileSystem, lowers: &[MemoryFileSystem]) {
1262 let Some(root_stat) = lowers.iter().find_map(|lower| lower.lstat("/").ok()) else {
1263 return;
1264 };
1265
1266 upper
1267 .chmod("/", root_stat.mode)
1268 .expect("overlay upper root should exist");
1269 upper
1270 .chown("/", root_stat.uid, root_stat.gid)
1271 .expect("overlay upper root should exist");
1272}
1273
1274impl VirtualFileSystem for OverlayFileSystem {
1275 fn read_file(&mut self, path: &str) -> VfsResult<Vec<u8>> {
1276 if self.touches_internal_metadata(path) {
1277 return Err(Self::entry_not_found(path));
1278 }
1279 if self.is_whited_out(path) {
1280 return Err(Self::entry_not_found(path));
1281 }
1282 if self.exists_in_upper(path) {
1283 return self
1284 .upper
1285 .as_mut()
1286 .expect("upper must exist when path exists")
1287 .read_file(path);
1288 }
1289 let Some(index) = self.find_lower_by_exists(path) else {
1290 return Err(Self::entry_not_found(path));
1291 };
1292 self.lowers[index].read_file(path)
1293 }
1294
1295 fn read_dir(&mut self, path: &str) -> VfsResult<Vec<String>> {
1296 if self.touches_internal_metadata(path) {
1297 return Err(Self::directory_not_found(path));
1298 }
1299 if self.is_whited_out(path) {
1300 return Err(Self::directory_not_found(path));
1301 }
1302
1303 let normalized = Self::normalized(path);
1304 let mut directory_exists = false;
1305 let mut entries = BTreeSet::new();
1306 let upper = self.upper.as_ref();
1307 let include_lowers = !Self::marker_exists_in_upper(upper, OverlayMarkerKind::Opaque, path);
1308
1309 if include_lowers {
1310 for lower in self.lowers.iter_mut().rev() {
1311 if let Ok(lower_entries) = lower.read_dir(path) {
1312 directory_exists = true;
1313 for entry in lower_entries {
1314 if entry == "."
1315 || entry == ".."
1316 || Self::should_hide_directory_entry(path, &entry)
1317 {
1318 continue;
1319 }
1320 let child_path = if normalized == "/" {
1321 format!("/{entry}")
1322 } else {
1323 format!("{normalized}/{entry}")
1324 };
1325 if !Self::marker_exists_in_upper(
1326 upper,
1327 OverlayMarkerKind::Whiteout,
1328 &child_path,
1329 ) {
1330 entries.insert(entry);
1331 }
1332 }
1333 }
1334 }
1335 }
1336
1337 if let Some(upper) = self.upper.as_mut() {
1338 if let Ok(upper_entries) = upper.read_dir(path) {
1339 directory_exists = true;
1340 for entry in upper_entries {
1341 if entry == "."
1342 || entry == ".."
1343 || Self::should_hide_directory_entry(path, &entry)
1344 {
1345 continue;
1346 }
1347 entries.insert(entry);
1348 }
1349 }
1350 }
1351
1352 if !directory_exists {
1353 return Err(Self::directory_not_found(path));
1354 }
1355
1356 Ok(entries.into_iter().collect())
1357 }
1358
1359 fn read_dir_limited(&mut self, path: &str, max_entries: usize) -> VfsResult<Vec<String>> {
1360 if self.touches_internal_metadata(path) {
1361 return Err(Self::directory_not_found(path));
1362 }
1363 if self.is_whited_out(path) {
1364 return Err(Self::directory_not_found(path));
1365 }
1366
1367 let normalized = Self::normalized(path);
1368 let mut directory_exists = false;
1369 let mut entries = BTreeSet::new();
1370 let upper = self.upper.as_ref();
1371 let include_lowers = !Self::marker_exists_in_upper(upper, OverlayMarkerKind::Opaque, path);
1372
1373 if include_lowers {
1374 for lower in self.lowers.iter_mut().rev() {
1375 let lower_entries = match lower.read_dir_filtered_limited(
1376 path,
1377 max_entries.saturating_sub(entries.len()),
1378 |entry| {
1379 if entry == "."
1380 || entry == ".."
1381 || Self::should_hide_directory_entry(path, entry)
1382 {
1383 return false;
1384 }
1385 let child_path = if normalized == "/" {
1386 format!("/{entry}")
1387 } else {
1388 format!("{normalized}/{entry}")
1389 };
1390 !Self::marker_exists_in_upper(
1391 upper,
1392 OverlayMarkerKind::Whiteout,
1393 &child_path,
1394 ) && !entries.contains(entry)
1395 },
1396 ) {
1397 Ok(entries) => entries,
1398 Err(error) if error.code() == "ENOENT" || error.code() == "ENOTDIR" => {
1399 continue;
1400 }
1401 Err(error) => return Err(error),
1402 };
1403 directory_exists = true;
1404 for entry in lower_entries {
1405 entries.insert(entry);
1406 if entries.len() > max_entries {
1407 return Err(VfsError::new(
1408 "ENOMEM",
1409 format!(
1410 "directory listing for '{path}' exceeds configured limit of {max_entries} entries"
1411 ),
1412 ));
1413 }
1414 }
1415 }
1416 }
1417
1418 if let Some(upper) = self.upper.as_mut() {
1419 let upper_entries = match upper.read_dir_filtered_limited(
1420 path,
1421 max_entries.saturating_sub(entries.len()),
1422 |entry| {
1423 entry != "."
1424 && entry != ".."
1425 && !Self::should_hide_directory_entry(path, entry)
1426 && !entries.contains(entry)
1427 },
1428 ) {
1429 Ok(entries) => entries,
1430 Err(error) if error.code() == "ENOENT" => Vec::new(),
1431 Err(error) => return Err(error),
1432 };
1433 directory_exists = directory_exists || upper.exists(path);
1434 for entry in upper_entries {
1435 if entry == "." || entry == ".." || Self::should_hide_directory_entry(path, &entry)
1436 {
1437 continue;
1438 }
1439 entries.insert(entry);
1440 if entries.len() > max_entries {
1441 return Err(VfsError::new(
1442 "ENOMEM",
1443 format!(
1444 "directory listing for '{path}' exceeds configured limit of {max_entries} entries"
1445 ),
1446 ));
1447 }
1448 }
1449 }
1450
1451 if !directory_exists {
1452 return Err(Self::directory_not_found(path));
1453 }
1454
1455 Ok(entries.into_iter().collect())
1456 }
1457
1458 fn read_dir_with_types(&mut self, path: &str) -> VfsResult<Vec<VirtualDirEntry>> {
1459 if self.touches_internal_metadata(path) {
1460 return Err(Self::directory_not_found(path));
1461 }
1462 self.read_dir_with_types_inner(path)
1463 }
1464
1465 fn write_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
1466 if self.touches_internal_metadata(path) {
1467 return Err(VfsError::permission_denied("open", path));
1468 }
1469 self.clear_path_metadata(path)?;
1470 if self.find_lower_by_entry(path).is_some() {
1471 self.copy_up_path(path)?;
1472 } else {
1473 self.ensure_ancestor_directories_in_upper(path)?;
1474 }
1475 self.writable_upper(path)?.write_file(path, content.into())
1476 }
1477
1478 fn create_file_exclusive(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
1479 if self.touches_internal_metadata(path) {
1480 return Err(VfsError::permission_denied("open", path));
1481 }
1482 self.clear_path_metadata(path)?;
1483 if self.path_exists_in_merged_view(path) {
1484 return Err(Self::already_exists(path));
1485 }
1486 self.ensure_ancestor_directories_in_upper(path)?;
1487 self.writable_upper(path)?
1488 .create_file_exclusive(path, content.into())
1489 }
1490
1491 fn append_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<u64> {
1492 if self.touches_internal_metadata(path) {
1493 return Err(VfsError::permission_denied("open", path));
1494 }
1495 self.clear_path_metadata(path)?;
1496 if self.find_lower_by_entry(path).is_some() {
1497 self.copy_up_path(path)?;
1498 } else {
1499 self.ensure_ancestor_directories_in_upper(path)?;
1500 }
1501 self.writable_upper(path)?.append_file(path, content.into())
1502 }
1503
1504 fn create_dir(&mut self, path: &str) -> VfsResult<()> {
1505 if self.touches_internal_metadata(path) {
1506 return Err(VfsError::permission_denied("mkdir", path));
1507 }
1508 self.clear_path_metadata(path)?;
1509 if self.path_exists_in_merged_view(path) {
1510 return Err(Self::already_exists(path));
1511 }
1512 self.ensure_ancestor_directories_in_upper(path)?;
1513 self.writable_upper(path)?.create_dir(path)
1514 }
1515
1516 fn mkdir(&mut self, path: &str, recursive: bool) -> VfsResult<()> {
1517 if self.touches_internal_metadata(path) {
1518 return Err(VfsError::permission_denied("mkdir", path));
1519 }
1520 self.clear_path_metadata(path)?;
1521 if self.path_exists_in_merged_view(path) {
1522 let stat = self.merged_lstat(path)?;
1523 if recursive && stat.is_directory && !stat.is_symbolic_link {
1524 return Ok(());
1525 }
1526 return Err(Self::already_exists(path));
1527 }
1528 self.ensure_ancestor_directories_in_upper(path)?;
1529 self.writable_upper(path)?.mkdir(path, recursive)
1530 }
1531
1532 fn exists(&self, path: &str) -> bool {
1533 if self.touches_internal_metadata(path) {
1534 return false;
1535 }
1536 self.path_exists_in_merged_view(path)
1537 }
1538
1539 fn stat(&mut self, path: &str) -> VfsResult<VirtualStat> {
1540 if self.touches_internal_metadata(path) {
1541 return Err(Self::entry_not_found(path));
1542 }
1543 if self.is_whited_out(path) {
1544 return Err(Self::entry_not_found(path));
1545 }
1546 if self.exists_in_upper(path) {
1547 return self
1548 .upper
1549 .as_mut()
1550 .expect("upper must exist when path exists")
1551 .stat(path);
1552 }
1553 let Some(index) = self.find_lower_by_exists(path) else {
1554 return Err(Self::entry_not_found(path));
1555 };
1556 self.lowers[index].stat(path)
1557 }
1558
1559 fn remove_file(&mut self, path: &str) -> VfsResult<()> {
1560 if self.touches_internal_metadata(path) {
1561 return Err(VfsError::permission_denied("unlink", path));
1562 }
1563 if self.is_whited_out(path) {
1564 return Err(Self::entry_not_found(path));
1565 }
1566 let lower_exists = self.find_lower_by_exists(path).is_some();
1567 let upper_exists = self.exists_in_upper(path);
1568 if !lower_exists && !upper_exists {
1569 return Err(Self::entry_not_found(path));
1570 }
1571 if upper_exists {
1572 self.writable_upper(path)?.remove_file(path)?;
1573 } else {
1574 self.writable_upper(path)?;
1575 }
1576 self.clear_opaque_directory(path)?;
1577 self.add_whiteout(path)?;
1578 Ok(())
1579 }
1580
1581 fn remove_dir(&mut self, path: &str) -> VfsResult<()> {
1582 let normalized = Self::normalized(path);
1583 if self.touches_internal_metadata(&normalized) {
1584 return Err(VfsError::permission_denied("rmdir", path));
1585 }
1586 if normalized == "/" {
1587 return Err(VfsError::permission_denied("rmdir", path));
1588 }
1589
1590 let stat = match self.merged_lstat(path) {
1591 Ok(stat) => stat,
1592 Err(error) if error.code() == "ENOENT" => return Err(Self::directory_not_found(path)),
1593 Err(error) => return Err(error),
1594 };
1595
1596 if !stat.is_directory || stat.is_symbolic_link {
1597 return Err(Self::not_directory(path));
1598 }
1599
1600 if self.directory_has_raw_children(path)? {
1601 return Err(Self::not_empty(path));
1602 }
1603
1604 let lower_exists = self.find_lower_by_entry(path).is_some();
1605 let upper_exists = self.has_entry_in_upper(path);
1606 if upper_exists {
1607 self.writable_upper(path)?.remove_dir(&normalized)?;
1608 } else {
1609 self.writable_upper(path)?;
1610 }
1611 if lower_exists {
1612 self.clear_opaque_directory(path)?;
1613 self.add_whiteout(path)?;
1614 } else {
1615 self.clear_path_metadata(path)?;
1616 }
1617 Ok(())
1618 }
1619
/// Moves the entry at `old_path` to `new_path` within the merged view.
///
/// The statements below run in a fixed order; several of them mutate the
/// upper layer, so the sequence matters:
///
/// 1. Reject paths inside the internal metadata area and the root
///    directory as a source (permission denied); return early when source
///    and destination normalize to the same path.
/// 2. Stat the source, validate the destination parent and resolve the
///    destination path; return early when the resolved destination equals
///    the source, and fail with `EINVAL` when a directory would be moved
///    into its own descendant.
/// 3. Materialize the destination's parent directories in the upper layer.
/// 4. Snapshot the source subtree.
/// 5. If something already exists at the destination: fail with
///    "not empty" for a directory with visible entries, otherwise remove
///    its upper-layer entry (if any) and clear the markers of its subtree.
/// 6. Stage the snapshot in the upper layer, copy the source subtree's
///    markers to the destination, rename inside the upper layer and
///    finally hand the snapshot to `remove_snapshot_entries`.
fn rename(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
    let old_normalized = Self::normalized(old_path);
    let new_normalized = Self::normalized(new_path);
    if self.touches_internal_metadata(&old_normalized)
        || self.touches_internal_metadata(&new_normalized)
    {
        return Err(VfsError::permission_denied("rename", old_path));
    }

    // The root directory can never be moved.
    if old_normalized == "/" {
        return Err(VfsError::permission_denied("rename", old_path));
    }

    // Same path after normalization: nothing to do. Note that this returns
    // before the source is stat'ed, so existence is not checked here.
    if old_normalized == new_normalized {
        return Ok(());
    }

    let source_stat = self.merged_lstat(old_path)?;
    self.validate_destination_parent(&new_normalized)?;
    let resolved_new_normalized = self.resolved_destination_path(&new_normalized)?;

    // The destination may resolve back onto the source.
    if old_normalized == resolved_new_normalized {
        return Ok(());
    }

    // The trailing "/" keeps sibling names sharing a prefix (e.g. "/a" vs
    // "/ab") from being mistaken for descendants.
    if source_stat.is_directory
        && resolved_new_normalized.starts_with(&(old_normalized.clone() + "/"))
    {
        return Err(VfsError::new(
            "EINVAL",
            format!(
                "cannot move '{}' into its own descendant '{}'",
                old_path, new_path
            ),
        ));
    }

    // From here on the upper layer is modified.
    for path in self.destination_parent_copy_up_paths(&new_normalized)? {
        self.materialize_destination_parent_in_upper(&path)?;
    }

    // Capture the source subtree before anything at the destination changes.
    let mut snapshot_entries = Vec::new();
    self.collect_snapshot_entries(&old_normalized, &mut snapshot_entries)?;

    // A failing lstat is treated as "no destination entry"; the error itself
    // is discarded.
    if let Ok(destination_stat) = self.merged_lstat(&resolved_new_normalized) {
        if destination_stat.is_directory
            && !destination_stat.is_symbolic_link
            && self.directory_has_visible_entries_limited(&resolved_new_normalized)?
        {
            return Err(Self::not_empty(&resolved_new_normalized));
        }

        // Only an upper-layer entry has to be physically removed.
        if self.has_entry_in_upper(&resolved_new_normalized) {
            if destination_stat.is_directory && !destination_stat.is_symbolic_link {
                self.writable_upper(&resolved_new_normalized)?
                    .remove_dir(&resolved_new_normalized)?;
            } else {
                self.writable_upper(&resolved_new_normalized)?
                    .remove_file(&resolved_new_normalized)?;
            }
        }
        self.clear_subtree_metadata(&resolved_new_normalized)?;
    }

    // Stage the snapshot in the upper layer so the upper-layer rename below
    // has entries to move.
    self.stage_snapshot_entries_in_upper(&snapshot_entries)?;
    self.copy_subtree_metadata(&old_normalized, &resolved_new_normalized)?;
    self.writable_upper(&old_normalized)?
        .rename(&old_normalized, &resolved_new_normalized)?;
    self.remove_snapshot_entries(&snapshot_entries)
}
1690
1691 fn realpath(&self, path: &str) -> VfsResult<String> {
1692 if self.touches_internal_metadata(path) {
1693 return Err(Self::entry_not_found(path));
1694 }
1695 if self.is_whited_out(path) {
1696 return Err(Self::entry_not_found(path));
1697 }
1698 if self.exists_in_upper(path) {
1699 return self
1700 .upper
1701 .as_ref()
1702 .expect("upper must exist when path exists")
1703 .realpath(path);
1704 }
1705 let Some(index) = self.find_lower_by_exists(path) else {
1706 return Err(Self::entry_not_found(path));
1707 };
1708 self.lowers[index].realpath(path)
1709 }
1710
1711 fn symlink(&mut self, target: &str, link_path: &str) -> VfsResult<()> {
1712 if self.touches_internal_metadata(link_path) {
1713 return Err(VfsError::permission_denied("symlink", link_path));
1714 }
1715 self.clear_path_metadata(link_path)?;
1716 self.ensure_ancestor_directories_in_upper(link_path)?;
1717 self.writable_upper(link_path)?.symlink(target, link_path)
1718 }
1719
1720 fn read_link(&self, path: &str) -> VfsResult<String> {
1721 if self.touches_internal_metadata(path) {
1722 return Err(Self::entry_not_found(path));
1723 }
1724 if self.is_whited_out(path) {
1725 return Err(Self::entry_not_found(path));
1726 }
1727 if self.has_entry_in_upper(path) {
1728 return self
1729 .upper
1730 .as_ref()
1731 .expect("upper must exist when path exists")
1732 .read_link(path);
1733 }
1734 let Some((index, _)) = self.find_lower_by_entry(path) else {
1735 return Err(Self::entry_not_found(path));
1736 };
1737 self.lowers[index].read_link(path)
1738 }
1739
1740 fn lstat(&self, path: &str) -> VfsResult<VirtualStat> {
1741 if self.touches_internal_metadata(path) {
1742 return Err(Self::entry_not_found(path));
1743 }
1744 if self.is_whited_out(path) {
1745 return Err(Self::entry_not_found(path));
1746 }
1747 if self.has_entry_in_upper(path) {
1748 return self
1749 .upper
1750 .as_ref()
1751 .expect("upper must exist when path exists")
1752 .lstat(path);
1753 }
1754 self.find_lower_by_entry(path)
1755 .map(|(_, stat)| stat)
1756 .ok_or_else(|| Self::entry_not_found(path))
1757 }
1758
1759 fn link(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
1760 if self.touches_internal_metadata(old_path) || self.touches_internal_metadata(new_path) {
1761 return Err(VfsError::permission_denied("link", new_path));
1762 }
1763 self.clear_path_metadata(new_path)?;
1764 self.copy_up_path(old_path)?;
1765 self.ensure_ancestor_directories_in_upper(new_path)?;
1766 self.writable_upper(new_path)?.link(old_path, new_path)
1767 }
1768
1769 fn chmod(&mut self, path: &str, mode: u32) -> VfsResult<()> {
1770 if self.touches_internal_metadata(path) {
1771 return Err(VfsError::permission_denied("chmod", path));
1772 }
1773 if self.is_whited_out(path) {
1774 return Err(Self::entry_not_found(path));
1775 }
1776 if !self.exists_in_upper(path) {
1777 self.copy_up_path(path)?;
1778 }
1779 self.writable_upper(path)?.chmod(path, mode)
1780 }
1781
1782 fn chown(&mut self, path: &str, uid: u32, gid: u32) -> VfsResult<()> {
1783 if self.touches_internal_metadata(path) {
1784 return Err(VfsError::permission_denied("chown", path));
1785 }
1786 if self.is_whited_out(path) {
1787 return Err(Self::entry_not_found(path));
1788 }
1789 if !self.exists_in_upper(path) {
1790 self.copy_up_path(path)?;
1791 }
1792 self.writable_upper(path)?.chown(path, uid, gid)
1793 }
1794
1795 fn utimes(&mut self, path: &str, atime_ms: u64, mtime_ms: u64) -> VfsResult<()> {
1796 if self.touches_internal_metadata(path) {
1797 return Err(VfsError::permission_denied("utime", path));
1798 }
1799 if self.is_whited_out(path) {
1800 return Err(Self::entry_not_found(path));
1801 }
1802 if !self.exists_in_upper(path) {
1803 self.copy_up_path(path)?;
1804 }
1805 self.writable_upper(path)?.utimes(path, atime_ms, mtime_ms)
1806 }
1807
1808 fn utimes_spec(
1809 &mut self,
1810 path: &str,
1811 atime: VirtualUtimeSpec,
1812 mtime: VirtualUtimeSpec,
1813 follow_symlinks: bool,
1814 ) -> VfsResult<()> {
1815 if self.touches_internal_metadata(path) {
1816 return Err(VfsError::permission_denied("utime", path));
1817 }
1818 if self.is_whited_out(path) {
1819 return Err(Self::entry_not_found(path));
1820 }
1821 if !self.exists_in_upper(path) {
1822 self.copy_up_path(path)?;
1823 }
1824 self.writable_upper(path)?
1825 .utimes_spec(path, atime, mtime, follow_symlinks)
1826 }
1827
1828 fn truncate(&mut self, path: &str, length: u64) -> VfsResult<()> {
1829 if self.touches_internal_metadata(path) {
1830 return Err(VfsError::permission_denied("truncate", path));
1831 }
1832 if self.is_whited_out(path) {
1833 return Err(Self::entry_not_found(path));
1834 }
1835 if !self.exists_in_upper(path) {
1836 self.copy_up_path(path)?;
1837 }
1838 self.writable_upper(path)?.truncate(path, length)
1839 }
1840
1841 fn pread(&mut self, path: &str, offset: u64, length: usize) -> VfsResult<Vec<u8>> {
1842 if self.touches_internal_metadata(path) {
1843 return Err(Self::entry_not_found(path));
1844 }
1845 if self.is_whited_out(path) {
1846 return Err(Self::entry_not_found(path));
1847 }
1848 if self.exists_in_upper(path) {
1849 return self
1850 .upper
1851 .as_mut()
1852 .expect("upper must exist when path exists")
1853 .pread(path, offset, length);
1854 }
1855 let Some(index) = self.find_lower_by_exists(path) else {
1856 return Err(Self::entry_not_found(path));
1857 };
1858 self.lowers[index].pread(path, offset, length)
1859 }
1860}
1861
1862#[cfg(test)]
1863mod tests {
1864 use super::{OverlayFileSystem, OverlayMode};
1865 use crate::posix::vfs::{MemoryFileSystem, VfsResult, VirtualFileSystem};
1866
/// A symlink that points into the overlay's metadata directory (directly
/// or through a symlinked ancestor) must not allow listing or tampering
/// with whiteouts, and a deleted lower file must stay deleted.
#[test]
fn symlink_into_metadata_namespace_cannot_read_or_resurrect_whiteouts() {
    let mut base = MemoryFileSystem::new();
    base.mkdir("/data", true).expect("create lower directory");
    base.write_file("/data/secret.txt", b"secret".to_vec())
        .expect("seed lower file");

    let mut fs = OverlayFileSystem::with_upper(vec![base], MemoryFileSystem::new());

    // Delete the lower file; this records a whiteout in the upper layer.
    fs.remove_file("/data/secret.txt")
        .expect("whiteout lower file");
    assert!(!fs.exists("/data/secret.txt"));

    // Direct symlink into the whiteout directory.
    fs.symlink("/.secure-exec-overlay/whiteouts", "/escape")
        .expect("creating the symlink itself is allowed");

    let listing = fs.read_dir("/escape");
    assert!(
        listing.is_err(),
        "listing the metadata namespace via a symlink must be denied"
    );

    let removal = fs.remove_file("/escape/anything");
    assert!(
        removal.is_err(),
        "tampering with metadata via a symlink must be denied"
    );
    assert!(
        !fs.exists("/data/secret.txt"),
        "deleted lower-layer file must stay deleted"
    );

    // Same attempt through a symlinked ancestor.
    fs.symlink("/", "/rootlink")
        .expect("symlink to root is allowed");
    let nested_listing = fs.read_dir("/rootlink/.secure-exec-overlay/whiteouts");
    assert!(
        nested_listing.is_err(),
        "metadata must be unreachable via an ancestor symlink too"
    );
}
1918
/// A whiteout recorded in the upper layer is still honoured after a new
/// overlay is built from the same upper layer and an identical lower layer.
#[test]
fn whiteouts_persist_when_overlay_reopens_with_same_upper() {
    let mut base = MemoryFileSystem::new();
    base.mkdir("/data", true).expect("create lower directory");
    base.write_file("/data/base.txt", b"base".to_vec())
        .expect("seed lower file");
    let base_snapshot = base.snapshot();

    let first_lower = MemoryFileSystem::from_snapshot(base_snapshot.clone());
    let mut first = OverlayFileSystem::with_upper(vec![first_lower], MemoryFileSystem::new());
    first
        .remove_file("/data/base.txt")
        .expect("whiteout lower file");

    // Re-open: keep the upper layer, rebuild the lower layer from the snapshot.
    let carried_upper = first.upper.take().expect("overlay upper");
    let second_lower = MemoryFileSystem::from_snapshot(base_snapshot);
    let mut reopened = OverlayFileSystem::with_upper(vec![second_lower], carried_upper);

    assert!(!reopened.exists("/data/base.txt"));
    let listing = reopened.read_dir("/data").expect("read merged directory");
    assert_eq!(listing, Vec::<String>::new());
}
1946
/// After `chmod` on a lower directory the directory lists as empty, and
/// the overlay's metadata directory never shows up in the root listing.
#[test]
fn copied_up_directories_become_opaque_and_hide_overlay_metadata() {
    let mut base = MemoryFileSystem::new();
    base.mkdir("/data", true).expect("create lower directory");
    base.write_file("/data/base.txt", b"base".to_vec())
        .expect("seed lower file");

    let mut fs = OverlayFileSystem::new(vec![base], OverlayMode::Ephemeral);
    fs.chmod("/data", 0o700).expect("copy up lower directory");

    let data_listing = fs.read_dir("/data").expect("read opaque directory");
    assert_eq!(data_listing, Vec::<String>::new());

    let root_listing = fs.read_dir("/").expect("read root");
    let metadata_visible = root_listing
        .iter()
        .any(|entry| entry == ".secure-exec-overlay");
    assert!(!metadata_visible);
}
1969
/// A lower directory whose only child has been deleted through the
/// overlay counts as empty and can be removed.
#[test]
fn remove_dir_succeeds_when_only_lower_children_are_whited_out() {
    let mut base = MemoryFileSystem::new();
    base.mkdir("/a", true).expect("create lower directory");
    base.write_file("/a/c", b"child".to_vec())
        .expect("seed lower child");

    let mut fs = OverlayFileSystem::new(vec![base], OverlayMode::Ephemeral);
    fs.remove_file("/a/c").expect("whiteout lower child");
    fs.remove_dir("/a").expect("remove merged-empty directory");

    assert!(!fs.exists("/a"));
    let listing = fs.read_dir("/a");
    assert_error_code(listing, "ENOENT");
}
1987
/// Removing a directory that still has a visible child fails with
/// `ENOTEMPTY` and leaves the child in place.
#[test]
fn remove_dir_still_rejects_visible_children() {
    let mut base = MemoryFileSystem::new();
    base.mkdir("/a", true).expect("create lower directory");
    base.write_file("/a/c", b"child".to_vec())
        .expect("seed lower child");

    let mut fs = OverlayFileSystem::new(vec![base], OverlayMode::Ephemeral);
    let removal = fs.remove_dir("/a");
    assert_error_code(removal, "ENOTEMPTY");
    assert!(fs.exists("/a/c"));
}
2000
2001 fn assert_error_code<T: std::fmt::Debug>(result: VfsResult<T>, expected: &str) {
2002 let error = result.expect_err("expected operation to fail");
2003 assert_eq!(error.code(), expected);
2004 }
2005}