Skip to main content

secure_exec_kernel/
overlay_fs.rs

1use crate::vfs::{
2    normalize_path, MemoryFileSystem, VfsError, VfsResult, VirtualDirEntry, VirtualFileSystem,
3    VirtualStat, VirtualUtimeSpec,
4};
5use base64::Engine;
6use std::collections::BTreeSet;
7
/// Depth ceiling shared by the snapshot/copy-up tree walks and by the
/// symlink-hop counter used while resolving merged paths.
const MAX_SNAPSHOT_DEPTH: usize = 1024;
/// Reserved directory that holds the overlay's own bookkeeping; it is hidden
/// from the merged view.
const OVERLAY_METADATA_ROOT: &str = "/.secure-exec-overlay";
/// Directory containing one marker file per whited-out path.
const OVERLAY_WHITEOUT_DIR: &str = "/.secure-exec-overlay/whiteouts";
/// Directory containing one marker file per opaque directory.
const OVERLAY_OPAQUE_DIR: &str = "/.secure-exec-overlay/opaque";
12
/// Construction mode accepted by `OverlayFileSystem::new`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OverlayMode {
    /// A fresh in-memory upper layer is created and writes stay unlocked.
    Ephemeral,
    /// No upper layer is created and writes are locked from the start.
    ReadOnly,
}
18
19#[derive(Debug)]
20pub struct OverlayFileSystem {
21    lowers: Vec<MemoryFileSystem>,
22    upper: Option<MemoryFileSystem>,
23    writes_locked: bool,
24}
25
/// Kinds of marker file kept under the overlay metadata directory.
#[derive(Debug, Clone, Copy)]
enum OverlayMarkerKind {
    /// Marker that hides a path from the merged view.
    Whiteout,
    /// Marker recorded for a directory that was copied up.
    Opaque,
}
31
/// Payload of a snapshot entry, distinguished by entry type.
#[derive(Debug)]
enum OverlaySnapshotKind {
    /// A directory; carries no payload.
    Directory,
    /// A regular file together with its bytes.
    File(Vec<u8>),
    /// A symbolic link together with its target string.
    Symlink(String),
}
38
39#[derive(Debug)]
40struct OverlaySnapshotEntry {
41    path: String,
42    stat: VirtualStat,
43    kind: OverlaySnapshotKind,
44}
45
/// Running totals used when checking copy-up work against configured limits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
struct OverlayCopyUpUsage {
    /// Accumulated byte count.
    total_bytes: u64,
    /// Accumulated inode count.
    inode_count: usize,
}
51
52impl OverlayFileSystem {
53    pub fn new(lowers: Vec<MemoryFileSystem>, mode: OverlayMode) -> Self {
54        let mut effective_lowers = lowers;
55        if effective_lowers.is_empty() {
56            effective_lowers.push(MemoryFileSystem::new());
57        }
58
59        let mut upper = match mode {
60            OverlayMode::Ephemeral => Some(MemoryFileSystem::new()),
61            OverlayMode::ReadOnly => None,
62        };
63        if let Some(upper_filesystem) = upper.as_mut() {
64            sync_upper_root_metadata(upper_filesystem, &effective_lowers);
65        }
66
67        Self {
68            lowers: effective_lowers,
69            upper,
70            writes_locked: matches!(mode, OverlayMode::ReadOnly),
71        }
72    }
73
74    pub fn with_upper(lowers: Vec<MemoryFileSystem>, upper: MemoryFileSystem) -> Self {
75        let mut effective_lowers = lowers;
76        if effective_lowers.is_empty() {
77            effective_lowers.push(MemoryFileSystem::new());
78        }
79
80        Self {
81            lowers: effective_lowers,
82            upper: Some(upper),
83            writes_locked: false,
84        }
85    }
86
87    pub fn lock_writes(&mut self) {
88        self.writes_locked = true;
89    }
90
91    fn normalized(path: &str) -> String {
92        normalize_path(path)
93    }
94
95    fn parent_path(path: &str) -> String {
96        let normalized = Self::normalized(path);
97        if normalized == "/" {
98            return String::from("/");
99        }
100
101        match normalized.rsplit_once('/') {
102            Some(("", _)) | None => String::from("/"),
103            Some((parent, _)) => String::from(parent),
104        }
105    }
106
107    fn basename(path: &str) -> String {
108        let normalized = Self::normalized(path);
109        if normalized == "/" {
110            return String::from("/");
111        }
112        normalized
113            .rsplit('/')
114            .find(|component| !component.is_empty())
115            .unwrap_or("")
116            .to_owned()
117    }
118
119    fn validate_destination_parent(&mut self, path: &str) -> VfsResult<()> {
120        let parent = Self::parent_path(path);
121        let resolved_parent = self.resolve_merged_path(&parent, true, 0)?;
122        let stat = self.merged_lstat(&resolved_parent)?;
123        if !stat.is_directory {
124            return Err(Self::not_directory(&parent));
125        }
126        Ok(())
127    }
128
129    fn resolved_destination_path(&self, path: &str) -> VfsResult<String> {
130        let parent = Self::parent_path(path);
131        let resolved_parent = self.resolve_merged_path(&parent, true, 0)?;
132        Ok(Self::join_path(&resolved_parent, &Self::basename(path)))
133    }
134
135    fn resolve_merged_path(
136        &self,
137        path: &str,
138        follow_final_symlink: bool,
139        depth: usize,
140    ) -> VfsResult<String> {
141        if depth > MAX_SNAPSHOT_DEPTH {
142            return Err(VfsError::new(
143                "ELOOP",
144                format!("too many symbolic links while resolving '{path}'"),
145            ));
146        }
147
148        let normalized = Self::normalized(path);
149        if normalized == "/" {
150            return Ok(normalized);
151        }
152
153        let components: Vec<&str> = normalized
154            .split('/')
155            .filter(|component| !component.is_empty())
156            .collect();
157        let mut current = String::from("/");
158
159        for (index, component) in components.iter().enumerate() {
160            let candidate = Self::join_path(&current, component);
161            let is_final = index + 1 == components.len();
162            let should_follow = !is_final || follow_final_symlink;
163
164            if should_follow {
165                if let Ok(stat) = self.merged_lstat(&candidate) {
166                    if stat.is_symbolic_link {
167                        let target = self.read_link(&candidate)?;
168                        let target_path = if target.starts_with('/') {
169                            Self::normalized(&target)
170                        } else {
171                            Self::normalized(&Self::join_path(
172                                &Self::parent_path(&candidate),
173                                &target,
174                            ))
175                        };
176                        let remainder = components[index + 1..].join("/");
177                        let next_path = if remainder.is_empty() {
178                            target_path
179                        } else {
180                            Self::normalized(&Self::join_path(&target_path, &remainder))
181                        };
182                        return self.resolve_merged_path(
183                            &next_path,
184                            follow_final_symlink,
185                            depth + 1,
186                        );
187                    }
188
189                    if !is_final && !stat.is_directory {
190                        return Err(Self::not_directory(&candidate));
191                    }
192                }
193            } else if let Ok(stat) = self.merged_lstat(&candidate) {
194                if !is_final && !stat.is_directory {
195                    return Err(Self::not_directory(&candidate));
196                }
197            }
198
199            current = candidate;
200        }
201
202        Ok(current)
203    }
204
205    fn destination_parent_copy_up_paths(&self, path: &str) -> VfsResult<Vec<String>> {
206        let parent = Self::parent_path(path);
207        let mut paths = Vec::new();
208        let mut seen = BTreeSet::new();
209        self.collect_destination_parent_copy_up_paths(&parent, &mut paths, &mut seen, 0)?;
210        Ok(paths)
211    }
212
213    fn collect_destination_parent_copy_up_paths(
214        &self,
215        parent: &str,
216        paths: &mut Vec<String>,
217        seen: &mut BTreeSet<String>,
218        depth: usize,
219    ) -> VfsResult<()> {
220        if depth > MAX_SNAPSHOT_DEPTH {
221            return Err(VfsError::new(
222                "ELOOP",
223                format!("too many symbolic links while resolving '{parent}'"),
224            ));
225        }
226
227        let normalized = Self::normalized(parent);
228        if normalized == "/" {
229            return Ok(());
230        }
231
232        let components: Vec<&str> = normalized
233            .split('/')
234            .filter(|component| !component.is_empty())
235            .collect();
236        let mut current = String::from("/");
237        for (index, component) in components.iter().enumerate() {
238            current = Self::join_path(&current, component);
239            let stat = self.merged_lstat(&current)?;
240
241            if stat.is_symbolic_link {
242                if !self.has_entry_in_upper(&current) && seen.insert(current.clone()) {
243                    paths.push(current.clone());
244                }
245
246                let target = self.read_link(&current)?;
247                let target_path = if target.starts_with('/') {
248                    Self::normalized(&target)
249                } else {
250                    Self::normalized(&Self::join_path(&Self::parent_path(&current), &target))
251                };
252                let remainder = components[index + 1..].join("/");
253                let next_parent = if remainder.is_empty() {
254                    target_path
255                } else {
256                    Self::normalized(&Self::join_path(&target_path, &remainder))
257                };
258                return self.collect_destination_parent_copy_up_paths(
259                    &next_parent,
260                    paths,
261                    seen,
262                    depth + 1,
263                );
264            }
265
266            if self.find_lower_by_entry(&current).is_some()
267                && !self.has_entry_in_upper(&current)
268                && seen.insert(current.clone())
269            {
270                paths.push(current.clone());
271            }
272        }
273
274        Ok(())
275    }
276
277    fn encode_marker_path(path: &str) -> String {
278        base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(path)
279    }
280
281    fn marker_directory(kind: OverlayMarkerKind) -> &'static str {
282        match kind {
283            OverlayMarkerKind::Whiteout => OVERLAY_WHITEOUT_DIR,
284            OverlayMarkerKind::Opaque => OVERLAY_OPAQUE_DIR,
285        }
286    }
287
288    fn marker_path(kind: OverlayMarkerKind, path: &str) -> String {
289        format!(
290            "{}/{}",
291            Self::marker_directory(kind),
292            Self::encode_marker_path(&Self::normalized(path))
293        )
294    }
295
296    fn is_internal_metadata_path(path: &str) -> bool {
297        let normalized = Self::normalized(path);
298        normalized == OVERLAY_METADATA_ROOT
299            || normalized.starts_with(&(String::from(OVERLAY_METADATA_ROOT) + "/"))
300    }
301
302    fn hidden_root_entry_name() -> &'static str {
303        ".secure-exec-overlay"
304    }
305
306    fn should_hide_directory_entry(path: &str, entry: &str) -> bool {
307        let normalized = Self::normalized(path);
308        normalized == "/" && entry == Self::hidden_root_entry_name()
309    }
310
311    fn should_ignore_raw_directory_entry(
312        upper: Option<&MemoryFileSystem>,
313        path: &str,
314        entry: &str,
315    ) -> bool {
316        if entry == "." || entry == ".." || Self::should_hide_directory_entry(path, entry) {
317            return true;
318        }
319
320        let entry_path = Self::join_path(path, entry);
321        Self::marker_exists_in_upper(upper, OverlayMarkerKind::Whiteout, &entry_path)
322    }
323
324    fn check_copy_up_usage_limits(
325        usage: &OverlayCopyUpUsage,
326        max_bytes: Option<u64>,
327        max_inodes: Option<usize>,
328    ) -> VfsResult<()> {
329        if let Some(limit) = max_bytes {
330            if usage.total_bytes > limit {
331                return Err(VfsError::new(
332                    "ENOSPC",
333                    format!(
334                        "overlay rename copy-up bytes {} exceed configured limit {}",
335                        usage.total_bytes, limit
336                    ),
337                ));
338            }
339        }
340
341        if let Some(limit) = max_inodes {
342            if usage.inode_count > limit {
343                return Err(VfsError::new(
344                    "ENOSPC",
345                    format!(
346                        "overlay rename copy-up inodes {} exceed configured limit {}",
347                        usage.inode_count, limit
348                    ),
349                ));
350            }
351        }
352
353        Ok(())
354    }
355
356    fn add_copy_up_usage(
357        usage: &mut OverlayCopyUpUsage,
358        bytes: u64,
359        inodes: usize,
360        max_bytes: Option<u64>,
361        max_inodes: Option<usize>,
362    ) -> VfsResult<()> {
363        usage.total_bytes = usage.total_bytes.saturating_add(bytes);
364        usage.inode_count = usage.inode_count.saturating_add(inodes);
365        Self::check_copy_up_usage_limits(usage, max_bytes, max_inodes)
366    }
367
368    fn remaining_inode_budget(
369        usage: &OverlayCopyUpUsage,
370        max_inodes: Option<usize>,
371    ) -> Option<usize> {
372        max_inodes.map(|limit| limit.saturating_sub(usage.inode_count))
373    }
374
375    fn copy_up_directory_entries_limited(
376        &mut self,
377        path: &str,
378        max_entries: Option<usize>,
379    ) -> VfsResult<Vec<String>> {
380        let Some(max_entries) = max_entries else {
381            return self.read_dir(path);
382        };
383
384        match self.read_dir_limited(path, max_entries) {
385            Ok(entries) => Ok(entries),
386            Err(error) if error.code() == "ENOMEM" => Err(VfsError::new(
387                "ENOSPC",
388                format!("overlay rename copy-up directory '{path}' exceeds configured inode limit"),
389            )),
390            Err(error) => Err(error),
391        }
392    }
393
394    fn directory_has_visible_entries_limited(&mut self, path: &str) -> VfsResult<bool> {
395        match self.read_dir_limited(path, 1) {
396            Ok(entries) => Ok(!entries.is_empty()),
397            Err(error) if error.code() == "ENOMEM" => Ok(true),
398            Err(error) => Err(error),
399        }
400    }
401
402    fn memory_subtree_usage_limited(
403        filesystem: &mut MemoryFileSystem,
404        path: &str,
405        max_bytes: Option<u64>,
406        max_inodes: Option<usize>,
407    ) -> VfsResult<OverlayCopyUpUsage> {
408        let mut usage = OverlayCopyUpUsage::default();
409        let mut visited = BTreeSet::new();
410        let mut pending = vec![Self::normalized(path)];
411        while let Some(current_path) = pending.pop() {
412            let stat = filesystem.lstat(&current_path)?;
413            if visited.insert(stat.ino) {
414                let bytes = if stat.is_directory && !stat.is_symbolic_link {
415                    0
416                } else {
417                    stat.size
418                };
419                Self::add_copy_up_usage(&mut usage, bytes, 1, max_bytes, max_inodes)?;
420            }
421
422            if stat.is_directory && !stat.is_symbolic_link {
423                let remaining = Self::remaining_inode_budget(&usage, max_inodes);
424                let children = if let Some(max_entries) = remaining {
425                    filesystem.read_dir_limited(&current_path, max_entries)?
426                } else {
427                    filesystem.read_dir(&current_path)?
428                };
429                for entry in children.into_iter().rev() {
430                    if matches!(entry.as_str(), "." | "..") {
431                        continue;
432                    }
433                    if Self::should_hide_directory_entry(&current_path, &entry) {
434                        continue;
435                    }
436                    pending.push(Self::join_path(&current_path, &entry));
437                }
438            }
439        }
440
441        Ok(usage)
442    }
443
444    fn memory_subtree_released_usage(
445        filesystem: &mut MemoryFileSystem,
446        path: &str,
447    ) -> VfsResult<OverlayCopyUpUsage> {
448        let mut usage = OverlayCopyUpUsage::default();
449        let mut visited = BTreeSet::new();
450        let mut pending = vec![Self::normalized(path)];
451        while let Some(current_path) = pending.pop() {
452            let stat = filesystem.lstat(&current_path)?;
453            if visited.insert(stat.ino) {
454                let subtree_links = filesystem.link_count_in_subtree(stat.ino, path) as u64;
455                if stat.is_directory || stat.nlink <= subtree_links {
456                    let bytes = if stat.is_directory && !stat.is_symbolic_link {
457                        0
458                    } else {
459                        stat.size
460                    };
461                    Self::add_copy_up_usage(&mut usage, bytes, 1, None, None)?;
462                }
463            }
464
465            if stat.is_directory && !stat.is_symbolic_link {
466                for entry in filesystem.read_dir(&current_path)?.into_iter().rev() {
467                    if matches!(entry.as_str(), "." | "..") {
468                        continue;
469                    }
470                    if Self::should_hide_directory_entry(&current_path, &entry) {
471                        continue;
472                    }
473                    pending.push(Self::join_path(&current_path, &entry));
474                }
475            }
476        }
477
478        Ok(usage)
479    }
480
481    fn upper_usage_limited(
482        &mut self,
483        max_bytes: Option<u64>,
484        max_inodes: Option<usize>,
485    ) -> VfsResult<OverlayCopyUpUsage> {
486        let Some(upper) = self.upper.as_mut() else {
487            return Ok(OverlayCopyUpUsage::default());
488        };
489
490        Self::memory_subtree_usage_limited(upper, "/", max_bytes, max_inodes)
491    }
492
493    fn upper_subtree_released_usage(&mut self, path: &str) -> VfsResult<OverlayCopyUpUsage> {
494        let Some(upper) = self.upper.as_mut() else {
495            return Ok(OverlayCopyUpUsage::default());
496        };
497
498        if !upper.exists(path) {
499            return Ok(OverlayCopyUpUsage::default());
500        }
501
502        Self::memory_subtree_released_usage(upper, path)
503    }
504
505    fn collect_copy_up_usage_limited(
506        &mut self,
507        path: &str,
508        usage: &mut OverlayCopyUpUsage,
509        max_bytes: Option<u64>,
510        max_inodes: Option<usize>,
511    ) -> VfsResult<()> {
512        let mut pending = vec![(Self::normalized(path), 0usize)];
513        while let Some((current_path, depth)) = pending.pop() {
514            if depth > MAX_SNAPSHOT_DEPTH {
515                return Err(VfsError::new(
516                    "EINVAL",
517                    format!("overlay snapshot depth limit exceeded at '{current_path}'"),
518                ));
519            }
520
521            let stat = self.lstat(&current_path)?;
522            if !self.has_entry_in_upper(&current_path) {
523                let bytes = if stat.is_symbolic_link {
524                    self.read_link(&current_path)?.len() as u64
525                } else if stat.is_directory {
526                    0
527                } else {
528                    stat.size
529                };
530                Self::add_copy_up_usage(usage, bytes, 1, max_bytes, max_inodes)?;
531            }
532
533            if stat.is_directory && !stat.is_symbolic_link {
534                let children = self.copy_up_directory_entries_limited(&current_path, max_inodes)?;
535                for entry in children.into_iter().rev() {
536                    pending.push((Self::join_path(&current_path, &entry), depth + 1));
537                }
538            }
539        }
540
541        Ok(())
542    }
543
544    fn collect_single_copy_up_usage_limited(
545        &mut self,
546        path: &str,
547        usage: &mut OverlayCopyUpUsage,
548        max_bytes: Option<u64>,
549        max_inodes: Option<usize>,
550    ) -> VfsResult<()> {
551        if self.has_entry_in_upper(path) {
552            return Ok(());
553        }
554
555        let stat = self.merged_lstat(path)?;
556        let bytes = if stat.is_symbolic_link {
557            self.read_link(path)?.len() as u64
558        } else if stat.is_directory {
559            0
560        } else {
561            stat.size
562        };
563        Self::add_copy_up_usage(usage, bytes, 1, max_bytes, max_inodes)
564    }
565
566    pub fn check_rename_copy_up_limits(
567        &mut self,
568        old_path: &str,
569        new_path: &str,
570        max_bytes: Option<u64>,
571        max_inodes: Option<usize>,
572    ) -> VfsResult<()> {
573        let old_normalized = Self::normalized(old_path);
574        let new_normalized = Self::normalized(new_path);
575        if Self::is_internal_metadata_path(&old_normalized)
576            || Self::is_internal_metadata_path(&new_normalized)
577        {
578            return Err(VfsError::permission_denied("rename", old_path));
579        }
580
581        if old_normalized == "/" {
582            return Err(VfsError::permission_denied("rename", old_path));
583        }
584
585        if old_normalized == new_normalized {
586            return Ok(());
587        }
588
589        let source_stat = self.merged_lstat(old_path)?;
590        if self.writes_locked {
591            self.writable_upper(&old_normalized)?;
592        }
593        self.validate_destination_parent(&new_normalized)?;
594        let resolved_new_normalized = self.resolved_destination_path(&new_normalized)?;
595
596        if old_normalized == resolved_new_normalized {
597            return Ok(());
598        }
599
600        if source_stat.is_directory
601            && resolved_new_normalized.starts_with(&(old_normalized.clone() + "/"))
602        {
603            return Err(VfsError::new(
604                "EINVAL",
605                format!(
606                    "cannot move '{}' into its own descendant '{}'",
607                    old_path, new_path
608                ),
609            ));
610        }
611
612        let destination_parent_copy_up_paths =
613            self.destination_parent_copy_up_paths(&new_normalized)?;
614
615        if let Ok(destination_stat) = self.merged_lstat(&resolved_new_normalized) {
616            if destination_stat.is_directory
617                && !destination_stat.is_symbolic_link
618                && self.directory_has_visible_entries_limited(&resolved_new_normalized)?
619            {
620                return Err(Self::not_empty(&resolved_new_normalized));
621            }
622        }
623
624        let mut usage = self.upper_usage_limited(None, None)?;
625        if self.has_entry_in_upper(&resolved_new_normalized) {
626            let destination_usage = self.upper_subtree_released_usage(&resolved_new_normalized)?;
627            usage.total_bytes = usage
628                .total_bytes
629                .saturating_sub(destination_usage.total_bytes);
630            usage.inode_count = usage
631                .inode_count
632                .saturating_sub(destination_usage.inode_count);
633        }
634        Self::check_copy_up_usage_limits(&usage, max_bytes, max_inodes)?;
635        for path in destination_parent_copy_up_paths {
636            self.collect_single_copy_up_usage_limited(&path, &mut usage, max_bytes, max_inodes)?;
637        }
638        self.collect_copy_up_usage_limited(&old_normalized, &mut usage, max_bytes, max_inodes)?;
639
640        Self::check_copy_up_usage_limits(&usage, max_bytes, max_inodes)
641    }
642
643    fn marker_exists(&self, kind: OverlayMarkerKind, path: &str) -> bool {
644        Self::marker_exists_in_upper(self.upper.as_ref(), kind, path)
645    }
646
647    fn marker_exists_in_upper(
648        upper: Option<&MemoryFileSystem>,
649        kind: OverlayMarkerKind,
650        path: &str,
651    ) -> bool {
652        upper.is_some_and(|filesystem| filesystem.exists(&Self::marker_path(kind, path)))
653    }
654
655    fn is_whited_out(&self, path: &str) -> bool {
656        self.marker_exists(OverlayMarkerKind::Whiteout, path)
657    }
658
659    fn ensure_metadata_directories_in_upper(&mut self, path: &str) -> VfsResult<()> {
660        let upper = self.writable_upper(path)?;
661        upper.mkdir(OVERLAY_METADATA_ROOT, true)?;
662        upper.mkdir(OVERLAY_WHITEOUT_DIR, true)?;
663        upper.mkdir(OVERLAY_OPAQUE_DIR, true)?;
664        Ok(())
665    }
666
667    fn set_marker(&mut self, kind: OverlayMarkerKind, path: &str, present: bool) -> VfsResult<()> {
668        let marker_path = Self::marker_path(kind, path);
669        if present {
670            self.ensure_metadata_directories_in_upper(path)?;
671            self.writable_upper(path)?
672                .write_file(&marker_path, Self::normalized(path).into_bytes())?;
673            return Ok(());
674        }
675
676        if self
677            .upper
678            .as_ref()
679            .is_some_and(|upper| upper.exists(&marker_path))
680        {
681            self.writable_upper(path)?.remove_file(&marker_path)?;
682        }
683        Ok(())
684    }
685
686    fn add_whiteout(&mut self, path: &str) -> VfsResult<()> {
687        self.set_marker(OverlayMarkerKind::Whiteout, path, true)
688    }
689
690    fn remove_whiteout(&mut self, path: &str) -> VfsResult<()> {
691        self.set_marker(OverlayMarkerKind::Whiteout, path, false)
692    }
693
694    fn mark_opaque_directory(&mut self, path: &str) -> VfsResult<()> {
695        self.set_marker(OverlayMarkerKind::Opaque, path, true)
696    }
697
698    fn clear_opaque_directory(&mut self, path: &str) -> VfsResult<()> {
699        self.set_marker(OverlayMarkerKind::Opaque, path, false)
700    }
701
702    fn clear_path_metadata(&mut self, path: &str) -> VfsResult<()> {
703        self.remove_whiteout(path)?;
704        self.clear_opaque_directory(path)
705    }
706
707    fn join_path(base: &str, name: &str) -> String {
708        if base == "/" {
709            format!("/{name}")
710        } else {
711            format!("{base}/{name}")
712        }
713    }
714
715    fn rebase_path(path: &str, old_root: &str, new_root: &str) -> String {
716        if path == old_root {
717            return String::from(new_root);
718        }
719
720        format!("{new_root}{}", &path[old_root.len()..])
721    }
722
723    fn read_only_error(path: &str) -> VfsError {
724        VfsError::new("EROFS", format!("read-only filesystem: {path}"))
725    }
726
727    fn entry_not_found(path: &str) -> VfsError {
728        VfsError::new("ENOENT", format!("no such file: {path}"))
729    }
730
731    fn directory_not_found(path: &str) -> VfsError {
732        VfsError::new("ENOENT", format!("no such directory: {path}"))
733    }
734
735    fn already_exists(path: &str) -> VfsError {
736        VfsError::new("EEXIST", format!("file exists: {path}"))
737    }
738
739    fn not_directory(path: &str) -> VfsError {
740        VfsError::new("ENOTDIR", format!("not a directory: {path}"))
741    }
742
743    fn writable_upper(&mut self, path: &str) -> VfsResult<&mut MemoryFileSystem> {
744        if self.writes_locked {
745            return Err(Self::read_only_error(path));
746        }
747        self.upper
748            .as_mut()
749            .ok_or_else(|| Self::read_only_error(path))
750    }
751
752    fn path_exists_in_filesystem(filesystem: &MemoryFileSystem, path: &str) -> bool {
753        filesystem.exists(path)
754    }
755
756    fn has_entry_in_filesystem(filesystem: &MemoryFileSystem, path: &str) -> bool {
757        filesystem.lstat(path).is_ok()
758    }
759
760    fn exists_in_upper(&self, path: &str) -> bool {
761        self.upper
762            .as_ref()
763            .is_some_and(|upper| Self::path_exists_in_filesystem(upper, path))
764    }
765
766    fn has_entry_in_upper(&self, path: &str) -> bool {
767        self.upper
768            .as_ref()
769            .is_some_and(|upper| Self::has_entry_in_filesystem(upper, path))
770    }
771
772    fn find_lower_by_exists(&self, path: &str) -> Option<usize> {
773        self.lowers
774            .iter()
775            .position(|lower| Self::path_exists_in_filesystem(lower, path))
776    }
777
778    fn find_lower_by_entry(&self, path: &str) -> Option<(usize, VirtualStat)> {
779        self.lowers
780            .iter()
781            .enumerate()
782            .find_map(|(index, lower)| lower.lstat(path).ok().map(|stat| (index, stat)))
783    }
784
785    fn merged_lstat(&self, path: &str) -> VfsResult<VirtualStat> {
786        if Self::is_internal_metadata_path(path) {
787            return Err(Self::entry_not_found(path));
788        }
789        if self.is_whited_out(path) {
790            return Err(Self::entry_not_found(path));
791        }
792        if self.has_entry_in_upper(path) {
793            return self
794                .upper
795                .as_ref()
796                .expect("upper must exist when entry exists")
797                .lstat(path);
798        }
799        self.find_lower_by_entry(path)
800            .map(|(_, stat)| stat)
801            .ok_or_else(|| Self::entry_not_found(path))
802    }
803
804    fn ensure_ancestor_directories_in_upper(&mut self, path: &str) -> VfsResult<()> {
805        if Self::is_internal_metadata_path(path) {
806            return Err(VfsError::permission_denied("mkdir", path));
807        }
808        let normalized = Self::normalized(path);
809        let parts = normalized
810            .split('/')
811            .filter(|part| !part.is_empty())
812            .collect::<Vec<_>>();
813
814        let mut current = String::new();
815        for part in parts.iter().take(parts.len().saturating_sub(1)) {
816            current.push('/');
817            current.push_str(part);
818
819            if self.exists_in_upper(&current) {
820                continue;
821            }
822
823            if let Some(index) = self.find_lower_by_exists(&current) {
824                let stat = self.lowers[index].stat(&current)?;
825                if !stat.is_directory {
826                    return Err(Self::not_directory(&current));
827                }
828
829                let upper = self.writable_upper(&current)?;
830                upper.mkdir(&current, false)?;
831                upper.chmod(&current, stat.mode)?;
832                upper.chown(&current, stat.uid, stat.gid)?;
833                continue;
834            }
835
836            let upper = self.writable_upper(&current)?;
837            upper.mkdir(&current, false)?;
838        }
839
840        Ok(())
841    }
842
843    fn copy_up_path(&mut self, path: &str) -> VfsResult<()> {
844        if self.has_entry_in_upper(path) {
845            return Ok(());
846        }
847
848        self.ensure_ancestor_directories_in_upper(path)?;
849
850        let (lower_index, stat) = self
851            .find_lower_by_entry(path)
852            .ok_or_else(|| Self::entry_not_found(path))?;
853
854        if stat.is_symbolic_link {
855            let target = self.lowers[lower_index].read_link(path)?;
856            let upper = self.writable_upper(path)?;
857            upper.symlink(&target, path)?;
858            return Ok(());
859        }
860
861        if stat.is_directory {
862            let upper = self.writable_upper(path)?;
863            upper.mkdir(path, false)?;
864            upper.chmod(path, stat.mode)?;
865            upper.chown(path, stat.uid, stat.gid)?;
866            self.mark_opaque_directory(path)?;
867            return Ok(());
868        }
869
870        let data = self.lowers[lower_index].read_file(path)?;
871        let upper = self.writable_upper(path)?;
872        upper.write_file(path, data)?;
873        upper.chmod(path, stat.mode)?;
874        upper.chown(path, stat.uid, stat.gid)?;
875        Ok(())
876    }
877
878    fn materialize_destination_parent_in_upper(&mut self, path: &str) -> VfsResult<()> {
879        if self.has_entry_in_upper(path) {
880            return Ok(());
881        }
882
883        if self
884            .merged_lstat(path)
885            .is_ok_and(|stat| stat.is_symbolic_link)
886        {
887            return self.copy_up_path(path);
888        }
889
890        self.ensure_ancestor_directories_in_upper(path)?;
891        let stat = self.merged_lstat(path)?;
892        if !stat.is_directory || stat.is_symbolic_link {
893            return Err(Self::not_directory(path));
894        }
895
896        let upper = self.writable_upper(path)?;
897        upper.create_dir(path)?;
898        upper.chmod(path, stat.mode)?;
899        upper.chown(path, stat.uid, stat.gid)?;
900        Ok(())
901    }
902
903    fn path_exists_in_merged_view(&self, path: &str) -> bool {
904        if self.is_whited_out(path) {
905            return false;
906        }
907        if self.has_entry_in_upper(path) {
908            return true;
909        }
910        self.find_lower_by_entry(path).is_some()
911    }
912
913    fn not_empty(path: &str) -> VfsError {
914        VfsError::new("ENOTEMPTY", format!("directory not empty, rmdir '{path}'"))
915    }
916
917    fn collect_snapshot_entries(
918        &mut self,
919        path: &str,
920        entries: &mut Vec<OverlaySnapshotEntry>,
921    ) -> VfsResult<()> {
922        let mut pending = vec![(Self::normalized(path), 0usize)];
923        while let Some((current_path, depth)) = pending.pop() {
924            if depth > MAX_SNAPSHOT_DEPTH {
925                return Err(VfsError::new(
926                    "EINVAL",
927                    format!("overlay snapshot depth limit exceeded at '{current_path}'"),
928                ));
929            }
930
931            let stat = self.lstat(&current_path)?;
932
933            if stat.is_symbolic_link {
934                entries.push(OverlaySnapshotEntry {
935                    path: current_path.clone(),
936                    stat,
937                    kind: OverlaySnapshotKind::Symlink(self.read_link(&current_path)?),
938                });
939                continue;
940            }
941
942            if stat.is_directory {
943                entries.push(OverlaySnapshotEntry {
944                    path: current_path.clone(),
945                    stat,
946                    kind: OverlaySnapshotKind::Directory,
947                });
948
949                let children = self.read_dir_with_types(&current_path)?;
950                for entry in children.into_iter().rev() {
951                    pending.push((Self::join_path(&current_path, &entry.name), depth + 1));
952                }
953                continue;
954            }
955
956            entries.push(OverlaySnapshotEntry {
957                path: current_path.clone(),
958                stat,
959                kind: OverlaySnapshotKind::File(self.read_file(&current_path)?),
960            });
961        }
962        Ok(())
963    }
964
965    fn remove_snapshot_entries(&mut self, entries: &[OverlaySnapshotEntry]) -> VfsResult<()> {
966        for entry in entries.iter().rev() {
967            if self.has_entry_in_upper(&entry.path) {
968                match entry.kind {
969                    OverlaySnapshotKind::Directory => {
970                        self.writable_upper(&entry.path)?.remove_dir(&entry.path)?;
971                    }
972                    OverlaySnapshotKind::File(_) | OverlaySnapshotKind::Symlink(_) => {
973                        self.writable_upper(&entry.path)?.remove_file(&entry.path)?;
974                    }
975                }
976            }
977
978            if self.find_lower_by_entry(&entry.path).is_some() {
979                self.clear_opaque_directory(&entry.path)?;
980                self.add_whiteout(&entry.path)?;
981            } else {
982                self.clear_path_metadata(&entry.path)?;
983            }
984        }
985
986        Ok(())
987    }
988
989    fn directory_has_raw_children(&mut self, path: &str) -> VfsResult<bool> {
990        let normalized = Self::normalized(path);
991        let mut directory_exists = false;
992
993        if let Some(upper) = self.upper.as_mut() {
994            if let Ok(entries) = upper.read_dir(&normalized) {
995                directory_exists = true;
996                if entries.into_iter().any(|entry| {
997                    !Self::should_ignore_raw_directory_entry(Some(&*upper), &normalized, &entry)
998                }) {
999                    return Ok(true);
1000                }
1001            }
1002        }
1003
1004        let upper = self.upper.as_ref();
1005        for lower in self.lowers.iter_mut().rev() {
1006            if let Ok(entries) = lower.read_dir(&normalized) {
1007                directory_exists = true;
1008                if entries.into_iter().any(|entry| {
1009                    !Self::should_ignore_raw_directory_entry(upper, &normalized, &entry)
1010                }) {
1011                    return Ok(true);
1012                }
1013            }
1014        }
1015
1016        if !directory_exists {
1017            return Err(Self::directory_not_found(path));
1018        }
1019
1020        Ok(false)
1021    }
1022
1023    fn marker_paths_in_upper(&mut self, kind: OverlayMarkerKind) -> VfsResult<Vec<String>> {
1024        let Some(upper) = self.upper.as_mut() else {
1025            return Ok(Vec::new());
1026        };
1027
1028        let marker_dir = Self::marker_directory(kind);
1029        let entries = match upper.read_dir(marker_dir) {
1030            Ok(entries) => entries,
1031            Err(error) if error.code() == "ENOENT" => return Ok(Vec::new()),
1032            Err(error) => return Err(error),
1033        };
1034
1035        let mut marker_paths = Vec::new();
1036        for entry in entries {
1037            if entry == "." || entry == ".." {
1038                continue;
1039            }
1040
1041            let marker_file = Self::join_path(marker_dir, &entry);
1042            let marker_path =
1043                String::from_utf8(upper.read_file(&marker_file).map_err(|_| {
1044                    VfsError::io(format!("invalid overlay marker '{marker_file}'"))
1045                })?)
1046                .map_err(|_| VfsError::io(format!("invalid overlay marker '{marker_file}'")))?;
1047            marker_paths.push(Self::normalized(&marker_path));
1048        }
1049
1050        Ok(marker_paths)
1051    }
1052
1053    fn path_in_subtree(path: &str, root: &str) -> bool {
1054        path == root || path.starts_with(&(String::from(root) + "/"))
1055    }
1056
1057    fn clear_subtree_metadata(&mut self, path: &str) -> VfsResult<()> {
1058        let normalized = Self::normalized(path);
1059        for kind in [OverlayMarkerKind::Whiteout, OverlayMarkerKind::Opaque] {
1060            for marker_path in self.marker_paths_in_upper(kind)? {
1061                if Self::path_in_subtree(&marker_path, &normalized) {
1062                    self.set_marker(kind, &marker_path, false)?;
1063                }
1064            }
1065        }
1066        Ok(())
1067    }
1068
1069    fn copy_subtree_metadata(&mut self, old_root: &str, new_root: &str) -> VfsResult<()> {
1070        let old_normalized = Self::normalized(old_root);
1071        let new_normalized = Self::normalized(new_root);
1072
1073        for kind in [OverlayMarkerKind::Whiteout, OverlayMarkerKind::Opaque] {
1074            for marker_path in self.marker_paths_in_upper(kind)? {
1075                if Self::path_in_subtree(&marker_path, &old_normalized) {
1076                    let destination =
1077                        Self::rebase_path(&marker_path, &old_normalized, &new_normalized);
1078                    self.set_marker(kind, &destination, true)?;
1079                }
1080            }
1081        }
1082
1083        Ok(())
1084    }
1085
1086    fn stage_snapshot_entries_in_upper(
1087        &mut self,
1088        entries: &[OverlaySnapshotEntry],
1089    ) -> VfsResult<()> {
1090        for entry in entries {
1091            match &entry.kind {
1092                OverlaySnapshotKind::Directory => {
1093                    if !self.has_entry_in_upper(&entry.path) {
1094                        self.ensure_ancestor_directories_in_upper(&entry.path)?;
1095                        self.writable_upper(&entry.path)?.create_dir(&entry.path)?;
1096                    }
1097                    self.writable_upper(&entry.path)?
1098                        .chmod(&entry.path, entry.stat.mode)?;
1099                    self.writable_upper(&entry.path)?.chown(
1100                        &entry.path,
1101                        entry.stat.uid,
1102                        entry.stat.gid,
1103                    )?;
1104                    self.mark_opaque_directory(&entry.path)?;
1105                }
1106                OverlaySnapshotKind::File(data) => {
1107                    if self.has_entry_in_upper(&entry.path) {
1108                        continue;
1109                    }
1110                    self.ensure_ancestor_directories_in_upper(&entry.path)?;
1111                    self.writable_upper(&entry.path)?
1112                        .write_file(&entry.path, data.clone())?;
1113                    self.writable_upper(&entry.path)?
1114                        .chmod(&entry.path, entry.stat.mode)?;
1115                    self.writable_upper(&entry.path)?.chown(
1116                        &entry.path,
1117                        entry.stat.uid,
1118                        entry.stat.gid,
1119                    )?;
1120                }
1121                OverlaySnapshotKind::Symlink(target) => {
1122                    if self.has_entry_in_upper(&entry.path) {
1123                        continue;
1124                    }
1125                    self.ensure_ancestor_directories_in_upper(&entry.path)?;
1126                    self.writable_upper(&entry.path)?
1127                        .symlink(target, &entry.path)?;
1128                }
1129            }
1130        }
1131
1132        Ok(())
1133    }
1134}
1135
1136fn sync_upper_root_metadata(upper: &mut MemoryFileSystem, lowers: &[MemoryFileSystem]) {
1137    let Some(root_stat) = lowers.iter().find_map(|lower| lower.lstat("/").ok()) else {
1138        return;
1139    };
1140
1141    upper
1142        .chmod("/", root_stat.mode)
1143        .expect("overlay upper root should exist");
1144    upper
1145        .chown("/", root_stat.uid, root_stat.gid)
1146        .expect("overlay upper root should exist");
1147}
1148
1149impl VirtualFileSystem for OverlayFileSystem {
1150    fn read_file(&mut self, path: &str) -> VfsResult<Vec<u8>> {
1151        if Self::is_internal_metadata_path(path) {
1152            return Err(Self::entry_not_found(path));
1153        }
1154        if self.is_whited_out(path) {
1155            return Err(Self::entry_not_found(path));
1156        }
1157        if self.exists_in_upper(path) {
1158            return self
1159                .upper
1160                .as_mut()
1161                .expect("upper must exist when path exists")
1162                .read_file(path);
1163        }
1164        let Some(index) = self.find_lower_by_exists(path) else {
1165            return Err(Self::entry_not_found(path));
1166        };
1167        self.lowers[index].read_file(path)
1168    }
1169
1170    fn read_dir(&mut self, path: &str) -> VfsResult<Vec<String>> {
1171        if Self::is_internal_metadata_path(path) {
1172            return Err(Self::directory_not_found(path));
1173        }
1174        if self.is_whited_out(path) {
1175            return Err(Self::directory_not_found(path));
1176        }
1177
1178        let normalized = Self::normalized(path);
1179        let mut directory_exists = false;
1180        let mut entries = BTreeSet::new();
1181        let upper = self.upper.as_ref();
1182        let include_lowers = !Self::marker_exists_in_upper(upper, OverlayMarkerKind::Opaque, path);
1183
1184        if include_lowers {
1185            for lower in self.lowers.iter_mut().rev() {
1186                if let Ok(lower_entries) = lower.read_dir(path) {
1187                    directory_exists = true;
1188                    for entry in lower_entries {
1189                        if entry == "."
1190                            || entry == ".."
1191                            || Self::should_hide_directory_entry(path, &entry)
1192                        {
1193                            continue;
1194                        }
1195                        let child_path = if normalized == "/" {
1196                            format!("/{entry}")
1197                        } else {
1198                            format!("{normalized}/{entry}")
1199                        };
1200                        if !Self::marker_exists_in_upper(
1201                            upper,
1202                            OverlayMarkerKind::Whiteout,
1203                            &child_path,
1204                        ) {
1205                            entries.insert(entry);
1206                        }
1207                    }
1208                }
1209            }
1210        }
1211
1212        if let Some(upper) = self.upper.as_mut() {
1213            if let Ok(upper_entries) = upper.read_dir(path) {
1214                directory_exists = true;
1215                for entry in upper_entries {
1216                    if entry == "."
1217                        || entry == ".."
1218                        || Self::should_hide_directory_entry(path, &entry)
1219                    {
1220                        continue;
1221                    }
1222                    entries.insert(entry);
1223                }
1224            }
1225        }
1226
1227        if !directory_exists {
1228            return Err(Self::directory_not_found(path));
1229        }
1230
1231        Ok(entries.into_iter().collect())
1232    }
1233
1234    fn read_dir_limited(&mut self, path: &str, max_entries: usize) -> VfsResult<Vec<String>> {
1235        if Self::is_internal_metadata_path(path) {
1236            return Err(Self::directory_not_found(path));
1237        }
1238        if self.is_whited_out(path) {
1239            return Err(Self::directory_not_found(path));
1240        }
1241
1242        let normalized = Self::normalized(path);
1243        let mut directory_exists = false;
1244        let mut entries = BTreeSet::new();
1245        let upper = self.upper.as_ref();
1246        let include_lowers = !Self::marker_exists_in_upper(upper, OverlayMarkerKind::Opaque, path);
1247
1248        if include_lowers {
1249            for lower in self.lowers.iter_mut().rev() {
1250                let lower_entries = match lower.read_dir_filtered_limited(
1251                    path,
1252                    max_entries.saturating_sub(entries.len()),
1253                    |entry| {
1254                        if entry == "."
1255                            || entry == ".."
1256                            || Self::should_hide_directory_entry(path, entry)
1257                        {
1258                            return false;
1259                        }
1260                        let child_path = if normalized == "/" {
1261                            format!("/{entry}")
1262                        } else {
1263                            format!("{normalized}/{entry}")
1264                        };
1265                        !Self::marker_exists_in_upper(
1266                            upper,
1267                            OverlayMarkerKind::Whiteout,
1268                            &child_path,
1269                        ) && !entries.contains(entry)
1270                    },
1271                ) {
1272                    Ok(entries) => entries,
1273                    Err(error) if error.code() == "ENOENT" || error.code() == "ENOTDIR" => {
1274                        continue;
1275                    }
1276                    Err(error) => return Err(error),
1277                };
1278                directory_exists = true;
1279                for entry in lower_entries {
1280                    entries.insert(entry);
1281                    if entries.len() > max_entries {
1282                        return Err(VfsError::new(
1283                            "ENOMEM",
1284                            format!(
1285                                "directory listing for '{path}' exceeds configured limit of {max_entries} entries"
1286                            ),
1287                        ));
1288                    }
1289                }
1290            }
1291        }
1292
1293        if let Some(upper) = self.upper.as_mut() {
1294            let upper_entries = match upper.read_dir_filtered_limited(
1295                path,
1296                max_entries.saturating_sub(entries.len()),
1297                |entry| {
1298                    entry != "."
1299                        && entry != ".."
1300                        && !Self::should_hide_directory_entry(path, entry)
1301                        && !entries.contains(entry)
1302                },
1303            ) {
1304                Ok(entries) => entries,
1305                Err(error) if error.code() == "ENOENT" => Vec::new(),
1306                Err(error) => return Err(error),
1307            };
1308            directory_exists = directory_exists || upper.exists(path);
1309            for entry in upper_entries {
1310                if entry == "." || entry == ".." || Self::should_hide_directory_entry(path, &entry)
1311                {
1312                    continue;
1313                }
1314                entries.insert(entry);
1315                if entries.len() > max_entries {
1316                    return Err(VfsError::new(
1317                        "ENOMEM",
1318                        format!(
1319                            "directory listing for '{path}' exceeds configured limit of {max_entries} entries"
1320                        ),
1321                    ));
1322                }
1323            }
1324        }
1325
1326        if !directory_exists {
1327            return Err(Self::directory_not_found(path));
1328        }
1329
1330        Ok(entries.into_iter().collect())
1331    }
1332
1333    fn read_dir_with_types(&mut self, path: &str) -> VfsResult<Vec<VirtualDirEntry>> {
1334        if Self::is_internal_metadata_path(path) {
1335            return Err(Self::directory_not_found(path));
1336        }
1337        if self.is_whited_out(path) {
1338            return Err(Self::directory_not_found(path));
1339        }
1340
1341        let normalized = Self::normalized(path);
1342        let mut directory_exists = false;
1343        let mut entries = Vec::<VirtualDirEntry>::new();
1344        let mut seen = BTreeSet::<String>::new();
1345        let upper = self.upper.as_ref();
1346        let include_lowers = !Self::marker_exists_in_upper(upper, OverlayMarkerKind::Opaque, path);
1347
1348        if include_lowers {
1349            for lower in self.lowers.iter_mut().rev() {
1350                if let Ok(lower_entries) = lower.read_dir_with_types(path) {
1351                    directory_exists = true;
1352                    for entry in lower_entries {
1353                        if entry.name == "."
1354                            || entry.name == ".."
1355                            || Self::should_hide_directory_entry(path, &entry.name)
1356                        {
1357                            continue;
1358                        }
1359                        let child_path = if normalized == "/" {
1360                            format!("/{}", entry.name)
1361                        } else {
1362                            format!("{normalized}/{}", entry.name)
1363                        };
1364                        if Self::marker_exists_in_upper(
1365                            upper,
1366                            OverlayMarkerKind::Whiteout,
1367                            &child_path,
1368                        ) || seen.contains(&entry.name)
1369                        {
1370                            continue;
1371                        }
1372                        seen.insert(entry.name.clone());
1373                        entries.push(entry);
1374                    }
1375                }
1376            }
1377        }
1378
1379        if let Some(upper) = self.upper.as_mut() {
1380            if let Ok(upper_entries) = upper.read_dir_with_types(path) {
1381                directory_exists = true;
1382                for entry in upper_entries {
1383                    if entry.name == "."
1384                        || entry.name == ".."
1385                        || Self::should_hide_directory_entry(path, &entry.name)
1386                    {
1387                        continue;
1388                    }
1389                    if let Some(index) = entries
1390                        .iter()
1391                        .position(|existing| existing.name == entry.name)
1392                    {
1393                        entries[index] = entry;
1394                    } else {
1395                        seen.insert(entry.name.clone());
1396                        entries.push(entry);
1397                    }
1398                }
1399            }
1400        }
1401
1402        if !directory_exists {
1403            return Err(Self::directory_not_found(path));
1404        }
1405
1406        Ok(entries)
1407    }
1408
1409    fn write_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
1410        if Self::is_internal_metadata_path(path) {
1411            return Err(VfsError::permission_denied("open", path));
1412        }
1413        self.clear_path_metadata(path)?;
1414        if self.find_lower_by_entry(path).is_some() {
1415            self.copy_up_path(path)?;
1416        } else {
1417            self.ensure_ancestor_directories_in_upper(path)?;
1418        }
1419        self.writable_upper(path)?.write_file(path, content.into())
1420    }
1421
1422    fn create_file_exclusive(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<()> {
1423        if Self::is_internal_metadata_path(path) {
1424            return Err(VfsError::permission_denied("open", path));
1425        }
1426        self.clear_path_metadata(path)?;
1427        if self.path_exists_in_merged_view(path) {
1428            return Err(Self::already_exists(path));
1429        }
1430        self.ensure_ancestor_directories_in_upper(path)?;
1431        self.writable_upper(path)?
1432            .create_file_exclusive(path, content.into())
1433    }
1434
1435    fn append_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> VfsResult<u64> {
1436        if Self::is_internal_metadata_path(path) {
1437            return Err(VfsError::permission_denied("open", path));
1438        }
1439        self.clear_path_metadata(path)?;
1440        if self.find_lower_by_entry(path).is_some() {
1441            self.copy_up_path(path)?;
1442        } else {
1443            self.ensure_ancestor_directories_in_upper(path)?;
1444        }
1445        self.writable_upper(path)?.append_file(path, content.into())
1446    }
1447
1448    fn create_dir(&mut self, path: &str) -> VfsResult<()> {
1449        if Self::is_internal_metadata_path(path) {
1450            return Err(VfsError::permission_denied("mkdir", path));
1451        }
1452        self.clear_path_metadata(path)?;
1453        if self.path_exists_in_merged_view(path) {
1454            return Err(Self::already_exists(path));
1455        }
1456        self.ensure_ancestor_directories_in_upper(path)?;
1457        self.writable_upper(path)?.create_dir(path)
1458    }
1459
1460    fn mkdir(&mut self, path: &str, recursive: bool) -> VfsResult<()> {
1461        if Self::is_internal_metadata_path(path) {
1462            return Err(VfsError::permission_denied("mkdir", path));
1463        }
1464        self.clear_path_metadata(path)?;
1465        if self.path_exists_in_merged_view(path) {
1466            let stat = self.merged_lstat(path)?;
1467            if recursive && stat.is_directory && !stat.is_symbolic_link {
1468                return Ok(());
1469            }
1470            return Err(Self::already_exists(path));
1471        }
1472        self.ensure_ancestor_directories_in_upper(path)?;
1473        self.writable_upper(path)?.mkdir(path, recursive)
1474    }
1475
1476    fn exists(&self, path: &str) -> bool {
1477        if Self::is_internal_metadata_path(path) {
1478            return false;
1479        }
1480        self.path_exists_in_merged_view(path)
1481    }
1482
1483    fn stat(&mut self, path: &str) -> VfsResult<VirtualStat> {
1484        if Self::is_internal_metadata_path(path) {
1485            return Err(Self::entry_not_found(path));
1486        }
1487        if self.is_whited_out(path) {
1488            return Err(Self::entry_not_found(path));
1489        }
1490        if self.exists_in_upper(path) {
1491            return self
1492                .upper
1493                .as_mut()
1494                .expect("upper must exist when path exists")
1495                .stat(path);
1496        }
1497        let Some(index) = self.find_lower_by_exists(path) else {
1498            return Err(Self::entry_not_found(path));
1499        };
1500        self.lowers[index].stat(path)
1501    }
1502
1503    fn remove_file(&mut self, path: &str) -> VfsResult<()> {
1504        if Self::is_internal_metadata_path(path) {
1505            return Err(VfsError::permission_denied("unlink", path));
1506        }
1507        if self.is_whited_out(path) {
1508            return Err(Self::entry_not_found(path));
1509        }
1510        let lower_exists = self.find_lower_by_exists(path).is_some();
1511        let upper_exists = self.exists_in_upper(path);
1512        if !lower_exists && !upper_exists {
1513            return Err(Self::entry_not_found(path));
1514        }
1515        if upper_exists {
1516            self.writable_upper(path)?.remove_file(path)?;
1517        } else {
1518            self.writable_upper(path)?;
1519        }
1520        self.clear_opaque_directory(path)?;
1521        self.add_whiteout(path)?;
1522        Ok(())
1523    }
1524
1525    fn remove_dir(&mut self, path: &str) -> VfsResult<()> {
1526        let normalized = Self::normalized(path);
1527        if Self::is_internal_metadata_path(&normalized) {
1528            return Err(VfsError::permission_denied("rmdir", path));
1529        }
1530        if normalized == "/" {
1531            return Err(VfsError::permission_denied("rmdir", path));
1532        }
1533
1534        let stat = match self.merged_lstat(path) {
1535            Ok(stat) => stat,
1536            Err(error) if error.code() == "ENOENT" => return Err(Self::directory_not_found(path)),
1537            Err(error) => return Err(error),
1538        };
1539
1540        if !stat.is_directory || stat.is_symbolic_link {
1541            return Err(Self::not_directory(path));
1542        }
1543
1544        if self.directory_has_raw_children(path)? {
1545            return Err(Self::not_empty(path));
1546        }
1547
1548        let lower_exists = self.find_lower_by_entry(path).is_some();
1549        let upper_exists = self.has_entry_in_upper(path);
1550        if upper_exists {
1551            self.writable_upper(path)?.remove_dir(&normalized)?;
1552        } else {
1553            self.writable_upper(path)?;
1554        }
1555        if lower_exists {
1556            self.clear_opaque_directory(path)?;
1557            self.add_whiteout(path)?;
1558        } else {
1559            self.clear_path_metadata(path)?;
1560        }
1561        Ok(())
1562    }
1563
1564    fn rename(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
1565        let old_normalized = Self::normalized(old_path);
1566        let new_normalized = Self::normalized(new_path);
1567        if Self::is_internal_metadata_path(&old_normalized)
1568            || Self::is_internal_metadata_path(&new_normalized)
1569        {
1570            return Err(VfsError::permission_denied("rename", old_path));
1571        }
1572
1573        if old_normalized == "/" {
1574            return Err(VfsError::permission_denied("rename", old_path));
1575        }
1576
1577        if old_normalized == new_normalized {
1578            return Ok(());
1579        }
1580
1581        let source_stat = self.merged_lstat(old_path)?;
1582        self.validate_destination_parent(&new_normalized)?;
1583        let resolved_new_normalized = self.resolved_destination_path(&new_normalized)?;
1584
1585        if old_normalized == resolved_new_normalized {
1586            return Ok(());
1587        }
1588
1589        if source_stat.is_directory
1590            && resolved_new_normalized.starts_with(&(old_normalized.clone() + "/"))
1591        {
1592            return Err(VfsError::new(
1593                "EINVAL",
1594                format!(
1595                    "cannot move '{}' into its own descendant '{}'",
1596                    old_path, new_path
1597                ),
1598            ));
1599        }
1600
1601        for path in self.destination_parent_copy_up_paths(&new_normalized)? {
1602            self.materialize_destination_parent_in_upper(&path)?;
1603        }
1604
1605        let mut snapshot_entries = Vec::new();
1606        self.collect_snapshot_entries(&old_normalized, &mut snapshot_entries)?;
1607
1608        if let Ok(destination_stat) = self.merged_lstat(&resolved_new_normalized) {
1609            if destination_stat.is_directory
1610                && !destination_stat.is_symbolic_link
1611                && self.directory_has_visible_entries_limited(&resolved_new_normalized)?
1612            {
1613                return Err(Self::not_empty(&resolved_new_normalized));
1614            }
1615
1616            if self.has_entry_in_upper(&resolved_new_normalized) {
1617                if destination_stat.is_directory && !destination_stat.is_symbolic_link {
1618                    self.writable_upper(&resolved_new_normalized)?
1619                        .remove_dir(&resolved_new_normalized)?;
1620                } else {
1621                    self.writable_upper(&resolved_new_normalized)?
1622                        .remove_file(&resolved_new_normalized)?;
1623                }
1624            }
1625            self.clear_subtree_metadata(&resolved_new_normalized)?;
1626        }
1627
1628        self.stage_snapshot_entries_in_upper(&snapshot_entries)?;
1629        self.copy_subtree_metadata(&old_normalized, &resolved_new_normalized)?;
1630        self.writable_upper(&old_normalized)?
1631            .rename(&old_normalized, &resolved_new_normalized)?;
1632        self.remove_snapshot_entries(&snapshot_entries)
1633    }
1634
1635    fn realpath(&self, path: &str) -> VfsResult<String> {
1636        if Self::is_internal_metadata_path(path) {
1637            return Err(Self::entry_not_found(path));
1638        }
1639        if self.is_whited_out(path) {
1640            return Err(Self::entry_not_found(path));
1641        }
1642        if self.exists_in_upper(path) {
1643            return self
1644                .upper
1645                .as_ref()
1646                .expect("upper must exist when path exists")
1647                .realpath(path);
1648        }
1649        let Some(index) = self.find_lower_by_exists(path) else {
1650            return Err(Self::entry_not_found(path));
1651        };
1652        self.lowers[index].realpath(path)
1653    }
1654
1655    fn symlink(&mut self, target: &str, link_path: &str) -> VfsResult<()> {
1656        if Self::is_internal_metadata_path(link_path) {
1657            return Err(VfsError::permission_denied("symlink", link_path));
1658        }
1659        self.clear_path_metadata(link_path)?;
1660        self.ensure_ancestor_directories_in_upper(link_path)?;
1661        self.writable_upper(link_path)?.symlink(target, link_path)
1662    }
1663
1664    fn read_link(&self, path: &str) -> VfsResult<String> {
1665        if Self::is_internal_metadata_path(path) {
1666            return Err(Self::entry_not_found(path));
1667        }
1668        if self.is_whited_out(path) {
1669            return Err(Self::entry_not_found(path));
1670        }
1671        if self.has_entry_in_upper(path) {
1672            return self
1673                .upper
1674                .as_ref()
1675                .expect("upper must exist when path exists")
1676                .read_link(path);
1677        }
1678        let Some((index, _)) = self.find_lower_by_entry(path) else {
1679            return Err(Self::entry_not_found(path));
1680        };
1681        self.lowers[index].read_link(path)
1682    }
1683
1684    fn lstat(&self, path: &str) -> VfsResult<VirtualStat> {
1685        if Self::is_internal_metadata_path(path) {
1686            return Err(Self::entry_not_found(path));
1687        }
1688        if self.is_whited_out(path) {
1689            return Err(Self::entry_not_found(path));
1690        }
1691        if self.has_entry_in_upper(path) {
1692            return self
1693                .upper
1694                .as_ref()
1695                .expect("upper must exist when path exists")
1696                .lstat(path);
1697        }
1698        self.find_lower_by_entry(path)
1699            .map(|(_, stat)| stat)
1700            .ok_or_else(|| Self::entry_not_found(path))
1701    }
1702
1703    fn link(&mut self, old_path: &str, new_path: &str) -> VfsResult<()> {
1704        if Self::is_internal_metadata_path(old_path) || Self::is_internal_metadata_path(new_path) {
1705            return Err(VfsError::permission_denied("link", new_path));
1706        }
1707        self.clear_path_metadata(new_path)?;
1708        self.copy_up_path(old_path)?;
1709        self.ensure_ancestor_directories_in_upper(new_path)?;
1710        self.writable_upper(new_path)?.link(old_path, new_path)
1711    }
1712
1713    fn chmod(&mut self, path: &str, mode: u32) -> VfsResult<()> {
1714        if Self::is_internal_metadata_path(path) {
1715            return Err(VfsError::permission_denied("chmod", path));
1716        }
1717        if self.is_whited_out(path) {
1718            return Err(Self::entry_not_found(path));
1719        }
1720        if !self.exists_in_upper(path) {
1721            self.copy_up_path(path)?;
1722        }
1723        self.writable_upper(path)?.chmod(path, mode)
1724    }
1725
1726    fn chown(&mut self, path: &str, uid: u32, gid: u32) -> VfsResult<()> {
1727        if Self::is_internal_metadata_path(path) {
1728            return Err(VfsError::permission_denied("chown", path));
1729        }
1730        if self.is_whited_out(path) {
1731            return Err(Self::entry_not_found(path));
1732        }
1733        if !self.exists_in_upper(path) {
1734            self.copy_up_path(path)?;
1735        }
1736        self.writable_upper(path)?.chown(path, uid, gid)
1737    }
1738
1739    fn utimes(&mut self, path: &str, atime_ms: u64, mtime_ms: u64) -> VfsResult<()> {
1740        if Self::is_internal_metadata_path(path) {
1741            return Err(VfsError::permission_denied("utime", path));
1742        }
1743        if self.is_whited_out(path) {
1744            return Err(Self::entry_not_found(path));
1745        }
1746        if !self.exists_in_upper(path) {
1747            self.copy_up_path(path)?;
1748        }
1749        self.writable_upper(path)?.utimes(path, atime_ms, mtime_ms)
1750    }
1751
1752    fn utimes_spec(
1753        &mut self,
1754        path: &str,
1755        atime: VirtualUtimeSpec,
1756        mtime: VirtualUtimeSpec,
1757        follow_symlinks: bool,
1758    ) -> VfsResult<()> {
1759        if Self::is_internal_metadata_path(path) {
1760            return Err(VfsError::permission_denied("utime", path));
1761        }
1762        if self.is_whited_out(path) {
1763            return Err(Self::entry_not_found(path));
1764        }
1765        if !self.exists_in_upper(path) {
1766            self.copy_up_path(path)?;
1767        }
1768        self.writable_upper(path)?
1769            .utimes_spec(path, atime, mtime, follow_symlinks)
1770    }
1771
1772    fn truncate(&mut self, path: &str, length: u64) -> VfsResult<()> {
1773        if Self::is_internal_metadata_path(path) {
1774            return Err(VfsError::permission_denied("truncate", path));
1775        }
1776        if self.is_whited_out(path) {
1777            return Err(Self::entry_not_found(path));
1778        }
1779        if !self.exists_in_upper(path) {
1780            self.copy_up_path(path)?;
1781        }
1782        self.writable_upper(path)?.truncate(path, length)
1783    }
1784
1785    fn pread(&mut self, path: &str, offset: u64, length: usize) -> VfsResult<Vec<u8>> {
1786        if Self::is_internal_metadata_path(path) {
1787            return Err(Self::entry_not_found(path));
1788        }
1789        if self.is_whited_out(path) {
1790            return Err(Self::entry_not_found(path));
1791        }
1792        if self.exists_in_upper(path) {
1793            return self
1794                .upper
1795                .as_mut()
1796                .expect("upper must exist when path exists")
1797                .pread(path, offset, length);
1798        }
1799        let Some(index) = self.find_lower_by_exists(path) else {
1800            return Err(Self::entry_not_found(path));
1801        };
1802        self.lowers[index].pread(path, offset, length)
1803    }
1804}
1805
#[cfg(test)]
mod tests {
    use super::{OverlayFileSystem, OverlayMode};
    use crate::vfs::{MemoryFileSystem, VfsResult, VirtualFileSystem};

    /// Removing a lower file and then rebuilding the overlay around the same
    /// upper layer must keep the file hidden.
    #[test]
    fn whiteouts_persist_when_overlay_reopens_with_same_upper() {
        let mut base_layer = MemoryFileSystem::new();
        base_layer
            .mkdir("/data", true)
            .expect("create lower directory");
        base_layer
            .write_file("/data/base.txt", b"base".to_vec())
            .expect("seed lower file");
        let base_snapshot = base_layer.snapshot();

        // First overlay instance: delete the lower file through the overlay.
        let first_lower = MemoryFileSystem::from_snapshot(base_snapshot.clone());
        let mut first_overlay =
            OverlayFileSystem::with_upper(vec![first_lower], MemoryFileSystem::new());
        first_overlay
            .remove_file("/data/base.txt")
            .expect("whiteout lower file");

        // Second overlay instance: same upper, fresh copy of the lower.
        let carried_upper = first_overlay.upper.take().expect("overlay upper");
        let second_lower = MemoryFileSystem::from_snapshot(base_snapshot);
        let mut reopened = OverlayFileSystem::with_upper(vec![second_lower], carried_upper);

        assert!(!reopened.exists("/data/base.txt"));
        let merged_listing = reopened.read_dir("/data").expect("read merged directory");
        assert_eq!(merged_listing, Vec::<String>::new());
    }

    /// Copying a lower directory up (here via `chmod`) must hide its lower
    /// children, and the overlay's bookkeeping directory must not show up in
    /// the root listing.
    #[test]
    fn copied_up_directories_become_opaque_and_hide_overlay_metadata() {
        let mut base_layer = MemoryFileSystem::new();
        base_layer
            .mkdir("/data", true)
            .expect("create lower directory");
        base_layer
            .write_file("/data/base.txt", b"base".to_vec())
            .expect("seed lower file");

        let mut fs = OverlayFileSystem::new(vec![base_layer], OverlayMode::Ephemeral);
        fs.chmod("/data", 0o700).expect("copy up lower directory");

        let data_listing = fs.read_dir("/data").expect("read opaque directory");
        assert_eq!(data_listing, Vec::<String>::new());

        let root_listing = fs.read_dir("/").expect("read root");
        assert!(root_listing
            .iter()
            .all(|name| name != ".secure-exec-overlay"));
    }

    /// A directory whose only children live in the lower layer and have all
    /// been removed counts as empty for `remove_dir`.
    #[test]
    fn remove_dir_succeeds_when_only_lower_children_are_whited_out() {
        let mut base_layer = MemoryFileSystem::new();
        base_layer.mkdir("/a", true).expect("create lower directory");
        base_layer
            .write_file("/a/c", b"child".to_vec())
            .expect("seed lower child");

        let mut fs = OverlayFileSystem::new(vec![base_layer], OverlayMode::Ephemeral);
        fs.remove_file("/a/c").expect("whiteout lower child");
        fs.remove_dir("/a").expect("remove merged-empty directory");

        assert!(!fs.exists("/a"));
        assert_error_code(fs.read_dir("/a"), "ENOENT");
    }

    /// A directory that still has a visible lower child must not be removable.
    #[test]
    fn remove_dir_still_rejects_visible_children() {
        let mut base_layer = MemoryFileSystem::new();
        base_layer.mkdir("/a", true).expect("create lower directory");
        base_layer
            .write_file("/a/c", b"child".to_vec())
            .expect("seed lower child");

        let mut fs = OverlayFileSystem::new(vec![base_layer], OverlayMode::Ephemeral);
        let removal = fs.remove_dir("/a");
        assert_error_code(removal, "ENOTEMPTY");
        assert!(fs.exists("/a/c"));
    }

    /// Asserts that `result` is an error whose `code()` equals `expected`.
    fn assert_error_code<T: std::fmt::Debug>(result: VfsResult<T>, expected: &str) {
        let failure = result.expect_err("expected operation to fail");
        assert_eq!(failure.code(), expected);
    }
}