libfuse_fs/overlayfs/
mod.rs

1// Copyright (C) 2023 Ant Group. All rights reserved.
2//  2024 From [fuse_backend_rs](https://github.com/cloud-hypervisor/fuse-backend-rs)
3// SPDX-License-Identifier: Apache-2.0
4
5#![allow(missing_docs)]
6mod async_io;
7pub mod config;
8mod inode_store;
9mod layer;
10mod utils;
11
12//mod tempfile;
13use core::panic;
14use std::collections::HashMap;
15use std::ffi::{OsStr, OsString};
16use std::future::Future;
17use std::io::{Error, Result};
18use std::path::Path;
19
20use config::Config;
21use futures::StreamExt as _;
22use rfuse3::raw::reply::{
23    DirectoryEntry, DirectoryEntryPlus, ReplyAttr, ReplyEntry, ReplyOpen, ReplyStatFs,
24};
25use rfuse3::raw::{Filesystem, Request, Session};
26use std::sync::{Arc, Weak};
27use tracing::debug;
28use tracing::error;
29use tracing::info;
30use tracing::trace;
31
32use rfuse3::{Errno, FileType, MountOptions, mode_from_kind_and_perm};
33const SLASH_ASCII: char = '/';
34use futures::future::join_all;
35use futures::stream::iter;
36
37use crate::passthrough::newlogfs::LoggingFileSystem;
38use crate::passthrough::{PassthroughArgs, PassthroughFs, new_passthroughfs_layer};
39use crate::util::convert_stat64_to_file_attr;
40use inode_store::InodeStore;
41use layer::Layer;
42use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
43
44use tokio::sync::{Mutex, RwLock};
45
46pub type Inode = u64;
47pub type Handle = u64;
48
49type BoxedLayer = PassthroughFs;
50//type BoxedFileSystem = Box<dyn FileSystem<Inode = Inode, Handle = Handle> + Send + Sync>;
51const INODE_ALLOC_BATCH: u64 = 0x1_0000_0000;
/// RealInode represents one inode object in a specific layer.
///
/// Each RealInode maps to one Entry of that layer, which should be 'forgotten' after drop.
/// Important note: do not impl the Clone trait for it or the refcount will be messed up.
pub(crate) struct RealInode {
    /// Layer filesystem that owns `inode`.
    pub layer: Arc<PassthroughFs>,
    /// Whether this inode lives in the upper layer; the mutating helpers on
    /// `RealInode` return `EROFS` when this is false.
    pub in_upper_layer: bool,
    /// Inode number inside `layer`. `stat64` treats 0 as "no such inode".
    pub inode: u64,
    // File is whiteouted, we need to hide it.
    pub whiteout: bool,
    // Directory is opaque, we need to hide all entries inside it.
    pub opaque: bool,
    /// Attributes captured when this value was constructed, if they were available.
    pub stat: Option<ReplyAttr>,
}
65
/// Merged view of one file or directory across the layers.
///
/// OverlayInode must be protected by lock, it can be operated by multiple threads.
// #[derive(Default)]
pub(crate) struct OverlayInode {
    // Inode hash table, map from 'name' to 'OverlayInode'.
    pub childrens: Mutex<HashMap<String, Arc<OverlayInode>>>,
    /// Weak back-reference to the parent node; `create_upper_dir` upgrades it
    /// to walk towards the root.
    pub parent: Mutex<Weak<OverlayInode>>,
    // Backend inodes from all layers.
    // `add_upper_inode` places the upper-layer inode at the front of this vector.
    pub real_inodes: Mutex<Vec<Arc<RealInode>>>,
    // Inode number.
    pub inode: u64,
    /// Path of this node; children are built as `"{parent path}/{name}"`.
    pub path: RwLock<String>,
    /// Entry name of this node inside its parent directory.
    pub name: RwLock<String>,
    /// Lookup counter; set to 1 when the node is built from a real inode.
    pub lookups: AtomicU64,
    // Node is whiteout-ed.
    pub whiteout: AtomicBool,
    // Directory is loaded.
    pub loaded: AtomicBool,
}
84
/// Cache policy selector.
///
/// `Auto` is the default variant. The meaning of each variant is decided by
/// the code that consumes this enum, which is outside this definition.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum CachePolicy {
    /// Never cache.
    Never,
    /// Let the implementation decide (default).
    #[default]
    Auto,
    /// Always cache.
    Always,
}
/// State of one overlay filesystem: its layers, the inode table and the open handles.
pub struct OverlayFs {
    /// Configuration this filesystem was built with.
    config: Config,
    /// Lower layers. NOTE(review): presumably ordered from topmost to bottommost — confirm.
    lower_layers: Vec<Arc<PassthroughFs>>,
    /// Upper layer, if one is configured.
    upper_layer: Option<Arc<PassthroughFs>>,
    // All inodes in FS.
    inodes: RwLock<InodeStore>,
    // Open file handles.
    handles: Mutex<HashMap<u64, Arc<HandleData>>>,
    /// NOTE(review): presumably the source of new handle ids for `handles` — confirm at the allocation site.
    next_handle: AtomicU64,
    // NOTE(review): the flags below are not touched in the visible part of the
    // file; their names suggest negotiated FUSE options — confirm where they are set.
    writeback: AtomicBool,
    no_open: AtomicBool,
    no_opendir: AtomicBool,
    killpriv_v2: AtomicBool,
    perfile_dax: AtomicBool,
    /// NOTE(review): looks like the inode number of the overlay root — confirm.
    root_inodes: u64,
}
108
/// Wrapper of one open handle on an inode in a specific layer. It can't impl the Clone trait.
struct RealHandle {
    /// Layer the handle was opened on.
    layer: Arc<PassthroughFs>,
    /// Whether `layer` is the upper layer.
    in_upper_layer: bool,
    /// Inode number inside `layer`.
    inode: u64,
    /// Handle value. NOTE(review): presumably the `fh` returned by the layer — confirm.
    handle: AtomicU64,
}
116
/// Per-handle state kept in `OverlayFs::handles`.
struct HandleData {
    /// Overlay inode this handle refers to.
    node: Arc<OverlayInode>,
    //offset: libc::off_t,
    /// Backing handle in a concrete layer, when there is one.
    real_handle: Option<RealHandle>,
    // Cache the directory entries for stable readdir offsets.
    // The snapshot contains all necessary info to avoid re-accessing childrens map.
    dir_snapshot: Mutex<Option<Vec<DirectoryEntryPlus>>>,
}
125
126// RealInode is a wrapper of one inode in specific layer.
127// All layer operations returning Entry should be wrapped in RealInode implementation
128// so that we can increase the refcount(lookup count) of each inode and decrease it after Drop.
129// Important: do not impl 'Copy' trait for it or refcount will be messed up.
130impl RealInode {
131    async fn new(
132        layer: Arc<PassthroughFs>,
133        in_upper_layer: bool,
134        inode: u64,
135        whiteout: bool,
136        opaque: bool,
137    ) -> Self {
138        let mut ri = RealInode {
139            layer,
140            in_upper_layer,
141            inode,
142            whiteout,
143            opaque,
144            stat: None,
145        };
146        match ri.stat64_ignore_enoent(&Request::default()).await {
147            Ok(v) => {
148                ri.stat = v;
149            }
150            Err(e) => {
151                error!("stat64 failed during RealInode creation: {e}");
152            }
153        }
154        ri
155    }
156
157    async fn stat64(&self, req: &Request) -> Result<ReplyAttr> {
158        let layer = self.layer.as_ref();
159        if self.inode == 0 {
160            return Err(Error::from_raw_os_error(libc::ENOENT));
161        }
162        // trace!("stat64: trying to getattr req: {:?}", req);
163        layer
164            .getattr(*req, self.inode, None, 0)
165            .await
166            .map_err(|e| e.into())
167    }
168
169    async fn stat64_ignore_enoent(&self, req: &Request) -> Result<Option<ReplyAttr>> {
170        match self.stat64(req).await {
171            Ok(v1) => Ok(Some(v1)),
172            Err(e) => match e.raw_os_error() {
173                Some(raw_error) => {
174                    if raw_error == libc::ENOENT
175                        || raw_error == libc::ENAMETOOLONG
176                        || raw_error == libc::ESTALE
177                    {
178                        return Ok(None);
179                    }
180                    Err(e)
181                }
182                None => Err(e),
183            },
184        }
185    }
186
187    // Do real lookup action in specific layer, this call will increase Entry refcount which must be released later.
188    async fn lookup_child_ignore_enoent(
189        &self,
190        ctx: Request,
191        name: &str,
192    ) -> Result<Option<ReplyEntry>> {
193        let cname = OsStr::new(name);
194        // Real inode must have a layer.
195        let layer = self.layer.as_ref();
196        match layer.lookup(ctx, self.inode, cname).await {
197            Ok(v) => {
198                // Negative entry also indicates missing entry.
199                if v.attr.ino == 0 {
200                    return Ok(None);
201                }
202                Ok(Some(v))
203            }
204            Err(e) => {
205                let ioerror: std::io::Error = e.into();
206                if let Some(raw_error) = ioerror.raw_os_error()
207                    && (raw_error == libc::ENOENT || raw_error == libc::ENAMETOOLONG)
208                {
209                    return Ok(None);
210                }
211
212                Err(e.into())
213            }
214        }
215    }
216
217    // Find child inode in same layer under this directory(Self).
218    // Return None if not found.
219    async fn lookup_child(&self, ctx: Request, name: &str) -> Result<Option<RealInode>> {
220        if self.whiteout {
221            return Ok(None);
222        }
223
224        let layer = self.layer.as_ref();
225
226        // Find child Entry with <name> under directory with inode <self.inode>.
227        match self.lookup_child_ignore_enoent(ctx, name).await? {
228            Some(v) => {
229                // The Entry must be forgotten in each layer, which will be done automatically by Drop operation.
230                let (whiteout, opaque) = if v.attr.kind == FileType::Directory {
231                    (false, layer.is_opaque(ctx, v.attr.ino).await?)
232                } else {
233                    (layer.is_whiteout(ctx, v.attr.ino).await?, false)
234                };
235
236                Ok(Some(RealInode {
237                    layer: self.layer.clone(),
238                    in_upper_layer: self.in_upper_layer,
239                    inode: v.attr.ino,
240                    whiteout,
241                    opaque,
242                    stat: Some(ReplyAttr {
243                        ttl: v.ttl,
244                        attr: v.attr,
245                    }),
246                }))
247            }
248            None => Ok(None),
249        }
250    }
251
252    // Read directory entries from specific RealInode, error out if it's not directory.
253    async fn readdir(&self, ctx: Request) -> Result<HashMap<String, RealInode>> {
254        // Deleted inode should not be read.
255        if self.whiteout {
256            return Err(Error::from_raw_os_error(libc::ENOENT));
257        }
258        // trace!("readdir: before stat");
259        let stat = match self.stat.clone() {
260            Some(v) => v,
261            None => self.stat64(&ctx).await?,
262        };
263
264        // Must be directory.
265        if stat.attr.kind != FileType::Directory {
266            return Err(Error::from_raw_os_error(libc::ENOTDIR));
267        }
268
269        // Open the directory and load each entry.
270        let opendir_res = self
271            .layer
272            .opendir(ctx, self.inode, libc::O_RDONLY as u32)
273            .await;
274        // trace!("readdir: after opendir");
275        let handle = match opendir_res {
276            Ok(handle) => handle,
277
278            // opendir may not be supported if no_opendir is set, so we can ignore this error.
279            Err(e) => {
280                let ioerror: std::io::Error = e.into();
281                match ioerror.raw_os_error() {
282                    Some(raw_error) => {
283                        if raw_error == libc::ENOSYS {
284                            // We can still call readdir with inode if opendir is not supported in this layer.
285                            ReplyOpen { fh: 0, flags: 0 }
286                        } else {
287                            return Err(e.into());
288                        }
289                    }
290                    None => {
291                        return Err(e.into());
292                    }
293                }
294            }
295        };
296
297        let child_names = self.layer.readdir(ctx, self.inode, handle.fh, 0).await?;
298        // Non-zero handle indicates successful 'open', we should 'release' it.
299        if handle.fh > 0 {
300            self.layer
301                .releasedir(ctx, self.inode, handle.fh, handle.flags)
302                .await?
303            //DIFF
304        }
305
306        // Lookup all child and construct "RealInode"s.
307        let child_real_inodes = Arc::new(Mutex::new(HashMap::new()));
308        // trace!("readdir: before iter childrens");
309        let a_map = child_names.entries.map(|entery| async {
310            match entery {
311                Ok(dire) => {
312                    let dname = dire.name.into_string().unwrap();
313                    if dname == "." || dname == ".." {
314                        // Skip . and .. entries.
315                        return Ok(());
316                    }
317                    // trace!("readdir: before lookup child: dname={}", dname);
318                    if let Some(child) = self.lookup_child(ctx, &dname).await? {
319                        child_real_inodes.lock().await.insert(dname, child);
320                    }
321                    Ok(())
322                }
323                Err(err) => Err(err),
324            }
325        });
326        let k = join_all(a_map.collect::<Vec<_>>().await).await;
327        drop(k);
328        // Now into_inner func is safety.
329        let re = Arc::try_unwrap(child_real_inodes)
330            .map_err(|_| Errno::new_not_exist())?
331            .into_inner();
332        // trace!("readdir: return");
333        Ok(re)
334    }
335
336    async fn create_whiteout(&self, ctx: Request, name: &str) -> Result<RealInode> {
337        if !self.in_upper_layer {
338            return Err(Error::from_raw_os_error(libc::EROFS));
339        }
340
341        // from &str to &OsStr
342        let name_osstr = OsStr::new(name);
343        let entry = self
344            .layer
345            .create_whiteout(ctx, self.inode, name_osstr)
346            .await?;
347
348        // Wrap whiteout to RealInode.
349        Ok(RealInode {
350            layer: self.layer.clone(),
351            in_upper_layer: true,
352            inode: entry.attr.ino,
353            whiteout: true,
354            opaque: false,
355            stat: Some(ReplyAttr {
356                ttl: entry.ttl,
357                attr: entry.attr,
358            }),
359        })
360    }
361
362    async fn mkdir(&self, ctx: Request, name: &str, mode: u32, umask: u32) -> Result<RealInode> {
363        if !self.in_upper_layer {
364            return Err(Error::from_raw_os_error(libc::EROFS));
365        }
366
367        let name_osstr = OsStr::new(name);
368        let entry = self
369            .layer
370            .mkdir(ctx, self.inode, name_osstr, mode, umask)
371            .await?;
372
373        // update node's first_layer
374        Ok(RealInode {
375            layer: self.layer.clone(),
376            in_upper_layer: true,
377            inode: entry.attr.ino,
378            whiteout: false,
379            opaque: false,
380            stat: Some(ReplyAttr {
381                ttl: entry.ttl,
382                attr: entry.attr,
383            }),
384        })
385    }
386
387    async fn create(
388        &self,
389        ctx: Request,
390        name: &str,
391        mode: u32,
392        flags: u32,
393    ) -> Result<(RealInode, Option<u64>)> {
394        if !self.in_upper_layer {
395            return Err(Error::from_raw_os_error(libc::EROFS));
396        }
397        let name = OsStr::new(name);
398        let create_rep = self
399            .layer
400            .create(ctx, self.inode, name, mode, flags)
401            .await?;
402
403        Ok((
404            RealInode {
405                layer: self.layer.clone(),
406                in_upper_layer: true,
407                inode: create_rep.attr.ino,
408                whiteout: false,
409                opaque: false,
410                stat: Some(ReplyAttr {
411                    ttl: create_rep.ttl,
412                    attr: create_rep.attr,
413                }),
414            },
415            Some(create_rep.fh),
416        ))
417    }
418
419    async fn mknod(
420        &self,
421        ctx: Request,
422        name: &str,
423        mode: u32,
424        rdev: u32,
425        _umask: u32,
426    ) -> Result<RealInode> {
427        if !self.in_upper_layer {
428            return Err(Error::from_raw_os_error(libc::EROFS));
429        }
430        let name = OsStr::new(name);
431        let rep = self.layer.mknod(ctx, self.inode, name, mode, rdev).await?;
432        Ok(RealInode {
433            layer: self.layer.clone(),
434            in_upper_layer: true,
435            inode: rep.attr.ino,
436            whiteout: false,
437            opaque: false,
438            stat: Some(ReplyAttr {
439                ttl: rep.ttl,
440                attr: rep.attr,
441            }),
442        })
443    }
444
445    async fn link(&self, ctx: Request, ino: u64, name: &str) -> Result<RealInode> {
446        if !self.in_upper_layer {
447            return Err(Error::from_raw_os_error(libc::EROFS));
448        }
449        let name = OsStr::new(name);
450        let entry = self.layer.link(ctx, ino, self.inode, name).await?;
451
452        let opaque = if utils::is_dir(&entry.attr.kind) {
453            self.layer.is_opaque(ctx, entry.attr.ino).await?
454        } else {
455            false
456        };
457        Ok(RealInode {
458            layer: self.layer.clone(),
459            in_upper_layer: true,
460            inode: entry.attr.ino,
461            whiteout: false,
462            opaque,
463            stat: Some(ReplyAttr {
464                ttl: entry.ttl,
465                attr: entry.attr,
466            }),
467        })
468    }
469
470    // Create a symlink in self directory.
471    async fn symlink(&self, ctx: Request, link_name: &str, filename: &str) -> Result<RealInode> {
472        if !self.in_upper_layer {
473            return Err(Error::from_raw_os_error(libc::EROFS));
474        }
475        let link_name = OsStr::new(link_name);
476        let filename = OsStr::new(filename);
477        let entry = self
478            .layer
479            .symlink(ctx, self.inode, filename, link_name)
480            .await?;
481
482        Ok(RealInode {
483            layer: self.layer.clone(),
484            in_upper_layer: true,
485            inode: entry.attr.ino,
486            whiteout: false,
487            opaque: false,
488            stat: Some(ReplyAttr {
489                ttl: entry.ttl,
490                attr: entry.attr,
491            }),
492        })
493    }
494}
495
496impl Drop for RealInode {
497    fn drop(&mut self) {
498        let layer = Arc::clone(&self.layer);
499        let inode = self.inode;
500        tokio::spawn(async move {
501            let ctx = Request::default();
502            layer.forget(ctx, inode, 1).await;
503        });
504    }
505}
506
507impl OverlayInode {
508    pub fn new() -> Self {
509        Self {
510            childrens: Mutex::new(HashMap::new()),
511            parent: Mutex::new(Weak::new()),
512            real_inodes: Mutex::new(vec![]),
513            inode: 0,
514            path: RwLock::new(String::new()),
515            name: RwLock::new(String::new()),
516            lookups: AtomicU64::new(0),
517            whiteout: AtomicBool::new(false),
518            loaded: AtomicBool::new(false),
519        }
520    }
521    // Allocate new OverlayInode based on one RealInode,
522    // inode number is always 0 since only OverlayFs has global unique inode allocator.
523    pub async fn new_from_real_inode(
524        name: &str,
525        ino: u64,
526        path: String,
527        real_inode: RealInode,
528    ) -> Self {
529        let mut new = OverlayInode::new();
530        new.inode = ino;
531        new.path = path.into();
532        new.name = name.to_string().into();
533        new.whiteout.store(real_inode.whiteout, Ordering::Relaxed);
534        new.lookups = AtomicU64::new(1);
535        new.real_inodes = Mutex::new(vec![real_inode.into()]);
536        new
537    }
538
539    pub async fn new_from_real_inodes(
540        name: &str,
541        ino: u64,
542        path: String,
543        real_inodes: Vec<RealInode>,
544    ) -> Result<Self> {
545        if real_inodes.is_empty() {
546            error!("BUG: new_from_real_inodes() called with empty real_inodes");
547            return Err(Error::from_raw_os_error(libc::EINVAL));
548        }
549
550        let mut first = true;
551        let mut new = Self::new();
552        for ri in real_inodes {
553            let whiteout = ri.whiteout;
554            let opaque = ri.opaque;
555            let stat = match &ri.stat {
556                Some(v) => v.clone(),
557                None => ri.stat64(&Request::default()).await?,
558            };
559
560            if first {
561                first = false;
562                new = Self::new_from_real_inode(name, ino, path.clone(), ri).await;
563
564                // This is whiteout, no need to check lower layers.
565                if whiteout {
566                    break;
567                }
568
569                // A non-directory file shadows all lower layers as default.
570                if !utils::is_dir(&stat.attr.kind) {
571                    break;
572                }
573
574                // Opaque directory shadows all lower layers.
575                if opaque {
576                    break;
577                }
578            } else {
579                // This is whiteout, no need to record this, break directly.
580                if ri.whiteout {
581                    break;
582                }
583
584                // Only directory have multiple real inodes, so if this is non-first real-inode
585                // and it's not directory, it should indicates some invalid layout. @weizhang555
586                if !utils::is_dir(&stat.attr.kind) {
587                    error!("invalid layout: non-directory has multiple real inodes");
588                    break;
589                }
590
591                // Valid directory.
592                new.real_inodes.lock().await.push(ri.into());
593                // Opaque directory shadows all lower layers.
594                if opaque {
595                    break;
596                }
597            }
598        }
599        Ok(new)
600    }
601
602    pub async fn stat64(&self, ctx: Request) -> Result<ReplyAttr> {
603        // try layers in order or just take stat from first layer?
604        for l in self.real_inodes.lock().await.iter() {
605            if let Some(v) = l.stat64_ignore_enoent(&ctx).await? {
606                return Ok(v);
607            }
608        }
609
610        // not in any layer
611        Err(Error::from_raw_os_error(libc::ENOENT))
612    }
613
614    pub async fn is_dir(&self, ctx: Request) -> Result<bool> {
615        let st = self.stat64(ctx).await?;
616        Ok(utils::is_dir(&st.attr.kind))
617    }
618
619    pub async fn count_entries_and_whiteout(&self, ctx: Request) -> Result<(u64, u64)> {
620        let mut count = 0;
621        let mut whiteouts = 0;
622
623        let st = self.stat64(ctx).await?;
624
625        // must be directory
626        if !utils::is_dir(&st.attr.kind) {
627            return Err(Error::from_raw_os_error(libc::ENOTDIR));
628        }
629
630        for (_, child) in self.childrens.lock().await.iter() {
631            if child.whiteout.load(Ordering::Relaxed) {
632                whiteouts += 1;
633            } else {
634                count += 1;
635            }
636        }
637        Ok((count, whiteouts))
638    }
639
640    pub async fn open(
641        &self,
642        ctx: Request,
643        flags: u32,
644        _fuse_flags: u32,
645    ) -> Result<(Arc<BoxedLayer>, ReplyOpen)> {
646        let (layer, _, inode) = self.first_layer_inode().await;
647        let ro = layer.as_ref().open(ctx, inode, flags).await?;
648        Ok((layer, ro))
649    }
650
651    // Self is directory, fill all childrens.
652    pub async fn scan_childrens(self: &Arc<Self>, ctx: Request) -> Result<Vec<OverlayInode>> {
653        let st = self.stat64(ctx).await?;
654        if !utils::is_dir(&st.attr.kind) {
655            return Err(Error::from_raw_os_error(libc::ENOTDIR));
656        }
657
658        let mut all_layer_inodes: HashMap<String, Vec<RealInode>> = HashMap::new();
659        // read out directories from each layer
660        // Scan from upper layer to lower layer.
661        for ri in self.real_inodes.lock().await.iter() {
662            if ri.whiteout {
663                // Node is deleted from some upper layer, skip it.
664                debug!("directory is whiteout");
665                break;
666            }
667
668            let stat = match &ri.stat {
669                Some(v) => v.clone(),
670                None => ri.stat64(&ctx).await?,
671            };
672
673            if !utils::is_dir(&stat.attr.kind) {
674                debug!("{} is not a directory", self.path.read().await);
675                // not directory
676                break;
677            }
678
679            // Read all entries from one layer.
680            let entries: HashMap<String, RealInode> = ri.readdir(ctx).await?;
681
682            // Merge entries from one layer to all_layer_inodes.
683            for (name, inode) in entries {
684                match all_layer_inodes.get_mut(&name) {
685                    Some(v) => {
686                        // Append additional RealInode to the end of vector.
687                        v.push(inode)
688                    }
689                    None => {
690                        all_layer_inodes.insert(name, vec![inode]);
691                    }
692                }
693            }
694
695            // if opaque, stop here
696            if ri.opaque {
697                debug!("directory {} is opaque", self.path.read().await);
698                break;
699            }
700        }
701
702        // Construct OverlayInode for each entry.
703        let mut childrens = vec![];
704        for (name, real_inodes) in all_layer_inodes {
705            // Inode numbers are not allocated yet.
706            let path = format!("{}/{}", self.path.read().await, name);
707            let new = Self::new_from_real_inodes(name.as_str(), 0, path, real_inodes).await?;
708            childrens.push(new);
709        }
710
711        Ok(childrens)
712    }
713
714    /// Create a new directory in upper layer for node, node must be directory.
715    ///
716    /// Recursively ensures a directory path exists in the upper layer.
717    ///
718    /// This function is a critical part of the copy-up process. When a file or directory
719    /// needs to be copied up, this function is called on its parent to ensure the entire
720    /// directory hierarchy exists in the upper layer first. It works recursively:
721    /// 1. If the current directory is already in the upper layer, it does nothing.
722    /// 2. If not, it first calls itself on its own parent directory.
723    /// 3. Once the parent is guaranteed to be in the upper layer, it creates the current
724    ///    directory within the parent's upper-layer representation.
725    ///
726    /// Crucially, it preserves the original directory's ownership (UID/GID) and permissions
727    /// by using the [`do_getattr_helper`][crate::passthrough::PassthroughFs::do_getattr_helper] and
728    /// [`do_mkdir_helper`][crate::passthrough::PassthroughFs::do_mkdir_helper] functions.
729    pub async fn create_upper_dir(
730        self: Arc<Self>,
731        ctx: Request,
732        mode_umask: Option<(u32, u32)>,
733    ) -> Result<()> {
734        // To preserve original ownership, we must get the raw, unmapped host attributes.
735        // We achieve this by calling `do_getattr_helper`, which is specifically designed
736        // to bypass the ID mapping logic. This is safe and does not affect other
737        // functionalities because `do_getattr_helper` and the standard `stat64()` call
738        // both rely on the same underlying `stat` system call; they only differ in
739        // whether the resulting `uid` and `gid` are mapped.
740        let (self_layer, _, self_inode) = self.first_layer_inode().await;
741        let re = self_layer.do_getattr_helper(self_inode, None).await?;
742        let st = ReplyAttr {
743            ttl: re.1,
744            attr: convert_stat64_to_file_attr(re.0),
745        };
746        if !utils::is_dir(&st.attr.kind) {
747            return Err(Error::from_raw_os_error(libc::ENOTDIR));
748        }
749
750        // If node already has upper layer, we can just return here.
751        if self.in_upper_layer().await {
752            return Ok(());
753        }
754
755        // not in upper layer, check parent.
756        let pnode = if let Some(n) = self.parent.lock().await.upgrade() {
757            Arc::clone(&n)
758        } else {
759            return Err(Error::other("no parent?"));
760        };
761
762        if !pnode.in_upper_layer().await {
763            Box::pin(pnode.clone().create_upper_dir(ctx, None)).await?; // recursive call
764        }
765        let child: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
766        let c_name = self.name.read().await.clone();
767        let _ = pnode
768            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
769                match parent_upper_inode {
770                    Some(parent_ri) => {
771                        let ri = match mode_umask {
772                            // We manually unfold the `mkdir` logic here instead of calling the `mkdir` method directly.
773                            // This is necessary to preserve the original directory's UID and GID during the copy-up process.
774                            Some((mode, umask)) => {
775                                if !parent_ri.in_upper_layer {
776                                    return Err(Error::from_raw_os_error(libc::EROFS));
777                                }
778                                let name_osstr = OsStr::new(&c_name);
779                                let entry = parent_ri
780                                    .layer
781                                    .do_mkdir_helper(
782                                        ctx,
783                                        parent_ri.inode,
784                                        name_osstr,
785                                        mode,
786                                        umask,
787                                        st.attr.uid,
788                                        st.attr.gid,
789                                    )
790                                    .await?;
791                                RealInode {
792                                    layer: parent_ri.layer.clone(),
793                                    in_upper_layer: true,
794                                    inode: entry.attr.ino,
795                                    whiteout: false,
796                                    opaque: false,
797                                    stat: Some(ReplyAttr {
798                                        ttl: entry.ttl,
799                                        attr: entry.attr,
800                                    }),
801                                }
802                            }
803                            None => {
804                                if !parent_ri.in_upper_layer {
805                                    return Err(Error::from_raw_os_error(libc::EROFS));
806                                }
807                                let name_osstr = OsStr::new(&c_name);
808                                let entry = parent_ri
809                                    .layer
810                                    .do_mkdir_helper(
811                                        ctx,
812                                        parent_ri.inode,
813                                        name_osstr,
814                                        mode_from_kind_and_perm(st.attr.kind, st.attr.perm),
815                                        0,
816                                        st.attr.uid,
817                                        st.attr.gid,
818                                    )
819                                    .await?;
820                                RealInode {
821                                    layer: parent_ri.layer.clone(),
822                                    in_upper_layer: true,
823                                    inode: entry.attr.ino,
824                                    whiteout: false,
825                                    opaque: false,
826                                    stat: Some(ReplyAttr {
827                                        ttl: entry.ttl,
828                                        attr: entry.attr,
829                                    }),
830                                }
831                            }
832                        };
833                        // create directory here
834                        child.lock().await.replace(ri);
835                    }
836                    None => {
837                        error!(
838                            "BUG: parent {} has no upper inode after create_upper_dir",
839                            pnode.inode
840                        );
841                        return Err(Error::from_raw_os_error(libc::EINVAL));
842                    }
843                }
844                Ok(false)
845            })
846            .await?;
847
848        if let Some(ri) = child.lock().await.take() {
849            // Push the new real inode to the front of vector.
850            self.add_upper_inode(ri, false).await;
851        }
852
853        Ok(())
854    }
855
856    // Add new upper RealInode to OverlayInode, clear all lower RealInodes if 'clear_lowers' is true.
857    async fn add_upper_inode(self: &Arc<Self>, ri: RealInode, clear_lowers: bool) {
858        let mut inodes = self.real_inodes.lock().await;
859        // Update self according to upper attribute.
860        self.whiteout.store(ri.whiteout, Ordering::Relaxed);
861
862        // Push the new real inode to the front of vector.
863        let mut new = vec![Arc::new(ri)];
864        // Drain lower RealInodes.
865        let lowers = inodes.drain(..).collect::<Vec<Arc<RealInode>>>();
866        if !clear_lowers {
867            // If not clear lowers, append them to the end of vector.
868            new.extend(lowers);
869        }
870        inodes.extend(new);
871    }
872
873    // return the uppder layer fs.
874    pub async fn in_upper_layer(&self) -> bool {
875        let all_inodes = self.real_inodes.lock().await;
876        let first = all_inodes.first();
877        match first {
878            Some(v) => v.in_upper_layer,
879            None => false,
880        }
881    }
882
883    pub async fn upper_layer_only(&self) -> bool {
884        let real_inodes = self.real_inodes.lock().await;
885        let first = real_inodes.first();
886        match first {
887            Some(v) => {
888                if !v.in_upper_layer {
889                    false
890                } else {
891                    real_inodes.len() == 1
892                }
893            }
894            None => false,
895        }
896    }
897
898    pub async fn first_layer_inode(&self) -> (Arc<BoxedLayer>, bool, u64) {
899        let all_inodes = self.real_inodes.lock().await;
900        let first = all_inodes.first();
901        match first {
902            Some(v) => (v.layer.clone(), v.in_upper_layer, v.inode),
903            None => panic!("BUG: dangling OverlayInode"),
904        }
905    }
906
907    pub async fn child(&self, name: &str) -> Option<Arc<OverlayInode>> {
908        self.childrens.lock().await.get(name).cloned()
909    }
910
911    pub async fn remove_child(&self, name: &str) -> Option<Arc<OverlayInode>> {
912        self.childrens.lock().await.remove(name)
913    }
914
915    pub async fn insert_child(&self, name: &str, node: Arc<OverlayInode>) {
916        self.childrens.lock().await.insert(name.to_string(), node);
917    }
918
919    /// Handles operations on the upper layer inode of an `OverlayInode` in a thread-safe manner.
920    ///
921    /// This function locks the `real_inodes` field of the `OverlayInode` and retrieves the first
922    /// real inode (if any). If the first inode exists and belongs to the upper layer (`in_upper_layer` is true),
923    /// the provided callback `f` is invoked with the inode wrapped in `Some`. Otherwise, `f` is invoked with `None`.
924    ///
925    /// # Arguments
926    /// * `f`: A closure that takes an `Option<RealInode>` and returns a future. The future resolves to a `Result<bool>`.
927    ///
928    /// # Returns
929    /// * `Ok(bool)`: The result of invoking the callback `f`.
930    /// * `Err(Erron)`: An error is returned if:
931    ///   - There are no backend inodes (`real_inodes` is empty), indicating a dangling `OverlayInode`.
932    ///   - The callback `f` itself returns an error.
933    ///
934    /// # Behavior
935    /// 1. Locks the `real_inodes` field to ensure thread safety.
936    /// 2. Checks if the first inode exists:
937    ///    - If it exists and is in the upper layer, invokes `f(Some(inode))`.
938    ///    - If it exists but is not in the upper layer, invokes `f(None)`.
939    /// 3. If no inodes exist, returns an error indicating a dangling `OverlayInode`.
940    ///
941    /// # Example Use Case
942    /// This function is typically used to perform operations on the upper layer inode of an `OverlayInode`,
943    /// such as creating, modifying, or deleting files/directories in the overlay filesystem's upper layer.
944    pub async fn handle_upper_inode_locked<F, Fut>(&self, f: F) -> Result<bool>
945    where
946        // Can pass a &RealInode (or None) to f for any lifetime 'a
947        F: FnOnce(Option<Arc<RealInode>>) -> Fut,
948        // f returns a Future that must live at least as long as 'a
949        Fut: Future<Output = Result<bool>>,
950    {
951        let all_inodes = self.real_inodes.lock().await;
952        let first = all_inodes.first();
953        match first {
954            Some(v) => {
955                if v.in_upper_layer {
956                    f(Some(v.clone())).await
957                } else {
958                    f(None).await
959                }
960            }
961            None => Err(Error::other(format!(
962                "BUG: dangling OverlayInode {} without any backend inode",
963                self.inode
964            ))),
965        }
966    }
967}
968#[allow(unused)]
969fn entry_type_from_mode(mode: libc::mode_t) -> u8 {
970    match mode & libc::S_IFMT {
971        libc::S_IFBLK => libc::DT_BLK,
972        libc::S_IFCHR => libc::DT_CHR,
973        libc::S_IFDIR => libc::DT_DIR,
974        libc::S_IFIFO => libc::DT_FIFO,
975        libc::S_IFLNK => libc::DT_LNK,
976        libc::S_IFREG => libc::DT_REG,
977        libc::S_IFSOCK => libc::DT_SOCK,
978        _ => libc::DT_UNKNOWN,
979    }
980}
981impl OverlayFs {
982    pub fn new(
983        upper: Option<Arc<BoxedLayer>>,
984        lowers: Vec<Arc<BoxedLayer>>,
985        params: Config,
986        root_inode: u64,
987    ) -> Result<Self> {
988        Ok(OverlayFs {
989            config: params,
990            lower_layers: lowers,
991            upper_layer: upper,
992            inodes: RwLock::new(InodeStore::new()),
993            handles: Mutex::new(HashMap::new()),
994            next_handle: AtomicU64::new(1),
995            writeback: AtomicBool::new(false),
996            no_open: AtomicBool::new(false),
997            no_opendir: AtomicBool::new(false),
998            killpriv_v2: AtomicBool::new(false),
999            perfile_dax: AtomicBool::new(false),
1000            root_inodes: root_inode,
1001        })
1002    }
1003
1004    pub fn root_inode(&self) -> Inode {
1005        self.root_inodes
1006    }
1007
1008    async fn alloc_inode(&self, path: &str) -> Result<u64> {
1009        self.inodes.write().await.alloc_inode(path)
1010    }
1011
1012    /// Add a file layer and stack and merge the previous file layers.
1013    pub async fn push_layer(&mut self, layer: Arc<BoxedLayer>) -> Result<()> {
1014        let upper = self.upper_layer.take();
1015        if let Some(upper) = upper {
1016            self.lower_layers.push(upper);
1017        }
1018        self.upper_layer = Some(layer);
1019        // TODO: merge previous file layers. need optimization
1020        self.import().await?;
1021        Ok(())
1022    }
1023
1024    pub async fn import(&self) -> Result<()> {
1025        let mut root = OverlayInode::new();
1026        root.inode = self.root_inode();
1027        root.path = String::from("").into();
1028        root.name = String::from("").into();
1029        root.lookups = AtomicU64::new(2);
1030        root.real_inodes = Mutex::new(vec![]);
1031        let ctx = Request::default();
1032
1033        // Update upper inode
1034        if let Some(layer) = self.upper_layer.as_ref() {
1035            let ino = layer.root_inode();
1036            let real = RealInode::new(
1037                layer.clone(),
1038                true,
1039                ino,
1040                false,
1041                layer.is_opaque(ctx, ino).await?,
1042            )
1043            .await;
1044            root.real_inodes.lock().await.push(real.into());
1045        }
1046
1047        // Update lower inodes.
1048        for layer in self.lower_layers.iter() {
1049            let ino = layer.root_inode();
1050            let real: RealInode = RealInode::new(
1051                layer.clone(),
1052                false,
1053                ino,
1054                false,
1055                layer.is_opaque(ctx, ino).await?,
1056            )
1057            .await;
1058            root.real_inodes.lock().await.push(real.into());
1059        }
1060        let root_node = Arc::new(root);
1061
1062        // insert root inode into hash
1063        self.insert_inode(self.root_inode(), Arc::clone(&root_node))
1064            .await;
1065
1066        info!("loading root directory");
1067        self.load_directory(ctx, &root_node).await?;
1068        info!("loaded root directory");
1069
1070        Ok(())
1071    }
1072
1073    async fn root_node(&self) -> Arc<OverlayInode> {
1074        // Root node must exist.
1075        self.get_active_inode(self.root_inode()).await.unwrap()
1076    }
1077
1078    async fn insert_inode(&self, inode: u64, node: Arc<OverlayInode>) {
1079        self.inodes.write().await.insert_inode(inode, node).await;
1080    }
1081
1082    async fn get_active_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1083        self.inodes.read().await.get_inode(inode)
1084    }
1085
1086    // Get inode which is active or deleted.
1087    async fn get_all_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1088        let inode_store = self.inodes.read().await;
1089        match inode_store.get_inode(inode) {
1090            Some(n) => Some(n),
1091            None => inode_store.get_deleted_inode(inode),
1092        }
1093    }
1094
1095    // Return the inode only if it's permanently deleted from both self.inodes and self.deleted_inodes.
1096    async fn remove_inode(
1097        &self,
1098        inode: u64,
1099        path_removed: Option<String>,
1100    ) -> Option<Arc<OverlayInode>> {
1101        self.inodes
1102            .write()
1103            .await
1104            .remove_inode(inode, path_removed)
1105            .await
1106    }
1107
1108    // Lookup child OverlayInode with <name> under <parent> directory.
1109    // If name is empty, return parent itself.
1110    // Parent dir will be loaded, but returned OverlayInode won't.
1111    async fn lookup_node(
1112        &self,
1113        ctx: Request,
1114        parent: Inode,
1115        name: &str,
1116    ) -> Result<Arc<OverlayInode>> {
1117        if name.contains(SLASH_ASCII) {
1118            return Err(Error::from_raw_os_error(libc::EINVAL));
1119        }
1120
1121        // Parent inode is expected to be loaded before this function is called.
1122        // TODO: Is this correct?
1123        let pnode = match self.get_active_inode(parent).await {
1124            Some(v) => v,
1125            None => {
1126                match self.get_all_inode(parent).await {
1127                    Some(v) => {
1128                        trace!(
1129                            "overlayfs:mod.rs:1031:lookup_node: parent inode {parent} is deleted"
1130                        );
1131                        v
1132                    }
1133                    None => {
1134                        trace!(
1135                            "overlayfs:mod.rs:1034:lookup_node: parent inode {parent} not found"
1136                        );
1137                        // Parent inode is not found, return ENOENT.
1138                        return Err(Error::from_raw_os_error(libc::ENOENT));
1139                    }
1140                }
1141            }
1142        };
1143
1144        // Parent is whiteout-ed, return ENOENT.
1145        if pnode.whiteout.load(Ordering::Relaxed) {
1146            return Err(Error::from_raw_os_error(libc::ENOENT));
1147        }
1148
1149        let st = pnode.stat64(ctx).await?;
1150        if utils::is_dir(&st.attr.kind) && !pnode.loaded.load(Ordering::Relaxed) {
1151            // Parent is expected to be directory, load it first.
1152            self.load_directory(ctx, &pnode).await?;
1153        }
1154
1155        // Current file or dir.
1156        if name.eq(".")  
1157            // Root directory has no parent.
1158            || (parent == self.root_inode() && name.eq("..")) 
1159            // Special convention: empty name indicates current dir.
1160            || name.is_empty()
1161        {
1162            return Ok(Arc::clone(&pnode));
1163        }
1164
1165        match pnode.child(name).await {
1166            // Child is found.
1167            Some(v) => Ok(v),
1168            None => {
1169                trace!("lookup_node: child {name} not found");
1170                Err(Error::from_raw_os_error(libc::ENOENT))
1171            }
1172        }
1173    }
1174
1175    async fn lookup_node_ignore_enoent(
1176        &self,
1177        ctx: Request,
1178        parent: u64,
1179        name: &str,
1180    ) -> Result<Option<Arc<OverlayInode>>> {
1181        match self.lookup_node(ctx, parent, name).await {
1182            Ok(n) => Ok(Some(Arc::clone(&n))),
1183            Err(e) => {
1184                if let Some(raw_error) = e.raw_os_error()
1185                    && raw_error == libc::ENOENT
1186                {
1187                    return Ok(None);
1188                }
1189                Err(e)
1190            }
1191        }
1192    }
1193
1194    // Load entries of the directory from all layers, if node is not directory, return directly.
1195    async fn load_directory(&self, ctx: Request, node: &Arc<OverlayInode>) -> Result<()> {
1196        if node.loaded.load(Ordering::Relaxed) {
1197            return Ok(());
1198        }
1199
1200        // We got all childrens without inode.
1201        // info!("before scan childrens, ctx: {:?}, node: {:?}", ctx, node.inode);
1202        let childrens = node.scan_childrens(ctx).await?;
1203        // info!("scanned children");
1204
1205        // =============== Start Lock Area ===================
1206        // Lock OverlayFs inodes.
1207        let mut inode_store = self.inodes.write().await;
1208        // Lock the OverlayInode and its childrens.
1209        let mut node_children = node.childrens.lock().await;
1210
1211        // Check again in case another 'load_directory' function call gets locks and want to do duplicated work.
1212        if node.loaded.load(Ordering::Relaxed) {
1213            return Ok(());
1214        }
1215
1216        // Now we have two locks' protection, Fs inodes lock and OverlayInode's childrens lock.
1217        // info!("before iter childrens");
1218        for mut child in childrens.into_iter() {
1219            // Allocate inode for each child.
1220            let ino = inode_store.alloc_inode(&child.path.read().await)?;
1221
1222            let name = child.name.read().await.clone();
1223            child.inode = ino;
1224            // Create bi-directional link between parent and child.
1225            child.parent = Mutex::new(Arc::downgrade(node));
1226
1227            let arc_child = Arc::new(child);
1228            node_children.insert(name, arc_child.clone());
1229            // Record overlay inode in whole OverlayFs.
1230            inode_store.insert_inode(ino, arc_child).await;
1231        }
1232        // info!("after iter childrens");
1233
1234        node.loaded.store(true, Ordering::Relaxed);
1235
1236        Ok(())
1237    }
1238
1239    async fn forget_one(&self, inode: Inode, count: u64) {
1240        if inode == self.root_inode() || inode == 0 {
1241            return;
1242        }
1243
1244        let v = match self.get_all_inode(inode).await {
1245            Some(n) => n,
1246            None => {
1247                trace!("forget unknown inode: {inode}");
1248                return;
1249            }
1250        };
1251
1252        // Use fetch_update to atomically update lookups in a loop until it succeeds
1253        v.lookups
1254            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
1255                // If count is larger than current lookups, return 0
1256                // Otherwise subtract count from current lookups
1257                if current < count {
1258                    Some(0)
1259                } else {
1260                    Some(current - count)
1261                }
1262            })
1263            .expect("fetch_update failed");
1264
1265        let lookups = v.lookups.load(Ordering::Relaxed);
1266        trace!(
1267            "forget inode: {}, name {}, lookups: {}",
1268            inode,
1269            v.name.read().await,
1270            lookups
1271        );
1272        if lookups == 0 {
1273            debug!(
1274                "inode is forgotten: {}, name {}",
1275                inode,
1276                v.name.read().await
1277            );
1278            let _ = self.remove_inode(inode, None).await;
1279            let parent = v.parent.lock().await;
1280
1281            if let Some(p) = parent.upgrade() {
1282                // remove it from hashmap
1283                p.remove_child(&v.name.read().await).await;
1284            }
1285        }
1286    }
1287
1288    async fn do_lookup(&self, ctx: Request, parent: Inode, name: &str) -> Result<ReplyEntry> {
1289        let node = self.lookup_node(ctx, parent, name).await?;
1290        debug!("do_lookup: {name:?}, found");
1291
1292        if node.whiteout.load(Ordering::Relaxed) {
1293            eprintln!("Error: node.whiteout.load() called.");
1294            return Err(Error::from_raw_os_error(libc::ENOENT));
1295        }
1296
1297        let mut st = node.stat64(ctx).await?;
1298        st.attr.ino = node.inode;
1299        if utils::is_dir(&st.attr.kind) && !node.loaded.load(Ordering::Relaxed) {
1300            self.load_directory(ctx, &node).await?;
1301        }
1302
1303        // FIXME: can forget happen between found and increase reference counter?
1304        let tmp = node.lookups.fetch_add(1, Ordering::Relaxed);
1305        trace!("lookup count: {}", tmp + 1);
1306        Ok(ReplyEntry {
1307            ttl: st.ttl,
1308            attr: st.attr,
1309            generation: 0,
1310        })
1311    }
1312
1313    async fn do_statvfs(&self, ctx: Request, inode: Inode) -> Result<ReplyStatFs> {
1314        match self.get_active_inode(inode).await {
1315            Some(ovi) => {
1316                let all_inodes = ovi.real_inodes.lock().await;
1317                let real_inode = all_inodes
1318                    .first()
1319                    .ok_or(Error::other("backend inode not found"))?;
1320                Ok(real_inode.layer.statfs(ctx, real_inode.inode).await?)
1321            }
1322            None => Err(Error::from_raw_os_error(libc::ENOENT)),
1323        }
1324    }
1325
1326    #[allow(clippy::too_many_arguments)]
1327    async fn do_readdir<'a>(
1328        &self,
1329        ctx: Request,
1330        inode: Inode,
1331        handle: u64,
1332        offset: u64,
1333    ) -> Result<
1334        impl futures_util::stream::Stream<Item = std::result::Result<DirectoryEntry, Errno>> + Send + 'a,
1335    > {
1336        let snapshot = self.get_or_create_dir_snapshot(ctx, inode, handle).await?;
1337
1338        let entries: Vec<std::result::Result<DirectoryEntry, Errno>> =
1339            if offset < snapshot.len() as u64 {
1340                snapshot
1341                    .iter()
1342                    .skip(offset as usize)
1343                    .map(|entry| {
1344                        Ok(DirectoryEntry {
1345                            inode: entry.inode,
1346                            kind: entry.kind,
1347                            name: entry.name.clone(),
1348                            offset: entry.offset,
1349                        })
1350                    })
1351                    .collect()
1352            } else {
1353                vec![]
1354            };
1355
1356        Ok(iter(entries))
1357    }
1358
1359    #[allow(clippy::too_many_arguments)]
1360    async fn do_readdirplus<'a>(
1361        &self,
1362        ctx: Request,
1363        inode: Inode,
1364        handle: u64,
1365        offset: u64,
1366    ) -> Result<
1367        impl futures_util::stream::Stream<Item = std::result::Result<DirectoryEntryPlus, Errno>>
1368        + Send
1369        + 'a,
1370    > {
1371        let snapshot = self.get_or_create_dir_snapshot(ctx, inode, handle).await?;
1372
1373        let mut entries = Vec::new();
1374        if offset < snapshot.len() as u64 {
1375            for entry in snapshot.iter().skip(offset as usize) {
1376                // Increment lookup count for readdirplus as we are handing out a reference to the kernel.
1377                // We must do this here, not in snapshot creation, and we must NOT decrement it in HandleData drop.
1378                // The kernel will send a FORGET request when it's done with the entry.
1379                if let Some(node) = self.get_all_inode(entry.inode).await {
1380                    node.lookups.fetch_add(1, Ordering::Relaxed);
1381                }
1382                entries.push(Ok(entry.clone()));
1383            }
1384        }
1385
1386        Ok(iter(entries))
1387    }
1388
1389    async fn get_or_create_dir_snapshot(
1390        &self,
1391        ctx: Request,
1392        inode: Inode,
1393        handle: u64,
1394    ) -> Result<Vec<DirectoryEntryPlus>> {
1395        let handle_data = match self.handles.lock().await.get(&handle) {
1396            Some(hd) if hd.node.inode == inode => hd.clone(),
1397            _ => {
1398                // Fallback for cases without a valid handle (e.g. no-opendir)
1399                let node = self.lookup_node(ctx, inode, ".").await?;
1400                let st = node.stat64(ctx).await?;
1401                if !utils::is_dir(&st.attr.kind) {
1402                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1403                }
1404                // Create a temporary HandleData for this call only.
1405                Arc::new(HandleData {
1406                    node,
1407                    real_handle: None,
1408                    dir_snapshot: Mutex::new(None),
1409                })
1410            }
1411        };
1412
1413        // Optimistic check
1414        if let Some(snapshot) = handle_data.dir_snapshot.lock().await.as_ref() {
1415            return Ok(snapshot.clone());
1416        }
1417
1418        // Snapshot doesn't exist, create it.
1419        let ovl_inode = &handle_data.node;
1420        self.load_directory(ctx, ovl_inode).await?;
1421
1422        let mut entries = Vec::new();
1423
1424        // 1. Add "." entry
1425        let mut st_self = ovl_inode.stat64(ctx).await?;
1426        st_self.attr.ino = ovl_inode.inode;
1427        entries.push(DirectoryEntryPlus {
1428            inode: ovl_inode.inode,
1429            generation: 0,
1430            kind: st_self.attr.kind,
1431            name: ".".into(),
1432            offset: 1,
1433            attr: st_self.attr,
1434            entry_ttl: st_self.ttl,
1435            attr_ttl: st_self.ttl,
1436        });
1437
1438        // 2. Add ".." entry
1439        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1440            Some(node) => node,
1441            None => self.root_node().await,
1442        };
1443        let mut st_parent = parent_node.stat64(ctx).await?;
1444        st_parent.attr.ino = parent_node.inode;
1445        entries.push(DirectoryEntryPlus {
1446            inode: parent_node.inode,
1447            generation: 0,
1448            kind: st_parent.attr.kind,
1449            name: "..".into(),
1450            offset: 2,
1451            attr: st_parent.attr,
1452            entry_ttl: st_parent.ttl,
1453            attr_ttl: st_parent.ttl,
1454        });
1455
1456        // 3. Add children entries
1457        let children = ovl_inode.childrens.lock().await;
1458        for (name, child) in children.iter() {
1459            if child.whiteout.load(Ordering::Relaxed) {
1460                continue;
1461            }
1462            let mut st_child = child.stat64(ctx).await?;
1463            st_child.attr.ino = child.inode;
1464            entries.push(DirectoryEntryPlus {
1465                inode: child.inode,
1466                generation: 0,
1467                kind: st_child.attr.kind,
1468                name: name.clone().into(),
1469                offset: (entries.len() + 1) as i64,
1470                attr: st_child.attr,
1471                entry_ttl: st_child.ttl,
1472                attr_ttl: st_child.ttl,
1473            });
1474        }
1475        drop(children);
1476
1477        let mut snapshot_guard = handle_data.dir_snapshot.lock().await;
1478        if snapshot_guard.is_none() {
1479            // We won the race, install our prepared snapshot.
1480            *snapshot_guard = Some(entries.clone());
1481            Ok(entries)
1482        } else {
1483            // Another thread won the race while we were preparing.
1484            // Discard our work and use the existing snapshot.
1485            for entry in entries.iter().skip(2) {
1486                // skip "." and ".."
1487                self.forget_one(entry.inode, 1).await;
1488            }
1489            Ok(snapshot_guard.as_ref().unwrap().clone())
1490        }
1491    }
1492
1493    async fn do_mkdir(
1494        &self,
1495        ctx: Request,
1496        parent_node: Arc<OverlayInode>,
1497        name: &str,
1498        mode: u32,
1499        umask: u32,
1500    ) -> Result<()> {
1501        if self.upper_layer.is_none() {
1502            return Err(Error::from_raw_os_error(libc::EROFS));
1503        }
1504
1505        // Parent node was deleted.
1506        if parent_node.whiteout.load(Ordering::Relaxed) {
1507            return Err(Error::from_raw_os_error(libc::ENOENT));
1508        }
1509
1510        let mut delete_whiteout = false;
1511        let mut set_opaque = false;
1512        if let Some(n) = self
1513            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1514            .await?
1515        {
1516            // Node with same name exists, let's check if it's whiteout.
1517            if !n.whiteout.load(Ordering::Relaxed) {
1518                return Err(Error::from_raw_os_error(libc::EEXIST));
1519            }
1520
1521            if n.in_upper_layer().await {
1522                delete_whiteout = true;
1523            }
1524
1525            // Set opaque if child dir has lower layers.
1526            if !n.upper_layer_only().await {
1527                set_opaque = true;
1528            }
1529        }
1530
1531        // Copy parent node up if necessary.
1532        let pnode = self.copy_node_up(ctx, parent_node).await?;
1533
1534        let path = format!("{}/{}", pnode.path.read().await, name);
1535        let path_ref = &path;
1536        let new_node = Arc::new(Mutex::new(None));
1537        pnode
1538            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1539                let parent_real_inode = match parent_real_inode {
1540                    Some(inode) => inode,
1541                    None => {
1542                        error!("BUG: parent doesn't have upper inode after copied up");
1543                        return Err(Error::from_raw_os_error(libc::EINVAL));
1544                    }
1545                };
1546                let osstr = OsStr::new(name);
1547                if delete_whiteout {
1548                    let _ = parent_real_inode
1549                        .layer
1550                        .delete_whiteout(ctx, parent_real_inode.inode, osstr)
1551                        .await;
1552                }
1553
1554                // Allocate inode number.
1555                let ino = self.alloc_inode(path_ref).await?;
1556                let child_dir = parent_real_inode.mkdir(ctx, name, mode, umask).await?;
1557                // Set opaque if child dir has lower layers.
1558                if set_opaque {
1559                    parent_real_inode
1560                        .layer
1561                        .set_opaque(ctx, child_dir.inode)
1562                        .await?;
1563                }
1564                let ovi =
1565                    OverlayInode::new_from_real_inode(name, ino, path_ref.clone(), child_dir).await;
1566                new_node.lock().await.replace(ovi);
1567                Ok(false)
1568            })
1569            .await?;
1570
1571        // new_node is always 'Some'
1572        let nn = new_node.lock().await.take();
1573        let arc_node = Arc::new(nn.unwrap());
1574        self.insert_inode(arc_node.inode, arc_node.clone()).await;
1575        pnode.insert_child(name, arc_node).await;
1576        Ok(())
1577    }
1578
    /// Creates a file node `name` under `parent_node` in the upper layer.
    ///
    /// Two cases are handled:
    /// * A whiteout called `name` already exists: the existing overlay inode is
    ///   reused; its real inodes are replaced by the newly created upper inode.
    /// * Nothing called `name` exists: a new overlay inode is allocated, registered
    ///   in the inode store and added to the parent's children.
    ///
    /// In both cases the parent is copied up first when needed and the creation runs
    /// inside the parent's `handle_upper_inode_locked` callback.
    ///
    /// # Errors
    /// * `EROFS` if there is no upper layer.
    /// * `ENOENT` if the parent is whiteout-ed.
    /// * `EEXIST` if a non-whiteout entry called `name` already exists.
    /// * `EINVAL` if the parent has no upper inode after copy-up.
    /// * Any error from copy-up, inode allocation or `mknod`.
    async fn do_mknod(
        &self,
        ctx: Request,
        parent_node: &Arc<OverlayInode>,
        name: &str,
        mode: u32,
        rdev: u32,
        umask: u32,
    ) -> Result<()> {
        // Nothing can be created without a writable upper layer.
        if self.upper_layer.is_none() {
            return Err(Error::from_raw_os_error(libc::EROFS));
        }

        // Parent node was deleted.
        if parent_node.whiteout.load(Ordering::Relaxed) {
            return Err(Error::from_raw_os_error(libc::ENOENT));
        }

        match self
            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
            .await?
        {
            Some(n) => {
                // Node with same name exists, let's check if it's whiteout.
                if !n.whiteout.load(Ordering::Relaxed) {
                    return Err(Error::from_raw_os_error(libc::EEXIST));
                }

                // Copy parent node up if necessary.
                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
                pnode
                    .handle_upper_inode_locked(
                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
                            let parent_real_inode = match parent_real_inode {
                                Some(inode) => inode,
                                None => {
                                    error!("BUG: parent doesn't have upper inode after copied up");
                                    return Err(Error::from_raw_os_error(libc::EINVAL));
                                }
                            };
                            let osstr = OsStr::new(name);
                            // Remove the whiteout file only when the whiteout-ed node is
                            // in the upper layer; the result is deliberately ignored.
                            if n.in_upper_layer().await {
                                let _ = parent_real_inode
                                    .layer
                                    .delete_whiteout(ctx, parent_real_inode.inode, osstr)
                                    .await;
                            }

                            let child_ri = parent_real_inode
                                .mknod(ctx, name, mode, rdev, umask)
                                .await?;

                            // Replace existing real inodes with new one.
                            // This also overwrites the node's whiteout flag with the new
                            // real inode's whiteout state.
                            n.add_upper_inode(child_ri, true).await;
                            Ok(false)
                        },
                    )
                    .await?;
            }
            None => {
                // Copy parent node up if necessary.
                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
                // Slot the callback uses to hand the freshly built overlay inode back out.
                let new_node = Arc::new(Mutex::new(None));
                let path = format!("{}/{}", pnode.path.read().await, name);
                pnode
                    .handle_upper_inode_locked(
                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
                            let parent_real_inode = match parent_real_inode {
                                Some(inode) => inode,
                                None => {
                                    error!("BUG: parent doesn't have upper inode after copied up");
                                    return Err(Error::from_raw_os_error(libc::EINVAL));
                                }
                            };

                            // Allocate inode number.
                            let ino = self.alloc_inode(&path).await?;
                            let child_ri = parent_real_inode
                                .mknod(ctx, name, mode, rdev, umask)
                                .await?;
                            let ovi = OverlayInode::new_from_real_inode(
                                name,
                                ino,
                                path.clone(),
                                child_ri,
                            )
                            .await;

                            new_node.lock().await.replace(ovi);
                            Ok(false)
                        },
                    )
                    .await?;

                // The callback stores the node on every successful path, so the
                // `unwrap` below is not expected to fail.
                let nn = new_node.lock().await.take();
                let arc_node = Arc::new(nn.unwrap());
                // Make the new node reachable by inode number and by name.
                self.insert_inode(arc_node.inode, arc_node.clone()).await;
                pnode.insert_child(name, arc_node).await;
            }
        }

        Ok(())
    }
1682
1683    async fn do_create(
1684        &self,
1685        ctx: Request,
1686        parent_node: &Arc<OverlayInode>,
1687        name: &OsStr,
1688        mode: u32,
1689        flags: u32,
1690    ) -> Result<Option<u64>> {
1691        let name_str = name.to_str().unwrap();
1692        let upper = self
1693            .upper_layer
1694            .as_ref()
1695            .cloned()
1696            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
1697
1698        // Parent node was deleted.
1699        if parent_node.whiteout.load(Ordering::Relaxed) {
1700            return Err(Error::from_raw_os_error(libc::ENOENT));
1701        }
1702
1703        let handle: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1704        let real_ino: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1705        let new_ovi = match self
1706            .lookup_node_ignore_enoent(ctx, parent_node.inode, name_str)
1707            .await?
1708        {
1709            Some(n) => {
1710                // Node with same name exists, let's check if it's whiteout.
1711                if !n.whiteout.load(Ordering::Relaxed) {
1712                    return Err(Error::from_raw_os_error(libc::EEXIST));
1713                }
1714
1715                // Copy parent node up if necessary.
1716                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1717                pnode
1718                    .handle_upper_inode_locked(
1719                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1720                            let parent_real_inode = match parent_real_inode {
1721                                Some(inode) => inode,
1722                                None => {
1723                                    error!("BUG: parent doesn't have upper inode after copied up");
1724                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1725                                }
1726                            };
1727
1728                            if n.in_upper_layer().await {
1729                                let _ = parent_real_inode
1730                                    .layer
1731                                    .delete_whiteout(ctx, parent_real_inode.inode, name)
1732                                    .await;
1733                            }
1734
1735                            let (child_ri, hd) =
1736                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1737                            real_ino.lock().await.replace(child_ri.inode);
1738                            handle.lock().await.replace(hd.unwrap());
1739
1740                            // Replace existing real inodes with new one.
1741                            n.add_upper_inode(child_ri, true).await;
1742                            Ok(false)
1743                        },
1744                    )
1745                    .await?;
1746                n.clone()
1747            }
1748            None => {
1749                // Copy parent node up if necessary.
1750                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1751                let new_node = Arc::new(Mutex::new(None));
1752                let path = format!("{}/{}", pnode.path.read().await, name_str);
1753                pnode
1754                    .handle_upper_inode_locked(
1755                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1756                            let parent_real_inode = match parent_real_inode {
1757                                Some(inode) => inode,
1758                                None => {
1759                                    error!("BUG: parent doesn't have upper inode after copied up");
1760                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1761                                }
1762                            };
1763
1764                            let (child_ri, hd) =
1765                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1766                            real_ino.lock().await.replace(child_ri.inode);
1767                            handle.lock().await.replace(hd.unwrap());
1768                            // Allocate inode number.
1769                            let ino = self.alloc_inode(&path).await?;
1770                            let ovi = OverlayInode::new_from_real_inode(
1771                                name_str,
1772                                ino,
1773                                path.clone(),
1774                                child_ri,
1775                            )
1776                            .await;
1777
1778                            new_node.lock().await.replace(ovi);
1779                            Ok(false)
1780                        },
1781                    )
1782                    .await?;
1783
1784                // new_node is always 'Some'
1785                let nn = new_node.lock().await.take();
1786                let arc_node = Arc::new(nn.unwrap());
1787                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1788                pnode.insert_child(name_str, arc_node.clone()).await;
1789                arc_node
1790            }
1791        };
1792
1793        let final_handle = match *handle.lock().await {
1794            Some(hd) => {
1795                if self.no_open.load(Ordering::Relaxed) {
1796                    None
1797                } else {
1798                    let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1799                    let handle_data = HandleData {
1800                        node: new_ovi,
1801                        real_handle: Some(RealHandle {
1802                            layer: upper.clone(),
1803                            in_upper_layer: true,
1804                            inode: real_ino.lock().await.unwrap(),
1805                            handle: AtomicU64::new(hd),
1806                        }),
1807                        dir_snapshot: Mutex::new(None),
1808                    };
1809                    self.handles
1810                        .lock()
1811                        .await
1812                        .insert(handle, Arc::new(handle_data));
1813                    Some(handle)
1814                }
1815            }
1816            None => None,
1817        };
1818        Ok(final_handle)
1819    }
1820
1821    async fn do_rename(
1822        &self,
1823        req: Request,
1824        parent: Inode,
1825        name: &OsStr,
1826        new_parent: Inode,
1827        new_name: &OsStr,
1828    ) -> Result<()> {
1829        let name_str = name.to_str().unwrap();
1830        let new_name_str = new_name.to_str().unwrap();
1831
1832        let parent_node = self.lookup_node(req, parent, "").await?;
1833        let new_parent_node = self.lookup_node(req, new_parent, "").await?;
1834        let src_node = self.lookup_node(req, parent, name_str).await?;
1835        let dest_node_opt = self
1836            .lookup_node_ignore_enoent(req, new_parent, new_name_str)
1837            .await?;
1838        // trace!("parent_node: {}, new_parent_node: {}, src_node: {}, dest_node_opt: {:?}", parent_node.inode, new_parent_node.inode, src_node.inode, dest_node_opt.as_ref().map(|n| n.inode));
1839
1840        if let Some(dest_node) = &dest_node_opt {
1841            let src_is_dir = src_node.is_dir(req).await?;
1842            let dest_is_dir = dest_node.is_dir(req).await?;
1843            if src_is_dir != dest_is_dir {
1844                return Err(Error::from_raw_os_error(libc::EISDIR));
1845            }
1846            if dest_is_dir {
1847                self.copy_directory_up(req, dest_node.clone()).await?;
1848                let (count, _) = dest_node.count_entries_and_whiteout(req).await?;
1849                if count > 0 {
1850                    return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
1851                }
1852            }
1853        }
1854
1855        let pnode = self.copy_node_up(req, parent_node).await?;
1856        let new_pnode = self.copy_node_up(req, new_parent_node).await?;
1857        let s_node = self.copy_node_up(req, src_node).await?;
1858
1859        let need_whiteout = !s_node.upper_layer_only().await;
1860
1861        let (p_layer, _, p_inode) = pnode.first_layer_inode().await;
1862        let (new_p_layer, _, new_p_inode) = new_pnode.first_layer_inode().await;
1863        assert!(Arc::ptr_eq(&p_layer, &new_p_layer));
1864
1865        p_layer
1866            .rename(req, p_inode, name, new_p_inode, new_name)
1867            .await?;
1868
1869        // Handle the replaced destination node (if any).
1870        if let Some(dest_node) = dest_node_opt {
1871            let path = dest_node.path.read().await.clone();
1872            self.remove_inode(dest_node.inode, Some(path)).await;
1873        }
1874
1875        // Update the moved source node's state.
1876
1877        // Remove from old parent.
1878        pnode.remove_child(name_str).await;
1879        self.remove_inode(s_node.inode, s_node.path.read().await.clone().into())
1880            .await;
1881        let new_path = format!("{}/{}", new_pnode.path.read().await, new_name_str);
1882        *s_node.path.write().await = new_path;
1883        *s_node.name.write().await = new_name_str.to_string();
1884        *s_node.parent.lock().await = Arc::downgrade(&new_pnode);
1885        new_pnode.insert_child(new_name_str, s_node.clone()).await;
1886        self.insert_inode(s_node.inode, s_node).await;
1887
1888        // Create whiteout at the old location if necessary.
1889        if need_whiteout {
1890            p_layer.create_whiteout(req, p_inode, name).await?;
1891        }
1892
1893        Ok(())
1894    }
1895
1896    async fn do_link(
1897        &self,
1898        ctx: Request,
1899        src_node: &Arc<OverlayInode>,
1900        new_parent: &Arc<OverlayInode>,
1901        name: &str,
1902    ) -> Result<()> {
1903        if self.upper_layer.is_none() {
1904            return Err(Error::from_raw_os_error(libc::EROFS));
1905        }
1906
1907        // Node is whiteout.
1908        if src_node.whiteout.load(Ordering::Relaxed) || new_parent.whiteout.load(Ordering::Relaxed)
1909        {
1910            return Err(Error::from_raw_os_error(libc::ENOENT));
1911        }
1912
1913        let st = src_node.stat64(ctx).await?;
1914        if utils::is_dir(&st.attr.kind) {
1915            // Directory can't be hardlinked.
1916            return Err(Error::from_raw_os_error(libc::EPERM));
1917        }
1918
1919        let src_node = self.copy_node_up(ctx, Arc::clone(src_node)).await?;
1920        let new_parent = self.copy_node_up(ctx, Arc::clone(new_parent)).await?;
1921        let src_ino = src_node.first_layer_inode().await.2;
1922
1923        if let Some(existing_node) = self
1924            .lookup_node_ignore_enoent(ctx, new_parent.inode, name)
1925            .await?
1926        {
1927            // If it's not a whiteout, it's an error
1928            if !existing_node.whiteout.load(Ordering::Relaxed) {
1929                return Err(Error::from_raw_os_error(libc::EEXIST));
1930            }
1931            // If it is a whiteout, we will overwrite it.
1932            // First, remove the physical whiteout file in the upper layer.
1933            new_parent
1934                .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1935                    let parent_ri = parent_real_inode.ok_or_else(|| {
1936                        error!("BUG: parent doesn't have upper inode after copied up");
1937                        Error::from_raw_os_error(libc::EINVAL)
1938                    })?;
1939                    // Only delete if the whiteout is in the upper layer
1940                    if existing_node.in_upper_layer().await {
1941                        let _ = parent_ri
1942                            .layer
1943                            .delete_whiteout(ctx, parent_ri.inode, OsStr::new(name))
1944                            .await;
1945                    }
1946                    Ok(false)
1947                })
1948                .await?;
1949        }
1950
1951        new_parent
1952            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1953                let parent_real_inode = match parent_real_inode {
1954                    Some(inode) => inode,
1955                    None => {
1956                        error!("BUG: parent doesn't have upper inode after copied up");
1957                        return Err(Error::from_raw_os_error(libc::EINVAL));
1958                    }
1959                };
1960
1961                parent_real_inode.link(ctx, src_ino, name).await?;
1962
1963                Ok(false)
1964            })
1965            .await?;
1966
1967        self.insert_inode(src_node.inode, src_node.clone()).await;
1968        new_parent.insert_child(name, src_node).await;
1969
1970        Ok(())
1971    }
1972
1973    async fn do_symlink(
1974        &self,
1975        ctx: Request,
1976        linkname: &str,
1977        parent_node: &Arc<OverlayInode>,
1978        name: &str,
1979    ) -> Result<()> {
1980        let name_os = OsStr::new(name);
1981        if self.upper_layer.is_none() {
1982            return Err(Error::from_raw_os_error(libc::EROFS));
1983        }
1984
1985        // parent was deleted.
1986        if parent_node.whiteout.load(Ordering::Relaxed) {
1987            return Err(Error::from_raw_os_error(libc::ENOENT));
1988        }
1989
1990        match self
1991            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1992            .await?
1993        {
1994            Some(n) => {
1995                // Node with same name exists, let's check if it's whiteout.
1996                if !n.whiteout.load(Ordering::Relaxed) {
1997                    return Err(Error::from_raw_os_error(libc::EEXIST));
1998                }
1999
2000                // Copy parent node up if necessary.
2001                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
2002                pnode
2003                    .handle_upper_inode_locked(
2004                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
2005                            let parent_real_inode = match parent_real_inode {
2006                                Some(inode) => inode,
2007                                None => {
2008                                    error!("BUG: parent doesn't have upper inode after copied up");
2009                                    return Err(Error::from_raw_os_error(libc::EINVAL));
2010                                }
2011                            };
2012
2013                            if n.in_upper_layer().await {
2014                                let _ = parent_real_inode
2015                                    .layer
2016                                    .delete_whiteout(ctx, parent_real_inode.inode, name_os)
2017                                    .await;
2018                            }
2019
2020                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
2021
2022                            // Replace existing real inodes with new one.
2023                            n.add_upper_inode(child_ri, true).await;
2024                            Ok(false)
2025                        },
2026                    )
2027                    .await?;
2028            }
2029            None => {
2030                // Copy parent node up if necessary.
2031                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
2032                let new_node: Arc<Mutex<Option<OverlayInode>>> = Arc::new(Mutex::new(None));
2033                let path = format!("{}/{}", pnode.path.read().await, name);
2034                pnode
2035                    .handle_upper_inode_locked(
2036                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
2037                            let parent_real_inode = match parent_real_inode {
2038                                Some(inode) => inode,
2039                                None => {
2040                                    error!("BUG: parent doesn't have upper inode after copied up");
2041                                    return Err(Error::from_raw_os_error(libc::EINVAL));
2042                                }
2043                            };
2044
2045                            // Allocate inode number.
2046                            let ino = self.alloc_inode(&path).await?;
2047                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
2048                            let ovi = OverlayInode::new_from_real_inode(
2049                                name,
2050                                ino,
2051                                path.clone(),
2052                                child_ri,
2053                            )
2054                            .await;
2055
2056                            new_node.lock().await.replace(ovi);
2057                            Ok(false)
2058                        },
2059                    )
2060                    .await?;
2061
2062                // new_node is always 'Some'
2063                let arc_node = Arc::new(new_node.lock().await.take().unwrap());
2064                self.insert_inode(arc_node.inode, arc_node.clone()).await;
2065                pnode.insert_child(name, arc_node).await;
2066            }
2067        }
2068
2069        Ok(())
2070    }
2071
2072    /// Copies a symbolic link from a lower layer to the upper layer.
2073    ///
2074    /// This function is a part of the copy-up process, triggered when a symlink that
2075    /// only exists in a lower layer is modified. It reads the link target and attributes
2076    /// from the lower layer and creates an identical symlink in the upper layer, crucially
2077    /// preserving the original host UID and GID.
2078    async fn copy_symlink_up(
2079        &self,
2080        ctx: Request,
2081        node: Arc<OverlayInode>,
2082    ) -> Result<Arc<OverlayInode>> {
2083        if node.in_upper_layer().await {
2084            return Ok(node);
2085        }
2086
2087        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2088            Arc::clone(n)
2089        } else {
2090            return Err(Error::other("no parent?"));
2091        };
2092
2093        // To preserve original ownership, we must get the raw, unmapped host attributes.
2094        // We achieve this by calling `do_getattr_helper`, which is specifically designed
2095        // to bypass the ID mapping logic. This is safe and does not affect other
2096        // functionalities because `do_getattr_helper` and the standard `stat64()` call
2097        // both rely on the same underlying `stat` system call; they only differ in
2098        // whether the resulting `uid` and `gid` are mapped.
2099        let (self_layer, _, self_inode) = node.first_layer_inode().await;
2100        let re = self_layer.do_getattr_helper(self_inode, None).await?;
2101        let st = ReplyAttr {
2102            ttl: re.1,
2103            attr: convert_stat64_to_file_attr(re.0),
2104        };
2105
2106        if !parent_node.in_upper_layer().await {
2107            parent_node.clone().create_upper_dir(ctx, None).await?;
2108        }
2109
2110        // Read the linkname from lower layer.
2111        let reply_data = self_layer.readlink(ctx, self_inode).await?;
2112        // Convert path to &str.
2113        let path = std::str::from_utf8(&reply_data.data)
2114            .map_err(|_| Error::from_raw_os_error(libc::EINVAL))?;
2115
2116        let new_upper_real: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
2117        parent_node
2118            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2119                // We already create upper dir for parent_node above.
2120                let parent_real_inode =
2121                    parent_upper_inode.ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2122                // We manually unfold the `symlink` logic here instead of calling the `symlink` method directly.
2123                // This is necessary to preserve the original file's UID and GID during the copy-up process.
2124                if !parent_real_inode.in_upper_layer {
2125                    return Err(Error::from_raw_os_error(libc::EROFS));
2126                }
2127                let link_name = OsStr::new(path);
2128                let filename = node.name.read().await;
2129                let filename = OsStr::new(filename.as_str());
2130                let entry = parent_real_inode
2131                    .layer
2132                    .do_symlink_helper(
2133                        ctx,
2134                        parent_real_inode.inode,
2135                        filename,
2136                        link_name,
2137                        st.attr.uid,
2138                        st.attr.gid,
2139                    )
2140                    .await?;
2141                let ri = RealInode {
2142                    layer: parent_real_inode.layer.clone(),
2143                    in_upper_layer: true,
2144                    inode: entry.attr.ino,
2145                    whiteout: false,
2146                    opaque: false,
2147                    stat: Some(ReplyAttr {
2148                        ttl: entry.ttl,
2149                        attr: entry.attr,
2150                    }),
2151                };
2152                new_upper_real.lock().await.replace(ri);
2153                Ok(false)
2154            })
2155            .await?;
2156
2157        if let Some(real_inode) = new_upper_real.lock().await.take() {
2158            // update upper_inode and first_inode()
2159            node.add_upper_inode(real_inode, true).await;
2160        }
2161
2162        Ok(node)
2163    }
2164
2165    /// Copies a regular file and its contents from a lower layer to the upper layer.
2166    ///
2167    /// This function is a core part of the copy-up process, triggered when a regular file
2168    /// that only exists in a lower layer is written to. It creates an empty file in the
2169    /// upper layer with the original file's attributes (mode, UID, GID), and then copies
2170    /// the entire content from the lower layer file to the new upper layer file.
2171    async fn copy_regfile_up(
2172        &self,
2173        ctx: Request,
2174        node: Arc<OverlayInode>,
2175    ) -> Result<Arc<OverlayInode>> {
2176        if node.in_upper_layer().await {
2177            return Ok(node);
2178        }
2179
2180        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2181            Arc::clone(n)
2182        } else {
2183            return Err(Error::other("no parent?"));
2184        };
2185
2186        // To preserve original ownership, we must get the raw, unmapped host attributes.
2187        // We achieve this by calling `do_getattr_helper`, which is specifically designed
2188        // to bypass the ID mapping logic. This is safe and does not affect other
2189        // functionalities because `do_getattr_helper` and the standard `stat64()` call
2190        // both rely on the same underlying `stat` system call; they only differ in
2191        // whether the resulting `uid` and `gid` are mapped.
2192        let (lower_layer, _, lower_inode) = node.first_layer_inode().await;
2193        let re = lower_layer.do_getattr_helper(lower_inode, None).await?;
2194        let st = ReplyAttr {
2195            ttl: re.1,
2196            attr: convert_stat64_to_file_attr(re.0),
2197        };
2198        trace!(
2199            "copy_regfile_up: node {} in lower layer's inode {}",
2200            node.inode, lower_inode
2201        );
2202
2203        if !parent_node.in_upper_layer().await {
2204            parent_node.clone().create_upper_dir(ctx, None).await?;
2205        }
2206
2207        // create the file in upper layer using information from lower layer
2208
2209        let flags = libc::O_WRONLY;
2210        let mode = mode_from_kind_and_perm(st.attr.kind, st.attr.perm);
2211
2212        let upper_handle = Arc::new(Mutex::new(0));
2213        let upper_real_inode = Arc::new(Mutex::new(None));
2214        parent_node
2215            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2216                // We already create upper dir for parent_node.
2217                let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2218                    error!("parent {} has no upper inode", parent_node.inode);
2219                    Error::from_raw_os_error(libc::EINVAL)
2220                })?;
2221                // We manually unfold the `create` logic here instead of calling the `create` method directly.
2222                // This is necessary to preserve the original file's UID and GID during the copy-up process.
2223                if !parent_real_inode.in_upper_layer {
2224                    return Err(Error::from_raw_os_error(libc::EROFS));
2225                }
2226                let name = node.name.read().await;
2227                let name = OsStr::new(name.as_str());
2228                let create_rep = parent_real_inode
2229                    .layer
2230                    .do_create_helper(
2231                        ctx,
2232                        parent_real_inode.inode,
2233                        name,
2234                        mode,
2235                        flags.try_into().unwrap(),
2236                        st.attr.uid,
2237                        st.attr.gid,
2238                    )
2239                    .await?;
2240
2241                let (inode, h) = (
2242                    RealInode {
2243                        layer: parent_real_inode.layer.clone(),
2244                        in_upper_layer: true,
2245                        inode: create_rep.attr.ino,
2246                        whiteout: false,
2247                        opaque: false,
2248                        stat: Some(ReplyAttr {
2249                            ttl: create_rep.ttl,
2250                            attr: create_rep.attr,
2251                        }),
2252                    },
2253                    Some(create_rep.fh),
2254                );
2255                trace!(
2256                    "copy_regfile_up: created upper file {name:?} with inode {}",
2257                    inode.inode
2258                );
2259                *upper_handle.lock().await = h.unwrap_or(0);
2260                upper_real_inode.lock().await.replace(inode);
2261                Ok(false)
2262            })
2263            .await?;
2264
2265        let rep = lower_layer
2266            .open(ctx, lower_inode, libc::O_RDONLY as u32)
2267            .await?;
2268
2269        let lower_handle = rep.fh;
2270
2271        // need to use work directory and then rename file to
2272        // final destination for atomic reasons.. not deal with it for now,
2273        // use stupid copy at present.
2274        // FIXME: this need a lot of work here, ntimes, xattr, etc.
2275
2276        // Copy from lower real inode to upper real inode.
2277        // TODO: use sendfile here.
2278
2279        let u_handle = *upper_handle.lock().await;
2280        let ri = upper_real_inode.lock().await.take();
2281        if let Some(ri) = ri {
2282            let mut offset: usize = 0;
2283            let size = 4 * 1024 * 1024;
2284
2285            loop {
2286                let ret = lower_layer
2287                    .read(ctx, lower_inode, lower_handle, offset as u64, size)
2288                    .await?;
2289
2290                let len = ret.data.len();
2291                if len == 0 {
2292                    break;
2293                }
2294
2295                let ret = ri
2296                    .layer
2297                    .write(ctx, ri.inode, u_handle, offset as u64, &ret.data, 0, 0)
2298                    .await?;
2299
2300                assert_eq!(ret.written as usize, len);
2301                offset += ret.written as usize;
2302            }
2303
2304            if let Err(e) = ri.layer.release(ctx, ri.inode, u_handle, 0, 0, true).await {
2305                let e: std::io::Error = e.into();
2306                // Ignore ENOSYS.
2307                if e.raw_os_error() != Some(libc::ENOSYS) {
2308                    return Err(e);
2309                }
2310            }
2311            node.add_upper_inode(ri, true).await;
2312        } else {
2313            error!("BUG: upper real inode is None after copy up");
2314        }
2315
2316        lower_layer
2317            .release(ctx, lower_inode, lower_handle, 0, 0, true)
2318            .await?;
2319
2320        Ok(Arc::clone(&node))
2321    }
2322
2323    /// Copies the specified node to the upper layer of the filesystem
2324    ///
2325    /// Performs different operations based on the node type:
2326    /// - **Directory**: Creates a corresponding directory in the upper layer
2327    /// - **Symbolic link**: Recursively copies to the upper layer
2328    /// - **Regular file**: Copies file content to the upper layer
2329    ///
2330    /// # Parameters
2331    /// * `ctx`: FUSE request context
2332    /// * `node`: Reference to the node to be copied
2333    ///
2334    /// # Returns
2335    /// Returns a reference to the upper-layer node on success, or an error on failure
2336    async fn copy_node_up(
2337        &self,
2338        ctx: Request,
2339        node: Arc<OverlayInode>,
2340    ) -> Result<Arc<OverlayInode>> {
2341        if node.in_upper_layer().await {
2342            return Ok(node);
2343        }
2344
2345        let st = node.stat64(ctx).await?;
2346        match st.attr.kind {
2347            FileType::Directory => {
2348                node.clone().create_upper_dir(ctx, None).await?;
2349                Ok(node)
2350            }
2351            FileType::Symlink => {
2352                // For symlink.
2353                self.copy_symlink_up(ctx, node).await
2354            }
2355            FileType::RegularFile => {
2356                // For regular file.
2357                self.copy_regfile_up(ctx, node).await
2358            }
2359            _ => {
2360                // For other file types. return error.
2361                Err(Error::from_raw_os_error(libc::EINVAL))
2362            }
2363        }
2364    }
2365
2366    /// recursively copy directory and all its contents to upper layer
2367    async fn copy_directory_up(
2368        &self,
2369        ctx: Request,
2370        node: Arc<OverlayInode>,
2371    ) -> Result<Arc<OverlayInode>> {
2372        // Ensure the directory itself is copied up first
2373        self.copy_node_up(ctx, node.clone()).await?;
2374
2375        // load directory to cache
2376        self.load_directory(ctx, &node).await?;
2377
2378        // go through all children
2379        let children = node.childrens.lock().await.clone();
2380        for (_name, child) in children.iter() {
2381            if _name == "." || _name == ".." {
2382                continue;
2383            }
2384            // jump over whiteout
2385            if child.whiteout.load(Ordering::Relaxed) {
2386                continue;
2387            }
2388            let st = child.stat64(ctx).await?;
2389            if !child.in_upper_layer().await {
2390                match st.attr.kind {
2391                    FileType::Directory => {
2392                        // recursively copy subdirectory
2393                        Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2394                    }
2395                    FileType::Symlink | FileType::RegularFile => {
2396                        // copy node up symlink or regular file
2397                        Box::pin(self.copy_node_up(ctx, child.clone())).await?;
2398                    }
2399                    _ => {
2400                        // other file types are ignored
2401                    }
2402                }
2403            } else if utils::is_dir(&st.attr.kind) {
2404                // If it is already in the upper layer, but the directory is not loaded,
2405                // ensure that its contents are also copied up recursively.
2406                Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2407            }
2408        }
2409
2410        Ok(node)
2411    }
2412
2413    async fn do_rm(&self, ctx: Request, parent: u64, name: &OsStr, dir: bool) -> Result<()> {
2414        // 1. Read-only mount guard
2415        if self.upper_layer.is_none() {
2416            return Err(Error::from_raw_os_error(libc::EROFS));
2417        }
2418
2419        // 2. Locate the parent Overlay Inode.
2420        // Find parent Overlay Inode.
2421        let pnode = self.lookup_node(ctx, parent, "").await?;
2422        if pnode.whiteout.load(Ordering::Relaxed) {
2423            return Err(Error::from_raw_os_error(libc::ENOENT));
2424        }
2425        let to_name = name.to_str().unwrap();
2426
2427        // 3. Locate the child Overlay Inode for the given name
2428        // Find the Overlay Inode for child with <name>.
2429        let node = self.lookup_node(ctx, parent, to_name).await?;
2430        if node.whiteout.load(Ordering::Relaxed) {
2431            // already deleted.
2432            return Err(Error::from_raw_os_error(libc::ENOENT));
2433        }
2434
2435        // 4. If removing a directory, ensure it is empty of real entries
2436        if dir {
2437            self.load_directory(ctx, &node).await?;
2438            let (count, whiteouts) = node.count_entries_and_whiteout(ctx).await?;
2439            trace!("entries: {count}, whiteouts: {whiteouts}\n");
2440            if count > 0 {
2441                return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
2442            }
2443
2444            // Delete all whiteouts.
2445            if whiteouts > 0 && node.in_upper_layer().await {
2446                self.empty_node_directory(ctx, Arc::clone(&node)).await?;
2447            }
2448
2449            trace!("whiteouts deleted!\n");
2450        }
2451
2452        // 5. Decide whether we need to create a whiteout entry
2453        // We'll filp this off if upper-layer unlink suffices or parent is opaque
2454        let need_whiteout = AtomicBool::new(true);
2455        let pnode = self.copy_node_up(ctx, Arc::clone(&pnode)).await?;
2456
2457        if node.upper_layer_only().await {
2458            need_whiteout.store(false, Ordering::Relaxed);
2459        }
2460
2461        let mut df = |parent_upper_inode: Option<Arc<RealInode>>| async {
2462            let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2463                error!(
2464                    "BUG: parent {} has no upper inode after copy up",
2465                    pnode.inode
2466                );
2467                Error::from_raw_os_error(libc::EINVAL)
2468            })?;
2469
2470            // Parent is opaque, it shadows everything in lower layers so no need to create extra whiteouts.
2471            if parent_real_inode.opaque {
2472                need_whiteout.store(false, Ordering::Relaxed);
2473            }
2474            if dir {
2475                parent_real_inode
2476                    .layer
2477                    .rmdir(ctx, parent_real_inode.inode, name)
2478                    .await?;
2479            } else {
2480                parent_real_inode
2481                    .layer
2482                    .unlink(ctx, parent_real_inode.inode, name)
2483                    .await?;
2484            }
2485
2486            Ok(false)
2487        };
2488
2489        // 6. Perform the unlink/rmdir operation and memory cleanup
2490        if node.in_upper_layer().await {
2491            pnode.handle_upper_inode_locked(&mut df).await?;
2492        }
2493        pnode.remove_child(name.to_str().unwrap()).await;
2494        let path = node.path.read().await.clone();
2495        self.remove_inode(node.inode, Some(path)).await;
2496
2497        // 7. If needed, create a entry in the upper layer to mask lower-layer files
2498        if need_whiteout.load(Ordering::Relaxed) {
2499            trace!("do_rm: creating whiteout\n");
2500            // pnode is copied up, so it has upper layer.
2501            pnode
2502                .handle_upper_inode_locked(
2503                    &mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2504                        let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2505                            error!(
2506                                "BUG: parent {} has no upper inode after copy up",
2507                                pnode.inode
2508                            );
2509                            Error::from_raw_os_error(libc::EINVAL)
2510                        })?;
2511
2512                        let child_ri = parent_real_inode.create_whiteout(ctx, to_name).await?; //FIXME..............
2513                        let path = format!("{}/{}", pnode.path.read().await, to_name);
2514                        let ino: u64 = self.alloc_inode(&path).await?;
2515                        let ovi = Arc::new(
2516                            OverlayInode::new_from_real_inode(to_name, ino, path.clone(), child_ri)
2517                                .await,
2518                        );
2519
2520                        self.insert_inode(ino, ovi.clone()).await;
2521                        pnode.insert_child(to_name, ovi.clone()).await;
2522                        Ok(false)
2523                    },
2524                )
2525                .await?;
2526        }
2527
2528        Ok(())
2529    }
2530
2531    async fn do_fsync(
2532        &self,
2533        ctx: Request,
2534        inode: Inode,
2535        datasync: bool,
2536        handle: Handle,
2537        syncdir: bool,
2538    ) -> Result<()> {
2539        // Use O_RDONLY flags which indicates no copy up.
2540        let data = self
2541            .get_data(ctx, Some(handle), inode, libc::O_RDONLY as u32)
2542            .await?;
2543
2544        trace!("do_fsync: got data for handle: {handle}, inode:{inode}");
2545
2546        match data.real_handle {
2547            // FIXME: need to test if inode matches corresponding handle?
2548            None => {
2549                trace!("do_fsync: no real handle found for handle: {handle}, inode:{inode}");
2550                Err(Error::from_raw_os_error(libc::ENOENT))
2551            }
2552            Some(ref rh) => {
2553                let real_handle = rh.handle.load(Ordering::Relaxed);
2554                // TODO: check if it's in upper layer? @weizhang555
2555                if syncdir {
2556                    trace!(
2557                        "do_fsync: layer.fsyncdir called for handle: {}, inode:{}; rh.inode: {}, real_handle: {}",
2558                        handle, inode, rh.inode, real_handle
2559                    );
2560                    rh.layer
2561                        .fsyncdir(ctx, rh.inode, real_handle, datasync)
2562                        .await
2563                        .map_err(|e| e.into())
2564                } else {
2565                    rh.layer
2566                        .fsync(ctx, rh.inode, real_handle, datasync)
2567                        .await
2568                        .map_err(|e| e.into())
2569                }
2570            }
2571        }
2572    }
2573
2574    // Delete everything in the directory only on upper layer, ignore lower layers.
2575    async fn empty_node_directory(&self, ctx: Request, node: Arc<OverlayInode>) -> Result<()> {
2576        let st = node.stat64(ctx).await?;
2577        if !utils::is_dir(&st.attr.kind) {
2578            // This function can only be called on directories.
2579            return Err(Error::from_raw_os_error(libc::ENOTDIR));
2580        }
2581
2582        let (layer, in_upper, inode) = node.first_layer_inode().await;
2583        if !in_upper {
2584            return Ok(());
2585        }
2586
2587        // Copy node.childrens Hashmap to Vector, the Vector is also used as temp storage,
2588        // Without this, Rust won't allow us to remove them from node.childrens.
2589        let iter = node
2590            .childrens
2591            .lock()
2592            .await
2593            .values()
2594            .cloned()
2595            .collect::<Vec<_>>();
2596
2597        for child in iter {
2598            // We only care about upper layer, ignore lower layers.
2599            if child.in_upper_layer().await {
2600                let child_name = child.name.read().await.clone();
2601                let child_name_os = OsStr::new(&child_name);
2602                if child.whiteout.load(Ordering::Relaxed) {
2603                    layer.delete_whiteout(ctx, inode, child_name_os).await?
2604                } else {
2605                    let s = child.stat64(ctx).await?;
2606                    let cname: &OsStr = OsStr::new(&child_name_os);
2607                    if utils::is_dir(&s.attr.kind) {
2608                        let (count, whiteouts) = child.count_entries_and_whiteout(ctx).await?;
2609                        if count + whiteouts > 0 {
2610                            let cb = child.clone();
2611                            Box::pin(async move { self.empty_node_directory(ctx, cb).await })
2612                                .await?;
2613                        }
2614                        layer.rmdir(ctx, inode, cname).await?
2615                    } else {
2616                        layer.unlink(ctx, inode, cname).await?;
2617                    }
2618                }
2619
2620                let cpath = child.path.read().await.clone();
2621                // delete the child
2622                self.remove_inode(child.inode, Some(cpath)).await;
2623                node.remove_child(&child_name).await;
2624            }
2625        }
2626
2627        Ok(())
2628    }
2629
2630    async fn find_real_info_from_handle(
2631        &self,
2632        handle: Handle,
2633    ) -> Result<(Arc<BoxedLayer>, Inode, Handle)> {
2634        match self.handles.lock().await.get(&handle) {
2635            Some(h) => match h.real_handle {
2636                Some(ref rhd) => {
2637                    trace!(
2638                        "find_real_info_from_handle: layer in upper: {}",
2639                        rhd.in_upper_layer
2640                    );
2641                    Ok((
2642                        rhd.layer.clone(),
2643                        rhd.inode,
2644                        rhd.handle.load(Ordering::Relaxed),
2645                    ))
2646                }
2647                None => Err(Error::from_raw_os_error(libc::ENOENT)),
2648            },
2649
2650            None => Err(Error::from_raw_os_error(libc::ENOENT)),
2651        }
2652    }
2653
2654    async fn find_real_inode(&self, inode: Inode) -> Result<(Arc<BoxedLayer>, Inode)> {
2655        if let Some(n) = self.get_active_inode(inode).await {
2656            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2657            return Ok((first_layer, first_inode));
2658        } else if let Some(n) = self.get_all_inode(inode).await {
2659            trace!("find_real_inode: found inode by get_all_inode: {}", n.inode);
2660            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2661            return Ok((first_layer, first_inode));
2662        }
2663
2664        Err(Error::from_raw_os_error(libc::ENOENT))
2665    }
2666
2667    async fn get_data(
2668        &self,
2669        ctx: Request,
2670        handle: Option<Handle>,
2671        inode: Inode,
2672        flags: u32,
2673    ) -> Result<Arc<HandleData>> {
2674        let no_open = self.no_open.load(Ordering::Relaxed);
2675        if !no_open {
2676            if let Some(h) = handle
2677                && let Some(v) = self.handles.lock().await.get(&h)
2678                && v.node.inode == inode
2679            {
2680                // trace!("get_data: found handle");
2681                return Ok(Arc::clone(v));
2682            }
2683        } else {
2684            let readonly: bool = flags
2685                & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
2686                    as u32
2687                == 0;
2688
2689            // lookup node
2690            let node = self.lookup_node(ctx, inode, "").await?;
2691
2692            // whiteout node
2693            if node.whiteout.load(Ordering::Relaxed) {
2694                return Err(Error::from_raw_os_error(libc::ENOENT));
2695            }
2696
2697            if !readonly {
2698                // Check if upper layer exists, return EROFS is not exists.
2699                self.upper_layer
2700                    .as_ref()
2701                    .cloned()
2702                    .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2703                // copy up to upper layer
2704                self.copy_node_up(ctx, Arc::clone(&node)).await?;
2705            }
2706
2707            let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
2708            let handle_data = HandleData {
2709                node: Arc::clone(&node),
2710                real_handle: Some(RealHandle {
2711                    layer,
2712                    in_upper_layer,
2713                    inode,
2714                    handle: AtomicU64::new(0),
2715                }),
2716                dir_snapshot: Mutex::new(None),
2717            };
2718            return Ok(Arc::new(handle_data));
2719        }
2720
2721        Err(Error::from_raw_os_error(libc::ENOENT))
2722    }
2723
2724    // extend or init the inodes number to one overlay if the current number is done.
2725    pub async fn extend_inode_alloc(&self, key: u64) {
2726        let next_inode = key * INODE_ALLOC_BATCH;
2727        let limit_inode = next_inode + INODE_ALLOC_BATCH - 1;
2728        self.inodes
2729            .write()
2730            .await
2731            .extend_inode_number(next_inode, limit_inode);
2732    }
2733}
2734
/// Bundles the parameters accepted by [`mount_fs`] for mounting an overlay
/// filesystem.
///
/// The type parameters let callers pass any path-like values for the
/// directories, any string-like value for the mapping, and any iterable of
/// path-like values for the lower directories.
#[derive(Debug, Clone)]
pub struct OverlayArgs<P, Q, R, M, N, I>
where
    P: AsRef<Path>,
    Q: AsRef<Path>,
    R: AsRef<Path>,
    M: AsRef<str>,
    N: Into<String>,
    I: IntoIterator<Item = R>,
{
    /// Path at which the filesystem is mounted.
    pub mountpoint: P,
    /// Root directory of the upper layer.
    pub upperdir: Q,
    /// Root directories of the lower layers, one layer per item.
    pub lowerdir: I,
    /// Selects a privileged mount when `true`, an unprivileged mount otherwise.
    pub privileged: bool,
    /// Optional mapping string handed to every passthrough layer.
    pub mapping: Option<M>,
    /// Optional filesystem name set on the mount options.
    pub name: Option<N>,
    /// Forwarded to the mount options' `allow_other` setting.
    pub allow_other: bool,
}
2754
2755/// Mounts the filesystem using the given parameters and returns the mount handle.
2756///
2757/// # Parameters
2758/// - `mountpoint`: Path to the mount point.
2759/// - `upperdir`: Path to the upper directory.
2760/// - `lowerdir`: Paths to the lower directories.
2761/// - `privileged`: If true, use privileged mount; otherwise, unprivileged mount.
2762/// - `mapping`: Optional user/group ID mapping for unprivileged mounts.
2763/// - `name`: Optional name for the filesystem.
2764/// - `allow_other`: If true, allows other users to access the filesystem.
2765///
2766/// # Returns
2767/// A mount handle on success.
2768pub async fn mount_fs<P, Q, R, M, N, I>(
2769    args: OverlayArgs<P, Q, R, M, N, I>,
2770) -> rfuse3::raw::MountHandle
2771where
2772    P: AsRef<Path>,
2773    Q: AsRef<Path>,
2774    R: AsRef<Path>,
2775    M: AsRef<str>,
2776    N: Into<String>,
2777    I: IntoIterator<Item = R>,
2778{
2779    // Create lower layers
2780    let mut lower_layers = Vec::new();
2781    for lower in args.lowerdir {
2782        let layer = new_passthroughfs_layer(PassthroughArgs {
2783            root_dir: lower,
2784            mapping: args.mapping.as_ref().map(|m| m.as_ref()),
2785        })
2786        .await
2787        .expect("Failed to create lower filesystem layer");
2788        lower_layers.push(Arc::new(layer));
2789    }
2790    // Create upper layer
2791    let upper_layer = Arc::new(
2792        new_passthroughfs_layer(PassthroughArgs {
2793            root_dir: args.upperdir,
2794            mapping: args.mapping.as_ref().map(|m| m.as_ref()),
2795        })
2796        .await
2797        .expect("Failed to create upper filesystem layer"),
2798    );
2799
2800    // Configure overlay filesystem
2801    let config = Config {
2802        mountpoint: args.mountpoint.as_ref().to_path_buf(),
2803        do_import: true,
2804        ..Default::default()
2805    };
2806    let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1)
2807        .expect("Failed to initialize OverlayFs");
2808    let logfs = LoggingFileSystem::new(overlayfs);
2809
2810    let mount_path: OsString = OsString::from(args.mountpoint.as_ref().as_os_str());
2811
2812    // Obtain the current user's uid and gid
2813    let uid = unsafe { libc::getuid() };
2814    let gid = unsafe { libc::getgid() };
2815
2816    let mut mount_options = MountOptions::default();
2817    mount_options
2818        .force_readdir_plus(true)
2819        .uid(uid)
2820        .gid(gid)
2821        .allow_other(args.allow_other);
2822    if let Some(name) = args.name {
2823        mount_options.fs_name(name);
2824    }
2825
2826    // Mount filesystem based on privilege flag and return the mount handle
2827    if !args.privileged {
2828        debug!("Mounting with unprivileged mode");
2829        Session::new(mount_options)
2830            .mount_with_unprivileged(logfs, mount_path)
2831            .await
2832            .expect("Unprivileged mount failed")
2833    } else {
2834        debug!("Mounting with privileged mode");
2835        Session::new(mount_options)
2836            .mount(logfs, mount_path)
2837            .await
2838            .expect("Privileged mount failed")
2839    }
2840}