Skip to main content

libfuse_fs/overlayfs/
mod.rs

1// Copyright (C) 2023 Ant Group. All rights reserved.
2//  2024 From [fuse_backend_rs](https://github.com/cloud-hypervisor/fuse-backend-rs)
3// SPDX-License-Identifier: Apache-2.0
4
5#![allow(missing_docs)]
6mod async_io;
7pub mod config;
8mod inode_store;
9mod layer;
10mod utils;
11
12//mod tempfile;
13use core::panic;
14use std::collections::HashMap;
15use std::ffi::{OsStr, OsString};
16use std::future::Future;
17use std::io::{Error, Result};
18use std::path::Path;
19
20use config::Config;
21use futures::StreamExt as _;
22use rfuse3::raw::reply::{
23    DirectoryEntry, DirectoryEntryPlus, ReplyAttr, ReplyEntry, ReplyOpen, ReplyStatFs,
24};
25use rfuse3::raw::{Filesystem, Request, Session};
26use std::sync::{Arc, Weak};
27use tracing::debug;
28use tracing::error;
29use tracing::info;
30use tracing::trace;
31
32use rfuse3::{Errno, FileType, MountOptions, mode_from_kind_and_perm};
/// The ASCII forward-slash character (`/`).
const SLASH_ASCII: char = '/';
34use futures::future::join_all;
35use futures::stream::iter;
36
37use crate::passthrough::{PassthroughArgs, PassthroughFs, new_passthroughfs_layer};
38use crate::util::convert_stat64_to_file_attr;
39use inode_store::InodeStore;
40use layer::Layer;
41use rfuse3::raw::logfs::LoggingFileSystem;
42use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
43
44use tokio::sync::{Mutex, RwLock};
45
46pub type Inode = u64;
47pub type Handle = u64;
48
49type BoxedLayer = PassthroughFs;
50//type BoxedFileSystem = Box<dyn FileSystem<Inode = Inode, Handle = Handle> + Send + Sync>;
51const INODE_ALLOC_BATCH: u64 = 0x1_0000_0000;
52// RealInode represents one inode object in specific layer.
53// Also, each RealInode maps to one Entry, which should be 'forgotten' after drop.
54// Important note: do not impl Clone trait for it or refcount will be messed up.
55pub(crate) struct RealInode {
56    pub layer: Arc<PassthroughFs>,
57    pub in_upper_layer: bool,
58    pub inode: u64,
59    // File is whiteouted, we need to hide it.
60    pub whiteout: bool,
61    // Directory is opaque, we need to hide all entries inside it.
62    pub opaque: bool,
63    pub stat: Option<ReplyAttr>,
64}
65
66// OverlayInode must be protected by lock, it can be operated by multiple threads.
67// #[derive(Default)]
68pub(crate) struct OverlayInode {
69    // Inode hash table, map from 'name' to 'OverlayInode'.
70    pub childrens: Mutex<HashMap<String, Arc<OverlayInode>>>,
71    pub parent: Mutex<Weak<OverlayInode>>,
72    // Backend inodes from all layers.
73    pub real_inodes: Mutex<Vec<Arc<RealInode>>>,
74    // Inode number.
75    pub inode: u64,
76    pub path: RwLock<String>,
77    pub name: RwLock<String>,
78    pub lookups: AtomicU64,
79    // Node is whiteout-ed.
80    pub whiteout: AtomicBool,
81    // Directory is loaded.
82    pub loaded: AtomicBool,
83}
84
/// Cache policy selector; [`CachePolicy::Auto`] is the default.
///
/// The enum carries no data, so it derives the usual value-type traits: it can be
/// printed in logs (`Debug`), copied freely (`Clone`/`Copy`) and compared
/// (`PartialEq`/`Eq`).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum CachePolicy {
    /// Caching disabled.
    Never,
    /// Default policy.
    #[default]
    Auto,
    /// Caching always enabled.
    Always,
}
92pub struct OverlayFs {
93    config: Config,
94    lower_layers: Vec<Arc<PassthroughFs>>,
95    upper_layer: Option<Arc<PassthroughFs>>,
96    // All inodes in FS.
97    inodes: RwLock<InodeStore>,
98    // Open file handles.
99    handles: Mutex<HashMap<u64, Arc<HandleData>>>,
100    next_handle: AtomicU64,
101    writeback: AtomicBool,
102    no_open: AtomicBool,
103    no_opendir: AtomicBool,
104    killpriv_v2: AtomicBool,
105    perfile_dax: AtomicBool,
106    root_inodes: u64,
107}
108
109// This is a wrapper of one inode in specific layer, It can't impl Clone trait.
110struct RealHandle {
111    layer: Arc<PassthroughFs>,
112    in_upper_layer: bool,
113    inode: u64,
114    handle: AtomicU64,
115}
116
117struct HandleData {
118    node: Arc<OverlayInode>,
119    //offset: libc::off_t,
120    real_handle: Option<RealHandle>,
121    // Cache the directory entries for stable readdir offsets.
122    // The snapshot contains all necessary info to avoid re-accessing childrens map.
123    dir_snapshot: Mutex<Option<Vec<DirectoryEntryPlus>>>,
124}
125
126// RealInode is a wrapper of one inode in specific layer.
127// All layer operations returning Entry should be wrapped in RealInode implementation
128// so that we can increase the refcount(lookup count) of each inode and decrease it after Drop.
129// Important: do not impl 'Copy' trait for it or refcount will be messed up.
130impl RealInode {
131    async fn new(
132        layer: Arc<PassthroughFs>,
133        in_upper_layer: bool,
134        inode: u64,
135        whiteout: bool,
136        opaque: bool,
137    ) -> Self {
138        let mut ri = RealInode {
139            layer,
140            in_upper_layer,
141            inode,
142            whiteout,
143            opaque,
144            stat: None,
145        };
146        match ri.stat64_ignore_enoent(&Request::default()).await {
147            Ok(v) => {
148                ri.stat = v;
149            }
150            Err(e) => {
151                error!("stat64 failed during RealInode creation: {e}");
152            }
153        }
154        ri
155    }
156
157    async fn stat64(&self, req: &Request) -> Result<ReplyAttr> {
158        let layer = self.layer.as_ref();
159        if self.inode == 0 {
160            return Err(Error::from_raw_os_error(libc::ENOENT));
161        }
162        // trace!("stat64: trying to getattr req: {:?}", req);
163        layer
164            .getattr(*req, self.inode, None, 0)
165            .await
166            .map_err(|e| e.into())
167    }
168
169    async fn stat64_ignore_enoent(&self, req: &Request) -> Result<Option<ReplyAttr>> {
170        match self.stat64(req).await {
171            Ok(v1) => Ok(Some(v1)),
172            Err(e) => match e.raw_os_error() {
173                Some(raw_error) => {
174                    if raw_error == libc::ENOENT
175                        || raw_error == libc::ENAMETOOLONG
176                        || raw_error == libc::ESTALE
177                    {
178                        return Ok(None);
179                    }
180                    Err(e)
181                }
182                None => Err(e),
183            },
184        }
185    }
186
187    // Do real lookup action in specific layer, this call will increase Entry refcount which must be released later.
188    async fn lookup_child_ignore_enoent(
189        &self,
190        ctx: Request,
191        name: &str,
192    ) -> Result<Option<ReplyEntry>> {
193        let cname = OsStr::new(name);
194        // Real inode must have a layer.
195        let layer = self.layer.as_ref();
196        match layer.lookup(ctx, self.inode, cname).await {
197            Ok(v) => {
198                // Negative entry also indicates missing entry.
199                if v.attr.ino == 0 {
200                    return Ok(None);
201                }
202                Ok(Some(v))
203            }
204            Err(e) => {
205                let ioerror: std::io::Error = e.into();
206                if let Some(raw_error) = ioerror.raw_os_error()
207                    && (raw_error == libc::ENOENT || raw_error == libc::ENAMETOOLONG)
208                {
209                    return Ok(None);
210                }
211
212                Err(e.into())
213            }
214        }
215    }
216
217    // Find child inode in same layer under this directory(Self).
218    // Return None if not found.
219    async fn lookup_child(&self, ctx: Request, name: &str) -> Result<Option<RealInode>> {
220        if self.whiteout {
221            return Ok(None);
222        }
223
224        let layer = self.layer.as_ref();
225
226        // Find child Entry with <name> under directory with inode <self.inode>.
227        match self.lookup_child_ignore_enoent(ctx, name).await? {
228            Some(v) => {
229                // The Entry must be forgotten in each layer, which will be done automatically by Drop operation.
230                let (whiteout, opaque) = if v.attr.kind == FileType::Directory {
231                    (false, layer.is_opaque(ctx, v.attr.ino).await?)
232                } else {
233                    (layer.is_whiteout(ctx, v.attr.ino).await?, false)
234                };
235
236                Ok(Some(RealInode {
237                    layer: self.layer.clone(),
238                    in_upper_layer: self.in_upper_layer,
239                    inode: v.attr.ino,
240                    whiteout,
241                    opaque,
242                    stat: Some(ReplyAttr {
243                        ttl: v.ttl,
244                        attr: v.attr,
245                    }),
246                }))
247            }
248            None => Ok(None),
249        }
250    }
251
252    // Read directory entries from specific RealInode, error out if it's not directory.
253    async fn readdir(&self, ctx: Request) -> Result<HashMap<String, RealInode>> {
254        // Deleted inode should not be read.
255        if self.whiteout {
256            return Err(Error::from_raw_os_error(libc::ENOENT));
257        }
258        // trace!("readdir: before stat");
259        let stat = match self.stat.clone() {
260            Some(v) => v,
261            None => self.stat64(&ctx).await?,
262        };
263
264        // Must be directory.
265        if stat.attr.kind != FileType::Directory {
266            return Err(Error::from_raw_os_error(libc::ENOTDIR));
267        }
268
269        // Open the directory and load each entry.
270        let opendir_res = self
271            .layer
272            .opendir(ctx, self.inode, libc::O_RDONLY as u32)
273            .await;
274        // trace!("readdir: after opendir");
275        let handle = match opendir_res {
276            Ok(handle) => handle,
277
278            // opendir may not be supported if no_opendir is set, so we can ignore this error.
279            Err(e) => {
280                let ioerror: std::io::Error = e.into();
281                match ioerror.raw_os_error() {
282                    Some(raw_error) => {
283                        if raw_error == libc::ENOSYS {
284                            // We can still call readdir with inode if opendir is not supported in this layer.
285                            ReplyOpen { fh: 0, flags: 0 }
286                        } else {
287                            return Err(e.into());
288                        }
289                    }
290                    None => {
291                        return Err(e.into());
292                    }
293                }
294            }
295        };
296
297        let child_names = self.layer.readdir(ctx, self.inode, handle.fh, 0).await?;
298        // Non-zero handle indicates successful 'open', we should 'release' it.
299        if handle.fh > 0 {
300            self.layer
301                .releasedir(ctx, self.inode, handle.fh, handle.flags)
302                .await?
303            //DIFF
304        }
305
306        // Lookup all child and construct "RealInode"s.
307        let child_real_inodes = Arc::new(Mutex::new(HashMap::new()));
308        // trace!("readdir: before iter childrens");
309        let a_map = child_names.entries.map(|entery| async {
310            match entery {
311                Ok(dire) => {
312                    let dname = dire.name.into_string().unwrap();
313                    if dname == "." || dname == ".." {
314                        // Skip . and .. entries.
315                        return Ok(());
316                    }
317                    // trace!("readdir: before lookup child: dname={}", dname);
318                    if let Some(child) = self.lookup_child(ctx, &dname).await? {
319                        child_real_inodes.lock().await.insert(dname, child);
320                    }
321                    Ok(())
322                }
323                Err(err) => Err(err),
324            }
325        });
326        let k = join_all(a_map.collect::<Vec<_>>().await).await;
327        drop(k);
328        // Now into_inner func is safety.
329        let re = Arc::try_unwrap(child_real_inodes)
330            .map_err(|_| Errno::new_not_exist())?
331            .into_inner();
332        // trace!("readdir: return");
333        Ok(re)
334    }
335
336    async fn create_whiteout(&self, ctx: Request, name: &str) -> Result<RealInode> {
337        if !self.in_upper_layer {
338            return Err(Error::from_raw_os_error(libc::EROFS));
339        }
340
341        // from &str to &OsStr
342        let name_osstr = OsStr::new(name);
343        let entry = self
344            .layer
345            .create_whiteout(ctx, self.inode, name_osstr)
346            .await?;
347
348        // Wrap whiteout to RealInode.
349        Ok(RealInode {
350            layer: self.layer.clone(),
351            in_upper_layer: true,
352            inode: entry.attr.ino,
353            whiteout: true,
354            opaque: false,
355            stat: Some(ReplyAttr {
356                ttl: entry.ttl,
357                attr: entry.attr,
358            }),
359        })
360    }
361
362    async fn mkdir(&self, ctx: Request, name: &str, mode: u32, umask: u32) -> Result<RealInode> {
363        if !self.in_upper_layer {
364            return Err(Error::from_raw_os_error(libc::EROFS));
365        }
366
367        let name_osstr = OsStr::new(name);
368        let entry = self
369            .layer
370            .mkdir(ctx, self.inode, name_osstr, mode, umask)
371            .await?;
372
373        // update node's first_layer
374        Ok(RealInode {
375            layer: self.layer.clone(),
376            in_upper_layer: true,
377            inode: entry.attr.ino,
378            whiteout: false,
379            opaque: false,
380            stat: Some(ReplyAttr {
381                ttl: entry.ttl,
382                attr: entry.attr,
383            }),
384        })
385    }
386
387    async fn create(
388        &self,
389        ctx: Request,
390        name: &str,
391        mode: u32,
392        flags: u32,
393    ) -> Result<(RealInode, Option<u64>)> {
394        if !self.in_upper_layer {
395            return Err(Error::from_raw_os_error(libc::EROFS));
396        }
397        let name = OsStr::new(name);
398        let create_rep = self
399            .layer
400            .create(ctx, self.inode, name, mode, flags)
401            .await?;
402
403        Ok((
404            RealInode {
405                layer: self.layer.clone(),
406                in_upper_layer: true,
407                inode: create_rep.attr.ino,
408                whiteout: false,
409                opaque: false,
410                stat: Some(ReplyAttr {
411                    ttl: create_rep.ttl,
412                    attr: create_rep.attr,
413                }),
414            },
415            Some(create_rep.fh),
416        ))
417    }
418
419    async fn mknod(
420        &self,
421        ctx: Request,
422        name: &str,
423        mode: u32,
424        rdev: u32,
425        _umask: u32,
426    ) -> Result<RealInode> {
427        if !self.in_upper_layer {
428            return Err(Error::from_raw_os_error(libc::EROFS));
429        }
430        let name = OsStr::new(name);
431        let rep = self.layer.mknod(ctx, self.inode, name, mode, rdev).await?;
432        Ok(RealInode {
433            layer: self.layer.clone(),
434            in_upper_layer: true,
435            inode: rep.attr.ino,
436            whiteout: false,
437            opaque: false,
438            stat: Some(ReplyAttr {
439                ttl: rep.ttl,
440                attr: rep.attr,
441            }),
442        })
443    }
444
445    async fn link(&self, ctx: Request, ino: u64, name: &str) -> Result<RealInode> {
446        if !self.in_upper_layer {
447            return Err(Error::from_raw_os_error(libc::EROFS));
448        }
449        let name = OsStr::new(name);
450        let entry = self.layer.link(ctx, ino, self.inode, name).await?;
451
452        let opaque = if utils::is_dir(&entry.attr.kind) {
453            self.layer.is_opaque(ctx, entry.attr.ino).await?
454        } else {
455            false
456        };
457        Ok(RealInode {
458            layer: self.layer.clone(),
459            in_upper_layer: true,
460            inode: entry.attr.ino,
461            whiteout: false,
462            opaque,
463            stat: Some(ReplyAttr {
464                ttl: entry.ttl,
465                attr: entry.attr,
466            }),
467        })
468    }
469
470    // Create a symlink in self directory.
471    async fn symlink(&self, ctx: Request, link_name: &str, filename: &str) -> Result<RealInode> {
472        if !self.in_upper_layer {
473            return Err(Error::from_raw_os_error(libc::EROFS));
474        }
475        let link_name = OsStr::new(link_name);
476        let filename = OsStr::new(filename);
477        let entry = self
478            .layer
479            .symlink(ctx, self.inode, filename, link_name)
480            .await?;
481
482        Ok(RealInode {
483            layer: self.layer.clone(),
484            in_upper_layer: true,
485            inode: entry.attr.ino,
486            whiteout: false,
487            opaque: false,
488            stat: Some(ReplyAttr {
489                ttl: entry.ttl,
490                attr: entry.attr,
491            }),
492        })
493    }
494}
495
496impl Drop for RealInode {
497    fn drop(&mut self) {
498        let layer = Arc::clone(&self.layer);
499        let inode = self.inode;
500        tokio::spawn(async move {
501            let ctx = Request::default();
502            layer.forget(ctx, inode, 1).await;
503        });
504    }
505}
506
507impl OverlayInode {
508    pub fn new() -> Self {
509        Self {
510            childrens: Mutex::new(HashMap::new()),
511            parent: Mutex::new(Weak::new()),
512            real_inodes: Mutex::new(vec![]),
513            inode: 0,
514            path: RwLock::new(String::new()),
515            name: RwLock::new(String::new()),
516            lookups: AtomicU64::new(0),
517            whiteout: AtomicBool::new(false),
518            loaded: AtomicBool::new(false),
519        }
520    }
521    // Allocate new OverlayInode based on one RealInode,
522    // inode number is always 0 since only OverlayFs has global unique inode allocator.
523    pub async fn new_from_real_inode(
524        name: &str,
525        ino: u64,
526        path: String,
527        real_inode: RealInode,
528    ) -> Self {
529        let mut new = OverlayInode::new();
530        new.inode = ino;
531        new.path = path.into();
532        new.name = name.to_string().into();
533        new.whiteout.store(real_inode.whiteout, Ordering::Relaxed);
534        new.lookups = AtomicU64::new(1);
535        new.real_inodes = Mutex::new(vec![real_inode.into()]);
536        new
537    }
538
539    pub async fn new_from_real_inodes(
540        name: &str,
541        ino: u64,
542        path: String,
543        real_inodes: Vec<RealInode>,
544    ) -> Result<Self> {
545        if real_inodes.is_empty() {
546            error!("BUG: new_from_real_inodes() called with empty real_inodes");
547            return Err(Error::from_raw_os_error(libc::EINVAL));
548        }
549
550        let mut first = true;
551        let mut new = Self::new();
552        for ri in real_inodes {
553            let whiteout = ri.whiteout;
554            let opaque = ri.opaque;
555            let stat = match &ri.stat {
556                Some(v) => v.clone(),
557                None => ri.stat64(&Request::default()).await?,
558            };
559
560            if first {
561                first = false;
562                new = Self::new_from_real_inode(name, ino, path.clone(), ri).await;
563
564                // This is whiteout, no need to check lower layers.
565                if whiteout {
566                    break;
567                }
568
569                // A non-directory file shadows all lower layers as default.
570                if !utils::is_dir(&stat.attr.kind) {
571                    break;
572                }
573
574                // Opaque directory shadows all lower layers.
575                if opaque {
576                    break;
577                }
578            } else {
579                // This is whiteout, no need to record this, break directly.
580                if ri.whiteout {
581                    break;
582                }
583
584                // Only directory have multiple real inodes, so if this is non-first real-inode
585                // and it's not directory, it should indicates some invalid layout. @weizhang555
586                if !utils::is_dir(&stat.attr.kind) {
587                    error!("invalid layout: non-directory has multiple real inodes");
588                    break;
589                }
590
591                // Valid directory.
592                new.real_inodes.lock().await.push(ri.into());
593                // Opaque directory shadows all lower layers.
594                if opaque {
595                    break;
596                }
597            }
598        }
599        Ok(new)
600    }
601
602    pub async fn stat64(&self, ctx: Request) -> Result<ReplyAttr> {
603        // try layers in order or just take stat from first layer?
604        for l in self.real_inodes.lock().await.iter() {
605            if let Some(v) = l.stat64_ignore_enoent(&ctx).await? {
606                return Ok(v);
607            }
608        }
609
610        // not in any layer
611        Err(Error::from_raw_os_error(libc::ENOENT))
612    }
613
614    pub async fn is_dir(&self, ctx: Request) -> Result<bool> {
615        let st = self.stat64(ctx).await?;
616        Ok(utils::is_dir(&st.attr.kind))
617    }
618
619    pub async fn count_entries_and_whiteout(&self, ctx: Request) -> Result<(u64, u64)> {
620        let mut count = 0;
621        let mut whiteouts = 0;
622
623        let st = self.stat64(ctx).await?;
624
625        // must be directory
626        if !utils::is_dir(&st.attr.kind) {
627            return Err(Error::from_raw_os_error(libc::ENOTDIR));
628        }
629
630        for (_, child) in self.childrens.lock().await.iter() {
631            if child.whiteout.load(Ordering::Relaxed) {
632                whiteouts += 1;
633            } else {
634                count += 1;
635            }
636        }
637        Ok((count, whiteouts))
638    }
639
640    pub async fn open(
641        &self,
642        ctx: Request,
643        flags: u32,
644        _fuse_flags: u32,
645    ) -> Result<(Arc<BoxedLayer>, ReplyOpen)> {
646        let (layer, _, inode) = self.first_layer_inode().await;
647        let ro = layer.as_ref().open(ctx, inode, flags).await?;
648        Ok((layer, ro))
649    }
650
651    // Self is directory, fill all childrens.
652    pub async fn scan_childrens(self: &Arc<Self>, ctx: Request) -> Result<Vec<OverlayInode>> {
653        let st = self.stat64(ctx).await?;
654        if !utils::is_dir(&st.attr.kind) {
655            return Err(Error::from_raw_os_error(libc::ENOTDIR));
656        }
657
658        let mut all_layer_inodes: HashMap<String, Vec<RealInode>> = HashMap::new();
659        // read out directories from each layer
660        // Scan from upper layer to lower layer.
661        for ri in self.real_inodes.lock().await.iter() {
662            if ri.whiteout {
663                // Node is deleted from some upper layer, skip it.
664                debug!("directory is whiteout");
665                break;
666            }
667
668            let stat = match &ri.stat {
669                Some(v) => v.clone(),
670                None => ri.stat64(&ctx).await?,
671            };
672
673            if !utils::is_dir(&stat.attr.kind) {
674                debug!("{} is not a directory", self.path.read().await);
675                // not directory
676                break;
677            }
678
679            // Read all entries from one layer.
680            let entries: HashMap<String, RealInode> = ri.readdir(ctx).await?;
681
682            // Merge entries from one layer to all_layer_inodes.
683            for (name, inode) in entries {
684                match all_layer_inodes.get_mut(&name) {
685                    Some(v) => {
686                        // Append additional RealInode to the end of vector.
687                        v.push(inode)
688                    }
689                    None => {
690                        all_layer_inodes.insert(name, vec![inode]);
691                    }
692                }
693            }
694
695            // if opaque, stop here
696            if ri.opaque {
697                debug!("directory {} is opaque", self.path.read().await);
698                break;
699            }
700        }
701
702        // Construct OverlayInode for each entry.
703        let mut childrens = vec![];
704        for (name, real_inodes) in all_layer_inodes {
705            // Inode numbers are not allocated yet.
706            let path = format!("{}/{}", self.path.read().await, name);
707            let new = Self::new_from_real_inodes(name.as_str(), 0, path, real_inodes).await?;
708            childrens.push(new);
709        }
710
711        Ok(childrens)
712    }
713
714    /// Create a new directory in upper layer for node, node must be directory.
715    ///
716    /// Recursively ensures a directory path exists in the upper layer.
717    ///
718    /// This function is a critical part of the copy-up process. When a file or directory
719    /// needs to be copied up, this function is called on its parent to ensure the entire
720    /// directory hierarchy exists in the upper layer first. It works recursively:
721    /// 1. If the current directory is already in the upper layer, it does nothing.
722    /// 2. If not, it first calls itself on its own parent directory.
723    /// 3. Once the parent is guaranteed to be in the upper layer, it creates the current
724    ///    directory within the parent's upper-layer representation.
725    ///
726    /// Crucially, it preserves the original directory's ownership (UID/GID) and permissions
727    /// by using the [`do_getattr_helper`][crate::passthrough::PassthroughFs::do_getattr_helper] and
728    /// [`do_mkdir_helper`][crate::passthrough::PassthroughFs::do_mkdir_helper] functions.
729    pub async fn create_upper_dir(
730        self: Arc<Self>,
731        ctx: Request,
732        mode_umask: Option<(u32, u32)>,
733    ) -> Result<()> {
734        // To preserve original ownership, we must get the raw, unmapped host attributes.
735        // We achieve this by calling `do_getattr_helper`, which is specifically designed
736        // to bypass the ID mapping logic. This is safe and does not affect other
737        // functionalities because `do_getattr_helper` and the standard `stat64()` call
738        // both rely on the same underlying `stat` system call; they only differ in
739        // whether the resulting `uid` and `gid` are mapped.
740        let (self_layer, _, self_inode) = self.first_layer_inode().await;
741        let re = self_layer.do_getattr_helper(self_inode, None).await?;
742        let st = ReplyAttr {
743            ttl: re.1,
744            attr: convert_stat64_to_file_attr(re.0),
745        };
746        if !utils::is_dir(&st.attr.kind) {
747            return Err(Error::from_raw_os_error(libc::ENOTDIR));
748        }
749
750        // If node already has upper layer, we can just return here.
751        if self.in_upper_layer().await {
752            return Ok(());
753        }
754
755        // not in upper layer, check parent.
756        let pnode = if let Some(n) = self.parent.lock().await.upgrade() {
757            Arc::clone(&n)
758        } else {
759            return Err(Error::other("no parent?"));
760        };
761
762        if !pnode.in_upper_layer().await {
763            Box::pin(pnode.clone().create_upper_dir(ctx, None)).await?; // recursive call
764        }
765        let child: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
766        let c_name = self.name.read().await.clone();
767        let _ = pnode
768            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
769                match parent_upper_inode {
770                    Some(parent_ri) => {
771                        let ri = match mode_umask {
772                            // We manually unfold the `mkdir` logic here instead of calling the `mkdir` method directly.
773                            // This is necessary to preserve the original directory's UID and GID during the copy-up process.
774                            Some((mode, umask)) => {
775                                if !parent_ri.in_upper_layer {
776                                    return Err(Error::from_raw_os_error(libc::EROFS));
777                                }
778                                let name_osstr = OsStr::new(&c_name);
779                                let entry = parent_ri
780                                    .layer
781                                    .do_mkdir_helper(
782                                        ctx,
783                                        parent_ri.inode,
784                                        name_osstr,
785                                        mode,
786                                        umask,
787                                        st.attr.uid,
788                                        st.attr.gid,
789                                    )
790                                    .await?;
791                                RealInode {
792                                    layer: parent_ri.layer.clone(),
793                                    in_upper_layer: true,
794                                    inode: entry.attr.ino,
795                                    whiteout: false,
796                                    opaque: false,
797                                    stat: Some(ReplyAttr {
798                                        ttl: entry.ttl,
799                                        attr: entry.attr,
800                                    }),
801                                }
802                            }
803                            None => {
804                                if !parent_ri.in_upper_layer {
805                                    return Err(Error::from_raw_os_error(libc::EROFS));
806                                }
807                                let name_osstr = OsStr::new(&c_name);
808                                let entry = parent_ri
809                                    .layer
810                                    .do_mkdir_helper(
811                                        ctx,
812                                        parent_ri.inode,
813                                        name_osstr,
814                                        mode_from_kind_and_perm(st.attr.kind, st.attr.perm),
815                                        0,
816                                        st.attr.uid,
817                                        st.attr.gid,
818                                    )
819                                    .await?;
820                                RealInode {
821                                    layer: parent_ri.layer.clone(),
822                                    in_upper_layer: true,
823                                    inode: entry.attr.ino,
824                                    whiteout: false,
825                                    opaque: false,
826                                    stat: Some(ReplyAttr {
827                                        ttl: entry.ttl,
828                                        attr: entry.attr,
829                                    }),
830                                }
831                            }
832                        };
833                        // create directory here
834                        child.lock().await.replace(ri);
835                    }
836                    None => {
837                        error!(
838                            "BUG: parent {} has no upper inode after create_upper_dir",
839                            pnode.inode
840                        );
841                        return Err(Error::from_raw_os_error(libc::EINVAL));
842                    }
843                }
844                Ok(false)
845            })
846            .await?;
847
848        if let Some(ri) = child.lock().await.take() {
849            // Push the new real inode to the front of vector.
850            self.add_upper_inode(ri, false).await;
851        }
852
853        Ok(())
854    }
855
856    // Add new upper RealInode to OverlayInode, clear all lower RealInodes if 'clear_lowers' is true.
857    async fn add_upper_inode(self: &Arc<Self>, ri: RealInode, clear_lowers: bool) {
858        let mut inodes = self.real_inodes.lock().await;
859        // Update self according to upper attribute.
860        self.whiteout.store(ri.whiteout, Ordering::Relaxed);
861
862        // Push the new real inode to the front of vector.
863        let mut new = vec![Arc::new(ri)];
864        // Drain lower RealInodes.
865        let lowers = inodes.drain(..).collect::<Vec<Arc<RealInode>>>();
866        if !clear_lowers {
867            // If not clear lowers, append them to the end of vector.
868            new.extend(lowers);
869        }
870        inodes.extend(new);
871    }
872
873    // return the uppder layer fs.
874    pub async fn in_upper_layer(&self) -> bool {
875        let all_inodes = self.real_inodes.lock().await;
876        let first = all_inodes.first();
877        match first {
878            Some(v) => v.in_upper_layer,
879            None => false,
880        }
881    }
882
883    pub async fn upper_layer_only(&self) -> bool {
884        let real_inodes = self.real_inodes.lock().await;
885        let first = real_inodes.first();
886        match first {
887            Some(v) => {
888                if !v.in_upper_layer {
889                    false
890                } else {
891                    real_inodes.len() == 1
892                }
893            }
894            None => false,
895        }
896    }
897
898    pub async fn first_layer_inode(&self) -> (Arc<BoxedLayer>, bool, u64) {
899        let all_inodes = self.real_inodes.lock().await;
900        let first = all_inodes.first();
901        match first {
902            Some(v) => (v.layer.clone(), v.in_upper_layer, v.inode),
903            None => panic!("BUG: dangling OverlayInode"),
904        }
905    }
906
907    pub async fn child(&self, name: &str) -> Option<Arc<OverlayInode>> {
908        self.childrens.lock().await.get(name).cloned()
909    }
910
911    pub async fn remove_child(&self, name: &str) -> Option<Arc<OverlayInode>> {
912        self.childrens.lock().await.remove(name)
913    }
914
915    pub async fn insert_child(&self, name: &str, node: Arc<OverlayInode>) {
916        self.childrens.lock().await.insert(name.to_string(), node);
917    }
918
919    /// Handles operations on the upper layer inode of an `OverlayInode` in a thread-safe manner.
920    ///
921    /// This function locks the `real_inodes` field of the `OverlayInode` and retrieves the first
922    /// real inode (if any). If the first inode exists and belongs to the upper layer (`in_upper_layer` is true),
923    /// the provided callback `f` is invoked with the inode wrapped in `Some`. Otherwise, `f` is invoked with `None`.
924    ///
925    /// # Arguments
926    /// * `f`: A closure that takes an `Option<RealInode>` and returns a future. The future resolves to a `Result<bool>`.
927    ///
928    /// # Returns
929    /// * `Ok(bool)`: The result of invoking the callback `f`.
930    /// * `Err(Erron)`: An error is returned if:
931    ///   - There are no backend inodes (`real_inodes` is empty), indicating a dangling `OverlayInode`.
932    ///   - The callback `f` itself returns an error.
933    ///
934    /// # Behavior
935    /// 1. Locks the `real_inodes` field to ensure thread safety.
936    /// 2. Checks if the first inode exists:
937    ///    - If it exists and is in the upper layer, invokes `f(Some(inode))`.
938    ///    - If it exists but is not in the upper layer, invokes `f(None)`.
939    /// 3. If no inodes exist, returns an error indicating a dangling `OverlayInode`.
940    ///
941    /// # Example Use Case
942    /// This function is typically used to perform operations on the upper layer inode of an `OverlayInode`,
943    /// such as creating, modifying, or deleting files/directories in the overlay filesystem's upper layer.
944    pub async fn handle_upper_inode_locked<F, Fut>(&self, f: F) -> Result<bool>
945    where
946        // Can pass a &RealInode (or None) to f for any lifetime 'a
947        F: FnOnce(Option<Arc<RealInode>>) -> Fut,
948        // f returns a Future that must live at least as long as 'a
949        Fut: Future<Output = Result<bool>>,
950    {
951        let all_inodes = self.real_inodes.lock().await;
952        let first = all_inodes.first();
953        match first {
954            Some(v) => {
955                if v.in_upper_layer {
956                    f(Some(v.clone())).await
957                } else {
958                    f(None).await
959                }
960            }
961            None => Err(Error::other(format!(
962                "BUG: dangling OverlayInode {} without any backend inode",
963                self.inode
964            ))),
965        }
966    }
967}
968#[allow(unused)]
969fn entry_type_from_mode(mode: libc::mode_t) -> u8 {
970    match mode & libc::S_IFMT {
971        libc::S_IFBLK => libc::DT_BLK,
972        libc::S_IFCHR => libc::DT_CHR,
973        libc::S_IFDIR => libc::DT_DIR,
974        libc::S_IFIFO => libc::DT_FIFO,
975        libc::S_IFLNK => libc::DT_LNK,
976        libc::S_IFREG => libc::DT_REG,
977        libc::S_IFSOCK => libc::DT_SOCK,
978        _ => libc::DT_UNKNOWN,
979    }
980}
981impl OverlayFs {
982    pub fn new(
983        upper: Option<Arc<BoxedLayer>>,
984        lowers: Vec<Arc<BoxedLayer>>,
985        params: Config,
986        root_inode: u64,
987    ) -> Result<Self> {
988        Ok(OverlayFs {
989            config: params,
990            lower_layers: lowers,
991            upper_layer: upper,
992            inodes: RwLock::new(InodeStore::new()),
993            handles: Mutex::new(HashMap::new()),
994            next_handle: AtomicU64::new(1),
995            writeback: AtomicBool::new(false),
996            no_open: AtomicBool::new(false),
997            no_opendir: AtomicBool::new(false),
998            killpriv_v2: AtomicBool::new(false),
999            perfile_dax: AtomicBool::new(false),
1000            root_inodes: root_inode,
1001        })
1002    }
1003
1004    pub fn root_inode(&self) -> Inode {
1005        self.root_inodes
1006    }
1007
1008    async fn alloc_inode(&self, path: &str) -> Result<u64> {
1009        self.inodes.write().await.alloc_inode(path)
1010    }
1011
1012    /// Add a file layer and stack and merge the previous file layers.
1013    pub async fn push_layer(&mut self, layer: Arc<BoxedLayer>) -> Result<()> {
1014        let upper = self.upper_layer.take();
1015        if let Some(upper) = upper {
1016            self.lower_layers.push(upper);
1017        }
1018        self.upper_layer = Some(layer);
1019        // TODO: merge previous file layers. need optimization
1020        self.import().await?;
1021        Ok(())
1022    }
1023
1024    pub async fn import(&self) -> Result<()> {
1025        let mut root = OverlayInode::new();
1026        root.inode = self.root_inode();
1027        root.path = String::from("").into();
1028        root.name = String::from("").into();
1029        root.lookups = AtomicU64::new(2);
1030        root.real_inodes = Mutex::new(vec![]);
1031        let ctx = Request::default();
1032
1033        // Update upper inode
1034        if let Some(layer) = self.upper_layer.as_ref() {
1035            let ino = layer.root_inode();
1036            let real = RealInode::new(
1037                layer.clone(),
1038                true,
1039                ino,
1040                false,
1041                layer.is_opaque(ctx, ino).await?,
1042            )
1043            .await;
1044            root.real_inodes.lock().await.push(real.into());
1045        }
1046
1047        // Update lower inodes.
1048        for layer in self.lower_layers.iter() {
1049            let ino = layer.root_inode();
1050            let real: RealInode = RealInode::new(
1051                layer.clone(),
1052                false,
1053                ino,
1054                false,
1055                layer.is_opaque(ctx, ino).await?,
1056            )
1057            .await;
1058            root.real_inodes.lock().await.push(real.into());
1059        }
1060        let root_node = Arc::new(root);
1061
1062        // insert root inode into hash
1063        self.insert_inode(self.root_inode(), Arc::clone(&root_node))
1064            .await;
1065
1066        info!("loading root directory");
1067        self.load_directory(ctx, &root_node).await?;
1068        info!("loaded root directory");
1069
1070        Ok(())
1071    }
1072
1073    async fn root_node(&self) -> Arc<OverlayInode> {
1074        // Root node must exist.
1075        self.get_active_inode(self.root_inode()).await.unwrap()
1076    }
1077
1078    async fn insert_inode(&self, inode: u64, node: Arc<OverlayInode>) {
1079        self.inodes.write().await.insert_inode(inode, node).await;
1080    }
1081
1082    async fn get_active_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1083        self.inodes.read().await.get_inode(inode)
1084    }
1085
1086    // Get inode which is active or deleted.
1087    async fn get_all_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1088        let inode_store = self.inodes.read().await;
1089        match inode_store.get_inode(inode) {
1090            Some(n) => Some(n),
1091            None => inode_store.get_deleted_inode(inode),
1092        }
1093    }
1094
1095    // Return the inode only if it's permanently deleted from both self.inodes and self.deleted_inodes.
1096    async fn remove_inode(
1097        &self,
1098        inode: u64,
1099        path_removed: Option<String>,
1100    ) -> Option<Arc<OverlayInode>> {
1101        self.inodes
1102            .write()
1103            .await
1104            .remove_inode(inode, path_removed)
1105            .await
1106    }
1107
1108    // Lookup child OverlayInode with <name> under <parent> directory.
1109    // If name is empty, return parent itself.
1110    // Parent dir will be loaded, but returned OverlayInode won't.
1111    async fn lookup_node(
1112        &self,
1113        ctx: Request,
1114        parent: Inode,
1115        name: &str,
1116    ) -> Result<Arc<OverlayInode>> {
1117        if name.contains(SLASH_ASCII) {
1118            return Err(Error::from_raw_os_error(libc::EINVAL));
1119        }
1120
1121        // Parent inode is expected to be loaded before this function is called.
1122        // TODO: Is this correct?
1123        let pnode = match self.get_active_inode(parent).await {
1124            Some(v) => v,
1125            None => {
1126                match self.get_all_inode(parent).await {
1127                    Some(v) => {
1128                        trace!(
1129                            "overlayfs:mod.rs:1031:lookup_node: parent inode {parent} is deleted"
1130                        );
1131                        v
1132                    }
1133                    None => {
1134                        trace!(
1135                            "overlayfs:mod.rs:1034:lookup_node: parent inode {parent} not found"
1136                        );
1137                        // Parent inode is not found, return ENOENT.
1138                        return Err(Error::from_raw_os_error(libc::ENOENT));
1139                    }
1140                }
1141            }
1142        };
1143
1144        // Parent is whiteout-ed, return ENOENT.
1145        if pnode.whiteout.load(Ordering::Relaxed) {
1146            return Err(Error::from_raw_os_error(libc::ENOENT));
1147        }
1148
1149        let st = pnode.stat64(ctx).await?;
1150        if utils::is_dir(&st.attr.kind) && !pnode.loaded.load(Ordering::Relaxed) {
1151            // Parent is expected to be directory, load it first.
1152            self.load_directory(ctx, &pnode).await?;
1153        }
1154
1155        // Current file or dir.
1156        if name.eq(".")  
1157            // Root directory has no parent.
1158            || (parent == self.root_inode() && name.eq("..")) 
1159            // Special convention: empty name indicates current dir.
1160            || name.is_empty()
1161        {
1162            return Ok(Arc::clone(&pnode));
1163        }
1164
1165        match pnode.child(name).await {
1166            // Child is found.
1167            Some(v) => Ok(v),
1168            None => {
1169                trace!("lookup_node: child {name} not found");
1170                Err(Error::from_raw_os_error(libc::ENOENT))
1171            }
1172        }
1173    }
1174
1175    async fn lookup_node_ignore_enoent(
1176        &self,
1177        ctx: Request,
1178        parent: u64,
1179        name: &str,
1180    ) -> Result<Option<Arc<OverlayInode>>> {
1181        match self.lookup_node(ctx, parent, name).await {
1182            Ok(n) => Ok(Some(Arc::clone(&n))),
1183            Err(e) => {
1184                if let Some(raw_error) = e.raw_os_error()
1185                    && raw_error == libc::ENOENT
1186                {
1187                    return Ok(None);
1188                }
1189                Err(e)
1190            }
1191        }
1192    }
1193
1194    // Load entries of the directory from all layers, if node is not directory, return directly.
1195    async fn load_directory(&self, ctx: Request, node: &Arc<OverlayInode>) -> Result<()> {
1196        if node.loaded.load(Ordering::Relaxed) {
1197            return Ok(());
1198        }
1199
1200        // We got all childrens without inode.
1201        // info!("before scan childrens, ctx: {:?}, node: {:?}", ctx, node.inode);
1202        let childrens = node.scan_childrens(ctx).await?;
1203        // info!("scanned children");
1204
1205        // =============== Start Lock Area ===================
1206        // Lock OverlayFs inodes.
1207        let mut inode_store = self.inodes.write().await;
1208        // Lock the OverlayInode and its childrens.
1209        let mut node_children = node.childrens.lock().await;
1210
1211        // Check again in case another 'load_directory' function call gets locks and want to do duplicated work.
1212        if node.loaded.load(Ordering::Relaxed) {
1213            return Ok(());
1214        }
1215
1216        // Now we have two locks' protection, Fs inodes lock and OverlayInode's childrens lock.
1217        // info!("before iter childrens");
1218        for mut child in childrens.into_iter() {
1219            // Allocate inode for each child.
1220            let ino = inode_store.alloc_inode(&child.path.read().await)?;
1221
1222            let name = child.name.read().await.clone();
1223            child.inode = ino;
1224            // Create bi-directional link between parent and child.
1225            child.parent = Mutex::new(Arc::downgrade(node));
1226
1227            let arc_child = Arc::new(child);
1228            node_children.insert(name, arc_child.clone());
1229            // Record overlay inode in whole OverlayFs.
1230            inode_store.insert_inode(ino, arc_child).await;
1231        }
1232        // info!("after iter childrens");
1233
1234        node.loaded.store(true, Ordering::Relaxed);
1235
1236        Ok(())
1237    }
1238
1239    async fn forget_one(&self, inode: Inode, count: u64) {
1240        if inode == self.root_inode() || inode == 0 {
1241            return;
1242        }
1243
1244        let v = match self.get_all_inode(inode).await {
1245            Some(n) => n,
1246            None => {
1247                trace!("forget unknown inode: {inode}");
1248                return;
1249            }
1250        };
1251
1252        // Use fetch_update to atomically update lookups in a loop until it succeeds
1253        v.lookups
1254            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
1255                // If count is larger than current lookups, return 0
1256                // Otherwise subtract count from current lookups
1257                if current < count {
1258                    Some(0)
1259                } else {
1260                    Some(current - count)
1261                }
1262            })
1263            .expect("fetch_update failed");
1264
1265        let lookups = v.lookups.load(Ordering::Relaxed);
1266        trace!(
1267            "forget inode: {}, name {}, lookups: {}",
1268            inode,
1269            v.name.read().await,
1270            lookups
1271        );
1272        if lookups == 0 {
1273            debug!(
1274                "inode is forgotten: {}, name {}",
1275                inode,
1276                v.name.read().await
1277            );
1278            let _ = self.remove_inode(inode, None).await;
1279            let parent = v.parent.lock().await;
1280
1281            if let Some(p) = parent.upgrade() {
1282                // remove it from hashmap
1283                p.remove_child(&v.name.read().await).await;
1284            }
1285        }
1286    }
1287
1288    async fn do_lookup(&self, ctx: Request, parent: Inode, name: &str) -> Result<ReplyEntry> {
1289        let node = self.lookup_node(ctx, parent, name).await?;
1290        debug!("do_lookup: {name:?}, found");
1291
1292        if node.whiteout.load(Ordering::Relaxed) {
1293            eprintln!("Error: node.whiteout.load() called.");
1294            return Err(Error::from_raw_os_error(libc::ENOENT));
1295        }
1296
1297        let mut st = node.stat64(ctx).await?;
1298        st.attr.ino = node.inode;
1299        if utils::is_dir(&st.attr.kind) && !node.loaded.load(Ordering::Relaxed) {
1300            self.load_directory(ctx, &node).await?;
1301        }
1302
1303        // FIXME: can forget happen between found and increase reference counter?
1304        let tmp = node.lookups.fetch_add(1, Ordering::Relaxed);
1305        trace!("lookup count: {}", tmp + 1);
1306        Ok(ReplyEntry {
1307            ttl: st.ttl,
1308            attr: st.attr,
1309            generation: 0,
1310        })
1311    }
1312
1313    async fn do_statvfs(&self, ctx: Request, inode: Inode) -> Result<ReplyStatFs> {
1314        match self.get_active_inode(inode).await {
1315            Some(ovi) => {
1316                let all_inodes = ovi.real_inodes.lock().await;
1317                let real_inode = all_inodes
1318                    .first()
1319                    .ok_or(Error::other("backend inode not found"))?;
1320                Ok(real_inode.layer.statfs(ctx, real_inode.inode).await?)
1321            }
1322            None => Err(Error::from_raw_os_error(libc::ENOENT)),
1323        }
1324    }
1325
1326    #[allow(clippy::too_many_arguments)]
1327    async fn do_readdir<'a>(
1328        &self,
1329        ctx: Request,
1330        inode: Inode,
1331        handle: u64,
1332        offset: u64,
1333    ) -> Result<
1334        impl futures_util::stream::Stream<Item = std::result::Result<DirectoryEntry, Errno>> + Send + 'a,
1335    > {
1336        let snapshot = self.get_or_create_dir_snapshot(ctx, inode, handle).await?;
1337
1338        let entries: Vec<std::result::Result<DirectoryEntry, Errno>> =
1339            if offset < snapshot.len() as u64 {
1340                snapshot
1341                    .iter()
1342                    .skip(offset as usize)
1343                    .map(|entry| {
1344                        Ok(DirectoryEntry {
1345                            inode: entry.inode,
1346                            kind: entry.kind,
1347                            name: entry.name.clone(),
1348                            offset: entry.offset,
1349                        })
1350                    })
1351                    .collect()
1352            } else {
1353                vec![]
1354            };
1355
1356        Ok(iter(entries))
1357    }
1358
1359    #[allow(clippy::too_many_arguments)]
1360    async fn do_readdirplus<'a>(
1361        &self,
1362        ctx: Request,
1363        inode: Inode,
1364        handle: u64,
1365        offset: u64,
1366    ) -> Result<
1367        impl futures_util::stream::Stream<Item = std::result::Result<DirectoryEntryPlus, Errno>>
1368        + Send
1369        + 'a,
1370    > {
1371        let snapshot = self.get_or_create_dir_snapshot(ctx, inode, handle).await?;
1372
1373        let mut entries = Vec::new();
1374        if offset < snapshot.len() as u64 {
1375            for entry in snapshot.iter().skip(offset as usize) {
1376                // Increment lookup count for readdirplus as we are handing out a reference to the kernel.
1377                // We must do this here, not in snapshot creation, and we must NOT decrement it in HandleData drop.
1378                // The kernel will send a FORGET request when it's done with the entry.
1379                if let Some(node) = self.get_all_inode(entry.inode).await {
1380                    node.lookups.fetch_add(1, Ordering::Relaxed);
1381                }
1382                entries.push(Ok(entry.clone()));
1383            }
1384        }
1385
1386        Ok(iter(entries))
1387    }
1388
1389    async fn get_or_create_dir_snapshot(
1390        &self,
1391        ctx: Request,
1392        inode: Inode,
1393        handle: u64,
1394    ) -> Result<Vec<DirectoryEntryPlus>> {
1395        let handle_data = match self.handles.lock().await.get(&handle) {
1396            Some(hd) if hd.node.inode == inode => hd.clone(),
1397            _ => {
1398                // Fallback for cases without a valid handle (e.g. no-opendir)
1399                let node = self.lookup_node(ctx, inode, ".").await?;
1400                let st = node.stat64(ctx).await?;
1401                if !utils::is_dir(&st.attr.kind) {
1402                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1403                }
1404                // Create a temporary HandleData for this call only.
1405                Arc::new(HandleData {
1406                    node,
1407                    real_handle: None,
1408                    dir_snapshot: Mutex::new(None),
1409                })
1410            }
1411        };
1412
1413        // Optimistic check
1414        if let Some(snapshot) = handle_data.dir_snapshot.lock().await.as_ref() {
1415            return Ok(snapshot.clone());
1416        }
1417
1418        // Snapshot doesn't exist, create it.
1419        let ovl_inode = &handle_data.node;
1420        self.load_directory(ctx, ovl_inode).await?;
1421
1422        let mut entries = Vec::new();
1423
1424        // 1. Add "." entry
1425        let mut st_self = ovl_inode.stat64(ctx).await?;
1426        st_self.attr.ino = ovl_inode.inode;
1427        entries.push(DirectoryEntryPlus {
1428            inode: ovl_inode.inode,
1429            generation: 0,
1430            kind: st_self.attr.kind,
1431            name: ".".into(),
1432            offset: 1,
1433            attr: st_self.attr,
1434            entry_ttl: st_self.ttl,
1435            attr_ttl: st_self.ttl,
1436        });
1437
1438        // 2. Add ".." entry
1439        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1440            Some(node) => node,
1441            None => self.root_node().await,
1442        };
1443        let mut st_parent = parent_node.stat64(ctx).await?;
1444        st_parent.attr.ino = parent_node.inode;
1445        entries.push(DirectoryEntryPlus {
1446            inode: parent_node.inode,
1447            generation: 0,
1448            kind: st_parent.attr.kind,
1449            name: "..".into(),
1450            offset: 2,
1451            attr: st_parent.attr,
1452            entry_ttl: st_parent.ttl,
1453            attr_ttl: st_parent.ttl,
1454        });
1455
1456        // 3. Add children entries
1457        let children = ovl_inode.childrens.lock().await;
1458        for (name, child) in children.iter() {
1459            if child.whiteout.load(Ordering::Relaxed) {
1460                continue;
1461            }
1462            let mut st_child = child.stat64(ctx).await?;
1463            st_child.attr.ino = child.inode;
1464            entries.push(DirectoryEntryPlus {
1465                inode: child.inode,
1466                generation: 0,
1467                kind: st_child.attr.kind,
1468                name: name.clone().into(),
1469                offset: (entries.len() + 1) as i64,
1470                attr: st_child.attr,
1471                entry_ttl: st_child.ttl,
1472                attr_ttl: st_child.ttl,
1473            });
1474        }
1475        drop(children);
1476
1477        let mut snapshot_guard = handle_data.dir_snapshot.lock().await;
1478        if snapshot_guard.is_none() {
1479            // We won the race, install our prepared snapshot.
1480            *snapshot_guard = Some(entries.clone());
1481            Ok(entries)
1482        } else {
1483            // Another thread won the race while we were preparing.
1484            // Discard our work and use the existing snapshot.
1485            Ok(snapshot_guard.as_ref().unwrap().clone())
1486        }
1487    }
1488
1489    async fn do_mkdir(
1490        &self,
1491        ctx: Request,
1492        parent_node: Arc<OverlayInode>,
1493        name: &str,
1494        mode: u32,
1495        umask: u32,
1496    ) -> Result<()> {
1497        if self.upper_layer.is_none() {
1498            return Err(Error::from_raw_os_error(libc::EROFS));
1499        }
1500
1501        // Parent node was deleted.
1502        if parent_node.whiteout.load(Ordering::Relaxed) {
1503            return Err(Error::from_raw_os_error(libc::ENOENT));
1504        }
1505
1506        let mut delete_whiteout = false;
1507        let mut set_opaque = false;
1508        if let Some(n) = self
1509            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1510            .await?
1511        {
1512            // Node with same name exists, let's check if it's whiteout.
1513            if !n.whiteout.load(Ordering::Relaxed) {
1514                return Err(Error::from_raw_os_error(libc::EEXIST));
1515            }
1516
1517            if n.in_upper_layer().await {
1518                delete_whiteout = true;
1519            }
1520
1521            // Set opaque if child dir has lower layers.
1522            if !n.upper_layer_only().await {
1523                set_opaque = true;
1524            }
1525        }
1526
1527        // Copy parent node up if necessary.
1528        let pnode = self.copy_node_up(ctx, parent_node).await?;
1529
1530        let path = format!("{}/{}", pnode.path.read().await, name);
1531        let path_ref = &path;
1532        let new_node = Arc::new(Mutex::new(None));
1533        pnode
1534            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1535                let parent_real_inode = match parent_real_inode {
1536                    Some(inode) => inode,
1537                    None => {
1538                        error!("BUG: parent doesn't have upper inode after copied up");
1539                        return Err(Error::from_raw_os_error(libc::EINVAL));
1540                    }
1541                };
1542                let osstr = OsStr::new(name);
1543                if delete_whiteout {
1544                    let _ = parent_real_inode
1545                        .layer
1546                        .delete_whiteout(ctx, parent_real_inode.inode, osstr)
1547                        .await;
1548                }
1549
1550                // Allocate inode number.
1551                let ino = self.alloc_inode(path_ref).await?;
1552                let child_dir = parent_real_inode.mkdir(ctx, name, mode, umask).await?;
1553                // Set opaque if child dir has lower layers.
1554                if set_opaque {
1555                    parent_real_inode
1556                        .layer
1557                        .set_opaque(ctx, child_dir.inode)
1558                        .await?;
1559                }
1560                let ovi =
1561                    OverlayInode::new_from_real_inode(name, ino, path_ref.clone(), child_dir).await;
1562                new_node.lock().await.replace(ovi);
1563                Ok(false)
1564            })
1565            .await?;
1566
1567        // new_node is always 'Some'
1568        let nn = new_node.lock().await.take();
1569        let arc_node = Arc::new(nn.unwrap());
1570        self.insert_inode(arc_node.inode, arc_node.clone()).await;
1571        pnode.insert_child(name, arc_node).await;
1572        Ok(())
1573    }
1574
1575    async fn do_mknod(
1576        &self,
1577        ctx: Request,
1578        parent_node: &Arc<OverlayInode>,
1579        name: &str,
1580        mode: u32,
1581        rdev: u32,
1582        umask: u32,
1583    ) -> Result<()> {
1584        if self.upper_layer.is_none() {
1585            return Err(Error::from_raw_os_error(libc::EROFS));
1586        }
1587
1588        // Parent node was deleted.
1589        if parent_node.whiteout.load(Ordering::Relaxed) {
1590            return Err(Error::from_raw_os_error(libc::ENOENT));
1591        }
1592
1593        match self
1594            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1595            .await?
1596        {
1597            Some(n) => {
1598                // Node with same name exists, let's check if it's whiteout.
1599                if !n.whiteout.load(Ordering::Relaxed) {
1600                    return Err(Error::from_raw_os_error(libc::EEXIST));
1601                }
1602
1603                // Copy parent node up if necessary.
1604                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1605                pnode
1606                    .handle_upper_inode_locked(
1607                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1608                            let parent_real_inode = match parent_real_inode {
1609                                Some(inode) => inode,
1610                                None => {
1611                                    error!("BUG: parent doesn't have upper inode after copied up");
1612                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1613                                }
1614                            };
1615                            let osstr = OsStr::new(name);
1616                            if n.in_upper_layer().await {
1617                                let _ = parent_real_inode
1618                                    .layer
1619                                    .delete_whiteout(ctx, parent_real_inode.inode, osstr)
1620                                    .await;
1621                            }
1622
1623                            let child_ri = parent_real_inode
1624                                .mknod(ctx, name, mode, rdev, umask)
1625                                .await?;
1626
1627                            // Replace existing real inodes with new one.
1628                            n.add_upper_inode(child_ri, true).await;
1629                            Ok(false)
1630                        },
1631                    )
1632                    .await?;
1633            }
1634            None => {
1635                // Copy parent node up if necessary.
1636                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1637                let new_node = Arc::new(Mutex::new(None));
1638                let path = format!("{}/{}", pnode.path.read().await, name);
1639                pnode
1640                    .handle_upper_inode_locked(
1641                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1642                            let parent_real_inode = match parent_real_inode {
1643                                Some(inode) => inode,
1644                                None => {
1645                                    error!("BUG: parent doesn't have upper inode after copied up");
1646                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1647                                }
1648                            };
1649
1650                            // Allocate inode number.
1651                            let ino = self.alloc_inode(&path).await?;
1652                            let child_ri = parent_real_inode
1653                                .mknod(ctx, name, mode, rdev, umask)
1654                                .await?;
1655                            let ovi = OverlayInode::new_from_real_inode(
1656                                name,
1657                                ino,
1658                                path.clone(),
1659                                child_ri,
1660                            )
1661                            .await;
1662
1663                            new_node.lock().await.replace(ovi);
1664                            Ok(false)
1665                        },
1666                    )
1667                    .await?;
1668
1669                let nn = new_node.lock().await.take();
1670                let arc_node = Arc::new(nn.unwrap());
1671                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1672                pnode.insert_child(name, arc_node).await;
1673            }
1674        }
1675
1676        Ok(())
1677    }
1678
1679    async fn do_create(
1680        &self,
1681        ctx: Request,
1682        parent_node: &Arc<OverlayInode>,
1683        name: &OsStr,
1684        mode: u32,
1685        flags: u32,
1686    ) -> Result<Option<u64>> {
1687        let name_str = name.to_str().unwrap();
1688        let upper = self
1689            .upper_layer
1690            .as_ref()
1691            .cloned()
1692            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
1693
1694        // Parent node was deleted.
1695        if parent_node.whiteout.load(Ordering::Relaxed) {
1696            return Err(Error::from_raw_os_error(libc::ENOENT));
1697        }
1698
1699        let handle: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1700        let real_ino: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1701        let new_ovi = match self
1702            .lookup_node_ignore_enoent(ctx, parent_node.inode, name_str)
1703            .await?
1704        {
1705            Some(n) => {
1706                // Node with same name exists, let's check if it's whiteout.
1707                if !n.whiteout.load(Ordering::Relaxed) {
1708                    return Err(Error::from_raw_os_error(libc::EEXIST));
1709                }
1710
1711                // Copy parent node up if necessary.
1712                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1713                pnode
1714                    .handle_upper_inode_locked(
1715                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1716                            let parent_real_inode = match parent_real_inode {
1717                                Some(inode) => inode,
1718                                None => {
1719                                    error!("BUG: parent doesn't have upper inode after copied up");
1720                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1721                                }
1722                            };
1723
1724                            if n.in_upper_layer().await {
1725                                let _ = parent_real_inode
1726                                    .layer
1727                                    .delete_whiteout(ctx, parent_real_inode.inode, name)
1728                                    .await;
1729                            }
1730
1731                            let (child_ri, hd) =
1732                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1733                            real_ino.lock().await.replace(child_ri.inode);
1734                            handle.lock().await.replace(hd.unwrap());
1735
1736                            // Replace existing real inodes with new one.
1737                            n.add_upper_inode(child_ri, true).await;
1738                            Ok(false)
1739                        },
1740                    )
1741                    .await?;
1742                n.clone()
1743            }
1744            None => {
1745                // Copy parent node up if necessary.
1746                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1747                let new_node = Arc::new(Mutex::new(None));
1748                let path = format!("{}/{}", pnode.path.read().await, name_str);
1749                pnode
1750                    .handle_upper_inode_locked(
1751                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1752                            let parent_real_inode = match parent_real_inode {
1753                                Some(inode) => inode,
1754                                None => {
1755                                    error!("BUG: parent doesn't have upper inode after copied up");
1756                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1757                                }
1758                            };
1759
1760                            let (child_ri, hd) =
1761                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1762                            real_ino.lock().await.replace(child_ri.inode);
1763                            handle.lock().await.replace(hd.unwrap());
1764                            // Allocate inode number.
1765                            let ino = self.alloc_inode(&path).await?;
1766                            let ovi = OverlayInode::new_from_real_inode(
1767                                name_str,
1768                                ino,
1769                                path.clone(),
1770                                child_ri,
1771                            )
1772                            .await;
1773
1774                            new_node.lock().await.replace(ovi);
1775                            Ok(false)
1776                        },
1777                    )
1778                    .await?;
1779
1780                // new_node is always 'Some'
1781                let nn = new_node.lock().await.take();
1782                let arc_node = Arc::new(nn.unwrap());
1783                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1784                pnode.insert_child(name_str, arc_node.clone()).await;
1785                arc_node
1786            }
1787        };
1788
1789        let final_handle = match *handle.lock().await {
1790            Some(hd) => {
1791                if self.no_open.load(Ordering::Relaxed) {
1792                    None
1793                } else {
1794                    let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1795                    let handle_data = HandleData {
1796                        node: new_ovi,
1797                        real_handle: Some(RealHandle {
1798                            layer: upper.clone(),
1799                            in_upper_layer: true,
1800                            inode: real_ino.lock().await.unwrap(),
1801                            handle: AtomicU64::new(hd),
1802                        }),
1803                        dir_snapshot: Mutex::new(None),
1804                    };
1805                    self.handles
1806                        .lock()
1807                        .await
1808                        .insert(handle, Arc::new(handle_data));
1809                    Some(handle)
1810                }
1811            }
1812            None => None,
1813        };
1814        Ok(final_handle)
1815    }
1816
1817    async fn do_rename(
1818        &self,
1819        req: Request,
1820        parent: Inode,
1821        name: &OsStr,
1822        new_parent: Inode,
1823        new_name: &OsStr,
1824    ) -> Result<()> {
1825        let name_str = name.to_str().unwrap();
1826        let new_name_str = new_name.to_str().unwrap();
1827
1828        let parent_node = self.lookup_node(req, parent, "").await?;
1829        let new_parent_node = self.lookup_node(req, new_parent, "").await?;
1830        let src_node = self.lookup_node(req, parent, name_str).await?;
1831        let dest_node_opt = self
1832            .lookup_node_ignore_enoent(req, new_parent, new_name_str)
1833            .await?;
1834        // trace!("parent_node: {}, new_parent_node: {}, src_node: {}, dest_node_opt: {:?}", parent_node.inode, new_parent_node.inode, src_node.inode, dest_node_opt.as_ref().map(|n| n.inode));
1835
1836        if let Some(dest_node) = &dest_node_opt {
1837            let src_is_dir = src_node.is_dir(req).await?;
1838            let dest_is_dir = dest_node.is_dir(req).await?;
1839            if src_is_dir != dest_is_dir {
1840                return Err(Error::from_raw_os_error(libc::EISDIR));
1841            }
1842            if dest_is_dir {
1843                self.copy_directory_up(req, dest_node.clone()).await?;
1844                let (count, _) = dest_node.count_entries_and_whiteout(req).await?;
1845                if count > 0 {
1846                    return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
1847                }
1848            }
1849        }
1850
1851        let pnode = self.copy_node_up(req, parent_node).await?;
1852        let new_pnode = self.copy_node_up(req, new_parent_node).await?;
1853        let s_node = self.copy_node_up(req, src_node).await?;
1854
1855        let need_whiteout = !s_node.upper_layer_only().await;
1856
1857        let (p_layer, _, p_inode) = pnode.first_layer_inode().await;
1858        let (new_p_layer, _, new_p_inode) = new_pnode.first_layer_inode().await;
1859        assert!(Arc::ptr_eq(&p_layer, &new_p_layer));
1860
1861        p_layer
1862            .rename(req, p_inode, name, new_p_inode, new_name)
1863            .await?;
1864
1865        // Handle the replaced destination node (if any).
1866        if let Some(dest_node) = dest_node_opt {
1867            let path = dest_node.path.read().await.clone();
1868            self.remove_inode(dest_node.inode, Some(path)).await;
1869        }
1870
1871        // Update the moved source node's state.
1872
1873        // Remove from old parent.
1874        pnode.remove_child(name_str).await;
1875        self.remove_inode(s_node.inode, s_node.path.read().await.clone().into())
1876            .await;
1877        let new_path = format!("{}/{}", new_pnode.path.read().await, new_name_str);
1878        *s_node.path.write().await = new_path;
1879        *s_node.name.write().await = new_name_str.to_string();
1880        *s_node.parent.lock().await = Arc::downgrade(&new_pnode);
1881        new_pnode.insert_child(new_name_str, s_node.clone()).await;
1882        self.insert_inode(s_node.inode, s_node).await;
1883
1884        // Create whiteout at the old location if necessary.
1885        if need_whiteout {
1886            p_layer.create_whiteout(req, p_inode, name).await?;
1887        }
1888
1889        Ok(())
1890    }
1891
1892    async fn do_link(
1893        &self,
1894        ctx: Request,
1895        src_node: &Arc<OverlayInode>,
1896        new_parent: &Arc<OverlayInode>,
1897        name: &str,
1898    ) -> Result<()> {
1899        if self.upper_layer.is_none() {
1900            return Err(Error::from_raw_os_error(libc::EROFS));
1901        }
1902
1903        // Node is whiteout.
1904        if src_node.whiteout.load(Ordering::Relaxed) || new_parent.whiteout.load(Ordering::Relaxed)
1905        {
1906            return Err(Error::from_raw_os_error(libc::ENOENT));
1907        }
1908
1909        let st = src_node.stat64(ctx).await?;
1910        if utils::is_dir(&st.attr.kind) {
1911            // Directory can't be hardlinked.
1912            return Err(Error::from_raw_os_error(libc::EPERM));
1913        }
1914
1915        let src_node = self.copy_node_up(ctx, Arc::clone(src_node)).await?;
1916        let new_parent = self.copy_node_up(ctx, Arc::clone(new_parent)).await?;
1917        let src_ino = src_node.first_layer_inode().await.2;
1918
1919        if let Some(existing_node) = self
1920            .lookup_node_ignore_enoent(ctx, new_parent.inode, name)
1921            .await?
1922        {
1923            // If it's not a whiteout, it's an error
1924            if !existing_node.whiteout.load(Ordering::Relaxed) {
1925                return Err(Error::from_raw_os_error(libc::EEXIST));
1926            }
1927            // If it is a whiteout, we will overwrite it.
1928            // First, remove the physical whiteout file in the upper layer.
1929            new_parent
1930                .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1931                    let parent_ri = parent_real_inode.ok_or_else(|| {
1932                        error!("BUG: parent doesn't have upper inode after copied up");
1933                        Error::from_raw_os_error(libc::EINVAL)
1934                    })?;
1935                    // Only delete if the whiteout is in the upper layer
1936                    if existing_node.in_upper_layer().await {
1937                        let _ = parent_ri
1938                            .layer
1939                            .delete_whiteout(ctx, parent_ri.inode, OsStr::new(name))
1940                            .await;
1941                    }
1942                    Ok(false)
1943                })
1944                .await?;
1945        }
1946
1947        new_parent
1948            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1949                let parent_real_inode = match parent_real_inode {
1950                    Some(inode) => inode,
1951                    None => {
1952                        error!("BUG: parent doesn't have upper inode after copied up");
1953                        return Err(Error::from_raw_os_error(libc::EINVAL));
1954                    }
1955                };
1956
1957                parent_real_inode.link(ctx, src_ino, name).await?;
1958
1959                Ok(false)
1960            })
1961            .await?;
1962
1963        self.insert_inode(src_node.inode, src_node.clone()).await;
1964        new_parent.insert_child(name, src_node).await;
1965
1966        Ok(())
1967    }
1968
1969    async fn do_symlink(
1970        &self,
1971        ctx: Request,
1972        linkname: &str,
1973        parent_node: &Arc<OverlayInode>,
1974        name: &str,
1975    ) -> Result<()> {
1976        let name_os = OsStr::new(name);
1977        if self.upper_layer.is_none() {
1978            return Err(Error::from_raw_os_error(libc::EROFS));
1979        }
1980
1981        // parent was deleted.
1982        if parent_node.whiteout.load(Ordering::Relaxed) {
1983            return Err(Error::from_raw_os_error(libc::ENOENT));
1984        }
1985
1986        match self
1987            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1988            .await?
1989        {
1990            Some(n) => {
1991                // Node with same name exists, let's check if it's whiteout.
1992                if !n.whiteout.load(Ordering::Relaxed) {
1993                    return Err(Error::from_raw_os_error(libc::EEXIST));
1994                }
1995
1996                // Copy parent node up if necessary.
1997                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1998                pnode
1999                    .handle_upper_inode_locked(
2000                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
2001                            let parent_real_inode = match parent_real_inode {
2002                                Some(inode) => inode,
2003                                None => {
2004                                    error!("BUG: parent doesn't have upper inode after copied up");
2005                                    return Err(Error::from_raw_os_error(libc::EINVAL));
2006                                }
2007                            };
2008
2009                            if n.in_upper_layer().await {
2010                                let _ = parent_real_inode
2011                                    .layer
2012                                    .delete_whiteout(ctx, parent_real_inode.inode, name_os)
2013                                    .await;
2014                            }
2015
2016                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
2017
2018                            // Replace existing real inodes with new one.
2019                            n.add_upper_inode(child_ri, true).await;
2020                            Ok(false)
2021                        },
2022                    )
2023                    .await?;
2024            }
2025            None => {
2026                // Copy parent node up if necessary.
2027                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
2028                let new_node: Arc<Mutex<Option<OverlayInode>>> = Arc::new(Mutex::new(None));
2029                let path = format!("{}/{}", pnode.path.read().await, name);
2030                pnode
2031                    .handle_upper_inode_locked(
2032                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
2033                            let parent_real_inode = match parent_real_inode {
2034                                Some(inode) => inode,
2035                                None => {
2036                                    error!("BUG: parent doesn't have upper inode after copied up");
2037                                    return Err(Error::from_raw_os_error(libc::EINVAL));
2038                                }
2039                            };
2040
2041                            // Allocate inode number.
2042                            let ino = self.alloc_inode(&path).await?;
2043                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
2044                            let ovi = OverlayInode::new_from_real_inode(
2045                                name,
2046                                ino,
2047                                path.clone(),
2048                                child_ri,
2049                            )
2050                            .await;
2051
2052                            new_node.lock().await.replace(ovi);
2053                            Ok(false)
2054                        },
2055                    )
2056                    .await?;
2057
2058                // new_node is always 'Some'
2059                let arc_node = Arc::new(new_node.lock().await.take().unwrap());
2060                self.insert_inode(arc_node.inode, arc_node.clone()).await;
2061                pnode.insert_child(name, arc_node).await;
2062            }
2063        }
2064
2065        Ok(())
2066    }
2067
2068    /// Copies a symbolic link from a lower layer to the upper layer.
2069    ///
2070    /// This function is a part of the copy-up process, triggered when a symlink that
2071    /// only exists in a lower layer is modified. It reads the link target and attributes
2072    /// from the lower layer and creates an identical symlink in the upper layer, crucially
2073    /// preserving the original host UID and GID.
2074    async fn copy_symlink_up(
2075        &self,
2076        ctx: Request,
2077        node: Arc<OverlayInode>,
2078    ) -> Result<Arc<OverlayInode>> {
2079        if node.in_upper_layer().await {
2080            return Ok(node);
2081        }
2082
2083        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2084            Arc::clone(n)
2085        } else {
2086            return Err(Error::other("no parent?"));
2087        };
2088
2089        // To preserve original ownership, we must get the raw, unmapped host attributes.
2090        // We achieve this by calling `do_getattr_helper`, which is specifically designed
2091        // to bypass the ID mapping logic. This is safe and does not affect other
2092        // functionalities because `do_getattr_helper` and the standard `stat64()` call
2093        // both rely on the same underlying `stat` system call; they only differ in
2094        // whether the resulting `uid` and `gid` are mapped.
2095        let (self_layer, _, self_inode) = node.first_layer_inode().await;
2096        let re = self_layer.do_getattr_helper(self_inode, None).await?;
2097        let st = ReplyAttr {
2098            ttl: re.1,
2099            attr: convert_stat64_to_file_attr(re.0),
2100        };
2101
2102        if !parent_node.in_upper_layer().await {
2103            parent_node.clone().create_upper_dir(ctx, None).await?;
2104        }
2105
2106        // Read the linkname from lower layer.
2107        let reply_data = self_layer.readlink(ctx, self_inode).await?;
2108        // Convert path to &str.
2109        let path = std::str::from_utf8(&reply_data.data)
2110            .map_err(|_| Error::from_raw_os_error(libc::EINVAL))?;
2111
2112        let new_upper_real: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
2113        parent_node
2114            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2115                // We already create upper dir for parent_node above.
2116                let parent_real_inode =
2117                    parent_upper_inode.ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2118                // We manually unfold the `symlink` logic here instead of calling the `symlink` method directly.
2119                // This is necessary to preserve the original file's UID and GID during the copy-up process.
2120                if !parent_real_inode.in_upper_layer {
2121                    return Err(Error::from_raw_os_error(libc::EROFS));
2122                }
2123                let link_name = OsStr::new(path);
2124                let filename = node.name.read().await;
2125                let filename = OsStr::new(filename.as_str());
2126                let entry = parent_real_inode
2127                    .layer
2128                    .do_symlink_helper(
2129                        ctx,
2130                        parent_real_inode.inode,
2131                        filename,
2132                        link_name,
2133                        st.attr.uid,
2134                        st.attr.gid,
2135                    )
2136                    .await?;
2137                let ri = RealInode {
2138                    layer: parent_real_inode.layer.clone(),
2139                    in_upper_layer: true,
2140                    inode: entry.attr.ino,
2141                    whiteout: false,
2142                    opaque: false,
2143                    stat: Some(ReplyAttr {
2144                        ttl: entry.ttl,
2145                        attr: entry.attr,
2146                    }),
2147                };
2148                new_upper_real.lock().await.replace(ri);
2149                Ok(false)
2150            })
2151            .await?;
2152
2153        if let Some(real_inode) = new_upper_real.lock().await.take() {
2154            // update upper_inode and first_inode()
2155            node.add_upper_inode(real_inode, true).await;
2156        }
2157
2158        Ok(node)
2159    }
2160
2161    /// Copies a regular file and its contents from a lower layer to the upper layer.
2162    ///
2163    /// This function is a core part of the copy-up process, triggered when a regular file
2164    /// that only exists in a lower layer is written to. It creates an empty file in the
2165    /// upper layer with the original file's attributes (mode, UID, GID), and then copies
2166    /// the entire content from the lower layer file to the new upper layer file.
2167    async fn copy_regfile_up(
2168        &self,
2169        ctx: Request,
2170        node: Arc<OverlayInode>,
2171    ) -> Result<Arc<OverlayInode>> {
2172        if node.in_upper_layer().await {
2173            return Ok(node);
2174        }
2175
2176        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2177            Arc::clone(n)
2178        } else {
2179            return Err(Error::other("no parent?"));
2180        };
2181
2182        // To preserve original ownership, we must get the raw, unmapped host attributes.
2183        // We achieve this by calling `do_getattr_helper`, which is specifically designed
2184        // to bypass the ID mapping logic. This is safe and does not affect other
2185        // functionalities because `do_getattr_helper` and the standard `stat64()` call
2186        // both rely on the same underlying `stat` system call; they only differ in
2187        // whether the resulting `uid` and `gid` are mapped.
2188        let (lower_layer, _, lower_inode) = node.first_layer_inode().await;
2189        let re = lower_layer.do_getattr_helper(lower_inode, None).await?;
2190        let st = ReplyAttr {
2191            ttl: re.1,
2192            attr: convert_stat64_to_file_attr(re.0),
2193        };
2194        trace!(
2195            "copy_regfile_up: node {} in lower layer's inode {}",
2196            node.inode, lower_inode
2197        );
2198
2199        if !parent_node.in_upper_layer().await {
2200            parent_node.clone().create_upper_dir(ctx, None).await?;
2201        }
2202
2203        // create the file in upper layer using information from lower layer
2204
2205        let flags = libc::O_WRONLY;
2206        let mode = mode_from_kind_and_perm(st.attr.kind, st.attr.perm);
2207
2208        let upper_handle = Arc::new(Mutex::new(0));
2209        let upper_real_inode = Arc::new(Mutex::new(None));
2210        parent_node
2211            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2212                // We already create upper dir for parent_node.
2213                let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2214                    error!("parent {} has no upper inode", parent_node.inode);
2215                    Error::from_raw_os_error(libc::EINVAL)
2216                })?;
2217                // We manually unfold the `create` logic here instead of calling the `create` method directly.
2218                // This is necessary to preserve the original file's UID and GID during the copy-up process.
2219                if !parent_real_inode.in_upper_layer {
2220                    return Err(Error::from_raw_os_error(libc::EROFS));
2221                }
2222                let name = node.name.read().await;
2223                let name = OsStr::new(name.as_str());
2224                let create_rep = parent_real_inode
2225                    .layer
2226                    .do_create_helper(
2227                        ctx,
2228                        parent_real_inode.inode,
2229                        name,
2230                        mode,
2231                        flags.try_into().unwrap(),
2232                        st.attr.uid,
2233                        st.attr.gid,
2234                    )
2235                    .await?;
2236
2237                let (inode, h) = (
2238                    RealInode {
2239                        layer: parent_real_inode.layer.clone(),
2240                        in_upper_layer: true,
2241                        inode: create_rep.attr.ino,
2242                        whiteout: false,
2243                        opaque: false,
2244                        stat: Some(ReplyAttr {
2245                            ttl: create_rep.ttl,
2246                            attr: create_rep.attr,
2247                        }),
2248                    },
2249                    Some(create_rep.fh),
2250                );
2251                trace!(
2252                    "copy_regfile_up: created upper file {name:?} with inode {}",
2253                    inode.inode
2254                );
2255                *upper_handle.lock().await = h.unwrap_or(0);
2256                upper_real_inode.lock().await.replace(inode);
2257                Ok(false)
2258            })
2259            .await?;
2260
2261        let rep = lower_layer
2262            .open(ctx, lower_inode, libc::O_RDONLY as u32)
2263            .await?;
2264
2265        let lower_handle = rep.fh;
2266
2267        // need to use work directory and then rename file to
2268        // final destination for atomic reasons.. not deal with it for now,
2269        // use stupid copy at present.
2270        // FIXME: this need a lot of work here, ntimes, xattr, etc.
2271
2272        // Copy from lower real inode to upper real inode.
2273        // TODO: use sendfile here.
2274
2275        let u_handle = *upper_handle.lock().await;
2276        let ri = upper_real_inode.lock().await.take();
2277        if let Some(ri) = ri {
2278            let mut offset: usize = 0;
2279            let size = 4 * 1024 * 1024;
2280
2281            loop {
2282                let ret = lower_layer
2283                    .read(ctx, lower_inode, lower_handle, offset as u64, size)
2284                    .await?;
2285
2286                let len = ret.data.len();
2287                if len == 0 {
2288                    break;
2289                }
2290
2291                let ret = ri
2292                    .layer
2293                    .write(ctx, ri.inode, u_handle, offset as u64, &ret.data, 0, 0)
2294                    .await?;
2295
2296                assert_eq!(ret.written as usize, len);
2297                offset += ret.written as usize;
2298            }
2299
2300            if let Err(e) = ri.layer.release(ctx, ri.inode, u_handle, 0, 0, true).await {
2301                let e: std::io::Error = e.into();
2302                // Ignore ENOSYS.
2303                if e.raw_os_error() != Some(libc::ENOSYS) {
2304                    return Err(e);
2305                }
2306            }
2307            node.add_upper_inode(ri, true).await;
2308        } else {
2309            error!("BUG: upper real inode is None after copy up");
2310        }
2311
2312        lower_layer
2313            .release(ctx, lower_inode, lower_handle, 0, 0, true)
2314            .await?;
2315
2316        Ok(Arc::clone(&node))
2317    }
2318
2319    /// Copies the specified node to the upper layer of the filesystem
2320    ///
2321    /// Performs different operations based on the node type:
2322    /// - **Directory**: Creates a corresponding directory in the upper layer
2323    /// - **Symbolic link**: Recursively copies to the upper layer
2324    /// - **Regular file**: Copies file content to the upper layer
2325    ///
2326    /// # Parameters
2327    /// * `ctx`: FUSE request context
2328    /// * `node`: Reference to the node to be copied
2329    ///
2330    /// # Returns
2331    /// Returns a reference to the upper-layer node on success, or an error on failure
2332    async fn copy_node_up(
2333        &self,
2334        ctx: Request,
2335        node: Arc<OverlayInode>,
2336    ) -> Result<Arc<OverlayInode>> {
2337        if node.in_upper_layer().await {
2338            return Ok(node);
2339        }
2340
2341        let st = node.stat64(ctx).await?;
2342        match st.attr.kind {
2343            FileType::Directory => {
2344                node.clone().create_upper_dir(ctx, None).await?;
2345                Ok(node)
2346            }
2347            FileType::Symlink => {
2348                // For symlink.
2349                self.copy_symlink_up(ctx, node).await
2350            }
2351            FileType::RegularFile => {
2352                // For regular file.
2353                self.copy_regfile_up(ctx, node).await
2354            }
2355            _ => {
2356                // For other file types. return error.
2357                Err(Error::from_raw_os_error(libc::EINVAL))
2358            }
2359        }
2360    }
2361
2362    /// recursively copy directory and all its contents to upper layer
2363    async fn copy_directory_up(
2364        &self,
2365        ctx: Request,
2366        node: Arc<OverlayInode>,
2367    ) -> Result<Arc<OverlayInode>> {
2368        // Ensure the directory itself is copied up first
2369        self.copy_node_up(ctx, node.clone()).await?;
2370
2371        // load directory to cache
2372        self.load_directory(ctx, &node).await?;
2373
2374        // go through all children
2375        let children = node.childrens.lock().await.clone();
2376        for (_name, child) in children.iter() {
2377            if _name == "." || _name == ".." {
2378                continue;
2379            }
2380            // jump over whiteout
2381            if child.whiteout.load(Ordering::Relaxed) {
2382                continue;
2383            }
2384            let st = child.stat64(ctx).await?;
2385            if !child.in_upper_layer().await {
2386                match st.attr.kind {
2387                    FileType::Directory => {
2388                        // recursively copy subdirectory
2389                        Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2390                    }
2391                    FileType::Symlink | FileType::RegularFile => {
2392                        // copy node up symlink or regular file
2393                        Box::pin(self.copy_node_up(ctx, child.clone())).await?;
2394                    }
2395                    _ => {
2396                        // other file types are ignored
2397                    }
2398                }
2399            } else if utils::is_dir(&st.attr.kind) {
2400                // If it is already in the upper layer, but the directory is not loaded,
2401                // ensure that its contents are also copied up recursively.
2402                Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2403            }
2404        }
2405
2406        Ok(node)
2407    }
2408
2409    async fn do_rm(&self, ctx: Request, parent: u64, name: &OsStr, dir: bool) -> Result<()> {
2410        // 1. Read-only mount guard
2411        if self.upper_layer.is_none() {
2412            return Err(Error::from_raw_os_error(libc::EROFS));
2413        }
2414
2415        // 2. Locate the parent Overlay Inode.
2416        // Find parent Overlay Inode.
2417        let pnode = self.lookup_node(ctx, parent, "").await?;
2418        if pnode.whiteout.load(Ordering::Relaxed) {
2419            return Err(Error::from_raw_os_error(libc::ENOENT));
2420        }
2421        let to_name = name.to_str().unwrap();
2422
2423        // 3. Locate the child Overlay Inode for the given name
2424        // Find the Overlay Inode for child with <name>.
2425        let node = self.lookup_node(ctx, parent, to_name).await?;
2426        if node.whiteout.load(Ordering::Relaxed) {
2427            // already deleted.
2428            return Err(Error::from_raw_os_error(libc::ENOENT));
2429        }
2430
2431        // 4. If removing a directory, ensure it is empty of real entries
2432        if dir {
2433            self.load_directory(ctx, &node).await?;
2434            let (count, whiteouts) = node.count_entries_and_whiteout(ctx).await?;
2435            trace!("entries: {count}, whiteouts: {whiteouts}\n");
2436            if count > 0 {
2437                return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
2438            }
2439
2440            // Delete all whiteouts.
2441            if whiteouts > 0 && node.in_upper_layer().await {
2442                self.empty_node_directory(ctx, Arc::clone(&node)).await?;
2443            }
2444
2445            trace!("whiteouts deleted!\n");
2446        }
2447
2448        // 5. Decide whether we need to create a whiteout entry
2449        // We'll filp this off if upper-layer unlink suffices or parent is opaque
2450        let need_whiteout = AtomicBool::new(true);
2451        let pnode = self.copy_node_up(ctx, Arc::clone(&pnode)).await?;
2452
2453        if node.upper_layer_only().await {
2454            need_whiteout.store(false, Ordering::Relaxed);
2455        }
2456
2457        let mut df = |parent_upper_inode: Option<Arc<RealInode>>| async {
2458            let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2459                error!(
2460                    "BUG: parent {} has no upper inode after copy up",
2461                    pnode.inode
2462                );
2463                Error::from_raw_os_error(libc::EINVAL)
2464            })?;
2465
2466            // Parent is opaque, it shadows everything in lower layers so no need to create extra whiteouts.
2467            if parent_real_inode.opaque {
2468                need_whiteout.store(false, Ordering::Relaxed);
2469            }
2470            if dir {
2471                parent_real_inode
2472                    .layer
2473                    .rmdir(ctx, parent_real_inode.inode, name)
2474                    .await?;
2475            } else {
2476                parent_real_inode
2477                    .layer
2478                    .unlink(ctx, parent_real_inode.inode, name)
2479                    .await?;
2480            }
2481
2482            Ok(false)
2483        };
2484
2485        // 6. Perform the unlink/rmdir operation and memory cleanup
2486        if node.in_upper_layer().await {
2487            pnode.handle_upper_inode_locked(&mut df).await?;
2488        }
2489        pnode.remove_child(name.to_str().unwrap()).await;
2490        let path = node.path.read().await.clone();
2491        self.remove_inode(node.inode, Some(path)).await;
2492
2493        // 7. If needed, create a entry in the upper layer to mask lower-layer files
2494        if need_whiteout.load(Ordering::Relaxed) {
2495            trace!("do_rm: creating whiteout\n");
2496            // pnode is copied up, so it has upper layer.
2497            pnode
2498                .handle_upper_inode_locked(
2499                    &mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2500                        let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2501                            error!(
2502                                "BUG: parent {} has no upper inode after copy up",
2503                                pnode.inode
2504                            );
2505                            Error::from_raw_os_error(libc::EINVAL)
2506                        })?;
2507
2508                        let child_ri = parent_real_inode.create_whiteout(ctx, to_name).await?; //FIXME..............
2509                        let path = format!("{}/{}", pnode.path.read().await, to_name);
2510                        let ino: u64 = self.alloc_inode(&path).await?;
2511                        let ovi = Arc::new(
2512                            OverlayInode::new_from_real_inode(to_name, ino, path.clone(), child_ri)
2513                                .await,
2514                        );
2515
2516                        self.insert_inode(ino, ovi.clone()).await;
2517                        pnode.insert_child(to_name, ovi.clone()).await;
2518                        Ok(false)
2519                    },
2520                )
2521                .await?;
2522        }
2523
2524        Ok(())
2525    }
2526
2527    async fn do_fsync(
2528        &self,
2529        ctx: Request,
2530        inode: Inode,
2531        datasync: bool,
2532        handle: Handle,
2533        syncdir: bool,
2534    ) -> Result<()> {
2535        // Use O_RDONLY flags which indicates no copy up.
2536        let data = self
2537            .get_data(ctx, Some(handle), inode, libc::O_RDONLY as u32)
2538            .await?;
2539
2540        trace!("do_fsync: got data for handle: {handle}, inode:{inode}");
2541
2542        match data.real_handle {
2543            // FIXME: need to test if inode matches corresponding handle?
2544            None => {
2545                trace!("do_fsync: no real handle found for handle: {handle}, inode:{inode}");
2546                Err(Error::from_raw_os_error(libc::ENOENT))
2547            }
2548            Some(ref rh) => {
2549                let real_handle = rh.handle.load(Ordering::Relaxed);
2550                // TODO: check if it's in upper layer? @weizhang555
2551                if syncdir {
2552                    trace!(
2553                        "do_fsync: layer.fsyncdir called for handle: {}, inode:{}; rh.inode: {}, real_handle: {}",
2554                        handle, inode, rh.inode, real_handle
2555                    );
2556                    rh.layer
2557                        .fsyncdir(ctx, rh.inode, real_handle, datasync)
2558                        .await
2559                        .map_err(|e| e.into())
2560                } else {
2561                    rh.layer
2562                        .fsync(ctx, rh.inode, real_handle, datasync)
2563                        .await
2564                        .map_err(|e| e.into())
2565                }
2566            }
2567        }
2568    }
2569
2570    // Delete everything in the directory only on upper layer, ignore lower layers.
2571    async fn empty_node_directory(&self, ctx: Request, node: Arc<OverlayInode>) -> Result<()> {
2572        let st = node.stat64(ctx).await?;
2573        if !utils::is_dir(&st.attr.kind) {
2574            // This function can only be called on directories.
2575            return Err(Error::from_raw_os_error(libc::ENOTDIR));
2576        }
2577
2578        let (layer, in_upper, inode) = node.first_layer_inode().await;
2579        if !in_upper {
2580            return Ok(());
2581        }
2582
2583        // Copy node.childrens Hashmap to Vector, the Vector is also used as temp storage,
2584        // Without this, Rust won't allow us to remove them from node.childrens.
2585        let iter = node
2586            .childrens
2587            .lock()
2588            .await
2589            .values()
2590            .cloned()
2591            .collect::<Vec<_>>();
2592
2593        for child in iter {
2594            // We only care about upper layer, ignore lower layers.
2595            if child.in_upper_layer().await {
2596                let child_name = child.name.read().await.clone();
2597                let child_name_os = OsStr::new(&child_name);
2598                if child.whiteout.load(Ordering::Relaxed) {
2599                    layer.delete_whiteout(ctx, inode, child_name_os).await?
2600                } else {
2601                    let s = child.stat64(ctx).await?;
2602                    let cname: &OsStr = OsStr::new(&child_name_os);
2603                    if utils::is_dir(&s.attr.kind) {
2604                        let (count, whiteouts) = child.count_entries_and_whiteout(ctx).await?;
2605                        if count + whiteouts > 0 {
2606                            let cb = child.clone();
2607                            Box::pin(async move { self.empty_node_directory(ctx, cb).await })
2608                                .await?;
2609                        }
2610                        layer.rmdir(ctx, inode, cname).await?
2611                    } else {
2612                        layer.unlink(ctx, inode, cname).await?;
2613                    }
2614                }
2615
2616                let cpath = child.path.read().await.clone();
2617                // delete the child
2618                self.remove_inode(child.inode, Some(cpath)).await;
2619                node.remove_child(&child_name).await;
2620            }
2621        }
2622
2623        Ok(())
2624    }
2625
2626    async fn find_real_info_from_handle(
2627        &self,
2628        handle: Handle,
2629    ) -> Result<(Arc<BoxedLayer>, Inode, Handle)> {
2630        match self.handles.lock().await.get(&handle) {
2631            Some(h) => match h.real_handle {
2632                Some(ref rhd) => {
2633                    trace!(
2634                        "find_real_info_from_handle: layer in upper: {}",
2635                        rhd.in_upper_layer
2636                    );
2637                    Ok((
2638                        rhd.layer.clone(),
2639                        rhd.inode,
2640                        rhd.handle.load(Ordering::Relaxed),
2641                    ))
2642                }
2643                None => Err(Error::from_raw_os_error(libc::ENOENT)),
2644            },
2645
2646            None => Err(Error::from_raw_os_error(libc::ENOENT)),
2647        }
2648    }
2649
2650    async fn find_real_inode(&self, inode: Inode) -> Result<(Arc<BoxedLayer>, Inode)> {
2651        if let Some(n) = self.get_active_inode(inode).await {
2652            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2653            return Ok((first_layer, first_inode));
2654        } else if let Some(n) = self.get_all_inode(inode).await {
2655            trace!("find_real_inode: found inode by get_all_inode: {}", n.inode);
2656            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2657            return Ok((first_layer, first_inode));
2658        }
2659
2660        Err(Error::from_raw_os_error(libc::ENOENT))
2661    }
2662
2663    async fn get_data(
2664        &self,
2665        ctx: Request,
2666        handle: Option<Handle>,
2667        inode: Inode,
2668        flags: u32,
2669    ) -> Result<Arc<HandleData>> {
2670        let no_open = self.no_open.load(Ordering::Relaxed);
2671        if !no_open {
2672            if let Some(h) = handle
2673                && let Some(v) = self.handles.lock().await.get(&h)
2674                && v.node.inode == inode
2675            {
2676                // trace!("get_data: found handle");
2677                return Ok(Arc::clone(v));
2678            }
2679        } else {
2680            let readonly: bool = flags
2681                & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
2682                    as u32
2683                == 0;
2684
2685            // lookup node
2686            let node = self.lookup_node(ctx, inode, "").await?;
2687
2688            // whiteout node
2689            if node.whiteout.load(Ordering::Relaxed) {
2690                return Err(Error::from_raw_os_error(libc::ENOENT));
2691            }
2692
2693            if !readonly {
2694                // Check if upper layer exists, return EROFS is not exists.
2695                self.upper_layer
2696                    .as_ref()
2697                    .cloned()
2698                    .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2699                // copy up to upper layer
2700                self.copy_node_up(ctx, Arc::clone(&node)).await?;
2701            }
2702
2703            let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
2704            let handle_data = HandleData {
2705                node: Arc::clone(&node),
2706                real_handle: Some(RealHandle {
2707                    layer,
2708                    in_upper_layer,
2709                    inode,
2710                    handle: AtomicU64::new(0),
2711                }),
2712                dir_snapshot: Mutex::new(None),
2713            };
2714            return Ok(Arc::new(handle_data));
2715        }
2716
2717        Err(Error::from_raw_os_error(libc::ENOENT))
2718    }
2719
2720    // extend or init the inodes number to one overlay if the current number is done.
2721    pub async fn extend_inode_alloc(&self, key: u64) {
2722        let next_inode = key * INODE_ALLOC_BATCH;
2723        let limit_inode = next_inode + INODE_ALLOC_BATCH - 1;
2724        self.inodes
2725            .write()
2726            .await
2727            .extend_inode_number(next_inode, limit_inode);
2728    }
2729}
2730
/// Bundles the parameters needed to mount an overlay filesystem with [`mount_fs`].
#[derive(Debug, Clone)]
pub struct OverlayArgs<
    P: AsRef<Path>,
    Q: AsRef<Path>,
    R: AsRef<Path>,
    M: AsRef<str>,
    N: Into<String>,
    I: IntoIterator<Item = R>,
> {
    /// Path where the filesystem is mounted.
    pub mountpoint: P,
    /// Root directory of the upper layer.
    pub upperdir: Q,
    /// Root directories of the lower layers, one per item.
    pub lowerdir: I,
    /// `true` selects the privileged mount path, `false` the unprivileged one.
    pub privileged: bool,
    /// Optional ID mapping string handed to every layer.
    pub mapping: Option<M>,
    /// Optional filesystem name set in the mount options.
    pub name: Option<N>,
    /// Value of the `allow_other` mount option.
    pub allow_other: bool,
}
2750
2751/// Mounts the filesystem using the given parameters and returns the mount handle.
2752///
2753/// # Parameters
2754/// - `mountpoint`: Path to the mount point.
2755/// - `upperdir`: Path to the upper directory.
2756/// - `lowerdir`: Paths to the lower directories.
2757/// - `privileged`: If true, use privileged mount; otherwise, unprivileged mount.
2758/// - `mapping`: Optional user/group ID mapping for unprivileged mounts.
2759/// - `name`: Optional name for the filesystem.
2760/// - `allow_other`: If true, allows other users to access the filesystem.
2761///
2762/// # Returns
2763/// A mount handle on success.
2764pub async fn mount_fs<P, Q, R, M, N, I>(
2765    args: OverlayArgs<P, Q, R, M, N, I>,
2766) -> rfuse3::raw::MountHandle
2767where
2768    P: AsRef<Path>,
2769    Q: AsRef<Path>,
2770    R: AsRef<Path>,
2771    M: AsRef<str>,
2772    N: Into<String>,
2773    I: IntoIterator<Item = R>,
2774{
2775    // Create lower layers
2776    let mut lower_layers = Vec::new();
2777    for lower in args.lowerdir {
2778        let layer = new_passthroughfs_layer(PassthroughArgs {
2779            root_dir: lower,
2780            mapping: args.mapping.as_ref().map(|m| m.as_ref()),
2781        })
2782        .await
2783        .expect("Failed to create lower filesystem layer");
2784        lower_layers.push(Arc::new(layer));
2785    }
2786    // Create upper layer
2787    let upper_layer = Arc::new(
2788        new_passthroughfs_layer(PassthroughArgs {
2789            root_dir: args.upperdir,
2790            mapping: args.mapping.as_ref().map(|m| m.as_ref()),
2791        })
2792        .await
2793        .expect("Failed to create upper filesystem layer"),
2794    );
2795
2796    // Configure overlay filesystem
2797    let config = Config {
2798        mountpoint: args.mountpoint.as_ref().to_path_buf(),
2799        do_import: true,
2800        ..Default::default()
2801    };
2802    let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1)
2803        .expect("Failed to initialize OverlayFs");
2804    let logfs = LoggingFileSystem::new(overlayfs);
2805
2806    let mount_path: OsString = OsString::from(args.mountpoint.as_ref().as_os_str());
2807
2808    // Obtain the current user's uid and gid
2809    let uid = unsafe { libc::getuid() };
2810    let gid = unsafe { libc::getgid() };
2811
2812    let mut mount_options = MountOptions::default();
2813    #[cfg(target_os = "linux")]
2814    mount_options.force_readdir_plus(true);
2815
2816    mount_options
2817        .uid(uid)
2818        .gid(gid)
2819        .allow_other(args.allow_other);
2820    if let Some(name) = args.name {
2821        mount_options.fs_name(name);
2822    }
2823
2824    // Mount filesystem based on privilege flag and return the mount handle
2825    if !args.privileged {
2826        debug!("Mounting with unprivileged mode");
2827        Session::new(mount_options)
2828            .mount_with_unprivileged(logfs, mount_path)
2829            .await
2830            .expect("Unprivileged mount failed")
2831    } else {
2832        debug!("Mounting with privileged mode");
2833        Session::new(mount_options)
2834            .mount(logfs, mount_path)
2835            .await
2836            .expect("Privileged mount failed")
2837    }
2838}