libfuse_fs/overlayfs/
mod.rs

1// Copyright (C) 2023 Ant Group. All rights reserved.
2//  2024 From [fuse_backend_rs](https://github.com/cloud-hypervisor/fuse-backend-rs)
3// SPDX-License-Identifier: Apache-2.0
4
5#![allow(missing_docs)]
6mod async_io;
7pub mod config;
8mod inode_store;
9mod layer;
10mod utils;
11
12//mod tempfile;
13use core::panic;
14use std::collections::HashMap;
15use std::ffi::{OsStr, OsString};
16use std::future::Future;
17use std::io::{Error, Result};
18use std::path::Path;
19
20use config::Config;
21use futures::StreamExt as _;
22use rfuse3::raw::reply::{
23    DirectoryEntry, DirectoryEntryPlus, ReplyAttr, ReplyEntry, ReplyOpen, ReplyStatFs,
24};
25use rfuse3::raw::{Filesystem, Request, Session};
26use std::sync::{Arc, Weak};
27use tracing::debug;
28use tracing::error;
29use tracing::info;
30use tracing::trace;
31
32use rfuse3::{Errno, FileType, MountOptions, mode_from_kind_and_perm};
33const SLASH_ASCII: char = '/';
34use futures::future::join_all;
35use futures::stream::iter;
36
37use crate::passthrough::newlogfs::LoggingFileSystem;
38use crate::passthrough::{PassthroughArgs, PassthroughFs, new_passthroughfs_layer};
39use crate::util::convert_stat64_to_file_attr;
40use inode_store::InodeStore;
41use layer::Layer;
42use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
43
44use tokio::sync::{Mutex, RwLock};
45
/// Inode number type used by the overlay filesystem.
pub type Inode = u64;
/// Open-handle identifier type used by the overlay filesystem.
pub type Handle = u64;

/// Concrete type of a single layer; currently every layer is a `PassthroughFs`.
type BoxedLayer = PassthroughFs;
//type BoxedFileSystem = Box<dyn FileSystem<Inode = Inode, Handle = Handle> + Send + Sync>;
// NOTE(review): presumably the size (2^32) of one inode-number allocation batch — confirm against its users.
const INODE_ALLOC_BATCH: u64 = 0x1_0000_0000;
/// RealInode represents one inode object in a specific layer.
///
/// Each RealInode maps to one layer entry, which is 'forgotten' in that layer when the
/// value is dropped (the `Drop` impl issues `forget` with a count of 1).
/// Important note: do not impl Clone trait for it or refcount will be messed up.
pub(crate) struct RealInode {
    /// Layer filesystem this inode belongs to.
    pub layer: Arc<PassthroughFs>,
    /// True when `layer` is the upper layer; the mutating helpers (`mkdir`, `create`, ...)
    /// return `EROFS` when this is false.
    pub in_upper_layer: bool,
    /// Inode number inside `layer`; `stat64` treats 0 as "no such entry".
    pub inode: u64,
    /// File is whiteouted, we need to hide it.
    pub whiteout: bool,
    /// Directory is opaque, we need to hide all entries inside it.
    pub opaque: bool,
    /// Attributes cached when the value was built; `None` when they could not be obtained.
    pub stat: Option<ReplyAttr>,
}
65
/// One node of the merged (overlay) view, backed by real inodes from one or more layers.
///
/// OverlayInode must be protected by lock, it can be operated by multiple threads;
/// every mutable piece of state is therefore wrapped in a lock or an atomic.
// #[derive(Default)]
pub(crate) struct OverlayInode {
    /// Inode hash table, map from 'name' to 'OverlayInode'.
    pub childrens: Mutex<HashMap<String, Arc<OverlayInode>>>,
    /// Weak reference to the parent node; empty (`Weak::new()`) right after construction.
    pub parent: Mutex<Weak<OverlayInode>>,
    /// Backend inodes from all layers. The first element is the one consulted by
    /// `in_upper_layer()` and `upper_layer_only()`.
    pub real_inodes: Mutex<Vec<Arc<RealInode>>>,
    /// Inode number.
    pub inode: u64,
    /// Path of the node; children are built as `<parent path>/<name>`.
    pub path: RwLock<String>,
    /// Name of the node inside its parent directory.
    pub name: RwLock<String>,
    /// Lookup counter; nodes built by `new_from_real_inode` start at 1.
    pub lookups: AtomicU64,
    /// Node is whiteout-ed.
    pub whiteout: AtomicBool,
    /// Directory is loaded.
    pub loaded: AtomicBool,
}
84
/// Cache policy selector; `Auto` is the default variant.
// NOTE(review): the precise effect of each variant is decided by code outside this view — confirm before relying on it.
#[derive(Default)]
pub enum CachePolicy {
    Never,
    #[default]
    Auto,
    Always,
}
/// Overlay filesystem state: the configured layers plus the inode and handle tables.
pub struct OverlayFs {
    /// Configuration this filesystem was created with.
    config: Config,
    /// Lower layers.
    // NOTE(review): presumably read-only and ordered from top-most to bottom-most — confirm.
    lower_layers: Vec<Arc<PassthroughFs>>,
    /// Optional upper layer; `None` when no upper layer is configured.
    upper_layer: Option<Arc<PassthroughFs>>,
    /// All inodes in FS.
    inodes: RwLock<InodeStore>,
    /// Open file handles.
    handles: Mutex<HashMap<u64, Arc<HandleData>>>,
    /// Counter used to hand out handle numbers.
    next_handle: AtomicU64,
    // NOTE(review): the flags below look like negotiated FUSE feature switches; their
    // exact semantics are set by code outside this view — confirm.
    writeback: AtomicBool,
    no_open: AtomicBool,
    no_opendir: AtomicBool,
    killpriv_v2: AtomicBool,
    perfile_dax: AtomicBool,
    // NOTE(review): presumably the inode number of the overlay root — confirm.
    root_inodes: u64,
}
108
/// This is a wrapper of one open handle on an inode in a specific layer. It can't impl Clone trait.
struct RealHandle {
    /// Layer that owns the handle.
    layer: Arc<PassthroughFs>,
    /// True when `layer` is the upper layer.
    in_upper_layer: bool,
    /// Inode number inside `layer`.
    inode: u64,
    /// Handle value in `layer`, stored atomically.
    handle: AtomicU64,
}
116
/// Per-open-handle bookkeeping stored in the overlay handle table.
struct HandleData {
    /// Overlay node the handle refers to.
    node: Arc<OverlayInode>,
    //offset: libc::off_t,
    /// Handle in the backing layer; `None` when no layer handle is attached.
    real_handle: Option<RealHandle>,
}
122
123// RealInode is a wrapper of one inode in specific layer.
124// All layer operations returning Entry should be wrapped in RealInode implementation
125// so that we can increase the refcount(lookup count) of each inode and decrease it after Drop.
126// Important: do not impl 'Copy' trait for it or refcount will be messed up.
127impl RealInode {
128    async fn new(
129        layer: Arc<PassthroughFs>,
130        in_upper_layer: bool,
131        inode: u64,
132        whiteout: bool,
133        opaque: bool,
134    ) -> Self {
135        let mut ri = RealInode {
136            layer,
137            in_upper_layer,
138            inode,
139            whiteout,
140            opaque,
141            stat: None,
142        };
143        match ri.stat64_ignore_enoent(&Request::default()).await {
144            Ok(v) => {
145                ri.stat = v;
146            }
147            Err(e) => {
148                error!("stat64 failed during RealInode creation: {e}");
149            }
150        }
151        ri
152    }
153
154    async fn stat64(&self, req: &Request) -> Result<ReplyAttr> {
155        let layer = self.layer.as_ref();
156        if self.inode == 0 {
157            return Err(Error::from_raw_os_error(libc::ENOENT));
158        }
159        // trace!("stat64: trying to getattr req: {:?}", req);
160        layer
161            .getattr(*req, self.inode, None, 0)
162            .await
163            .map_err(|e| e.into())
164    }
165
166    async fn stat64_ignore_enoent(&self, req: &Request) -> Result<Option<ReplyAttr>> {
167        match self.stat64(req).await {
168            Ok(v1) => Ok(Some(v1)),
169            Err(e) => match e.raw_os_error() {
170                Some(raw_error) => {
171                    if raw_error == libc::ENOENT || raw_error == libc::ENAMETOOLONG {
172                        return Ok(None);
173                    }
174                    Err(e)
175                }
176                None => Err(e),
177            },
178        }
179    }
180
181    // Do real lookup action in specific layer, this call will increase Entry refcount which must be released later.
182    async fn lookup_child_ignore_enoent(
183        &self,
184        ctx: Request,
185        name: &str,
186    ) -> Result<Option<ReplyEntry>> {
187        let cname = OsStr::new(name);
188        // Real inode must have a layer.
189        let layer = self.layer.as_ref();
190        match layer.lookup(ctx, self.inode, cname).await {
191            Ok(v) => {
192                // Negative entry also indicates missing entry.
193                if v.attr.ino == 0 {
194                    return Ok(None);
195                }
196                Ok(Some(v))
197            }
198            Err(e) => {
199                let ioerror: std::io::Error = e.into();
200                if let Some(raw_error) = ioerror.raw_os_error()
201                    && (raw_error == libc::ENOENT || raw_error == libc::ENAMETOOLONG)
202                {
203                    return Ok(None);
204                }
205
206                Err(e.into())
207            }
208        }
209    }
210
211    // Find child inode in same layer under this directory(Self).
212    // Return None if not found.
213    async fn lookup_child(&self, ctx: Request, name: &str) -> Result<Option<RealInode>> {
214        if self.whiteout {
215            return Ok(None);
216        }
217
218        let layer = self.layer.as_ref();
219
220        // Find child Entry with <name> under directory with inode <self.inode>.
221        match self.lookup_child_ignore_enoent(ctx, name).await? {
222            Some(v) => {
223                // The Entry must be forgotten in each layer, which will be done automatically by Drop operation.
224                let (whiteout, opaque) = if v.attr.kind == FileType::Directory {
225                    (false, layer.is_opaque(ctx, v.attr.ino).await?)
226                } else {
227                    (layer.is_whiteout(ctx, v.attr.ino).await?, false)
228                };
229
230                Ok(Some(RealInode {
231                    layer: self.layer.clone(),
232                    in_upper_layer: self.in_upper_layer,
233                    inode: v.attr.ino,
234                    whiteout,
235                    opaque,
236                    stat: Some(ReplyAttr {
237                        ttl: v.ttl,
238                        attr: v.attr,
239                    }),
240                }))
241            }
242            None => Ok(None),
243        }
244    }
245
246    // Read directory entries from specific RealInode, error out if it's not directory.
247    async fn readdir(&self, ctx: Request) -> Result<HashMap<String, RealInode>> {
248        // Deleted inode should not be read.
249        if self.whiteout {
250            return Err(Error::from_raw_os_error(libc::ENOENT));
251        }
252        // trace!("readdir: before stat");
253        let stat = match self.stat.clone() {
254            Some(v) => v,
255            None => self.stat64(&ctx).await?,
256        };
257
258        // Must be directory.
259        if stat.attr.kind != FileType::Directory {
260            return Err(Error::from_raw_os_error(libc::ENOTDIR));
261        }
262
263        // Open the directory and load each entry.
264        let opendir_res = self
265            .layer
266            .opendir(ctx, self.inode, libc::O_RDONLY as u32)
267            .await;
268        // trace!("readdir: after opendir");
269        let handle = match opendir_res {
270            Ok(handle) => handle,
271
272            // opendir may not be supported if no_opendir is set, so we can ignore this error.
273            Err(e) => {
274                let ioerror: std::io::Error = e.into();
275                match ioerror.raw_os_error() {
276                    Some(raw_error) => {
277                        if raw_error == libc::ENOSYS {
278                            // We can still call readdir with inode if opendir is not supported in this layer.
279                            ReplyOpen { fh: 0, flags: 0 }
280                        } else {
281                            return Err(e.into());
282                        }
283                    }
284                    None => {
285                        return Err(e.into());
286                    }
287                }
288            }
289        };
290
291        let child_names = self.layer.readdir(ctx, self.inode, handle.fh, 0).await?;
292        // Non-zero handle indicates successful 'open', we should 'release' it.
293        if handle.fh > 0 {
294            self.layer
295                .releasedir(ctx, self.inode, handle.fh, handle.flags)
296                .await?
297            //DIFF
298        }
299
300        // Lookup all child and construct "RealInode"s.
301        let child_real_inodes = Arc::new(Mutex::new(HashMap::new()));
302        // trace!("readdir: before iter childrens");
303        let a_map = child_names.entries.map(|entery| async {
304            match entery {
305                Ok(dire) => {
306                    let dname = dire.name.into_string().unwrap();
307                    if dname == "." || dname == ".." {
308                        // Skip . and .. entries.
309                        return Ok(());
310                    }
311                    // trace!("readdir: before lookup child: dname={}", dname);
312                    if let Some(child) = self.lookup_child(ctx, &dname).await? {
313                        child_real_inodes.lock().await.insert(dname, child);
314                    }
315                    Ok(())
316                }
317                Err(err) => Err(err),
318            }
319        });
320        let k = join_all(a_map.collect::<Vec<_>>().await).await;
321        drop(k);
322        // Now into_inner func is safety.
323        let re = Arc::try_unwrap(child_real_inodes)
324            .map_err(|_| Errno::new_not_exist())?
325            .into_inner();
326        // trace!("readdir: return");
327        Ok(re)
328    }
329
330    async fn create_whiteout(&self, ctx: Request, name: &str) -> Result<RealInode> {
331        if !self.in_upper_layer {
332            return Err(Error::from_raw_os_error(libc::EROFS));
333        }
334
335        // from &str to &OsStr
336        let name_osstr = OsStr::new(name);
337        let entry = self
338            .layer
339            .create_whiteout(ctx, self.inode, name_osstr)
340            .await?;
341
342        // Wrap whiteout to RealInode.
343        Ok(RealInode {
344            layer: self.layer.clone(),
345            in_upper_layer: true,
346            inode: entry.attr.ino,
347            whiteout: true,
348            opaque: false,
349            stat: Some(ReplyAttr {
350                ttl: entry.ttl,
351                attr: entry.attr,
352            }),
353        })
354    }
355
356    async fn mkdir(&self, ctx: Request, name: &str, mode: u32, umask: u32) -> Result<RealInode> {
357        if !self.in_upper_layer {
358            return Err(Error::from_raw_os_error(libc::EROFS));
359        }
360
361        let name_osstr = OsStr::new(name);
362        let entry = self
363            .layer
364            .mkdir(ctx, self.inode, name_osstr, mode, umask)
365            .await?;
366
367        // update node's first_layer
368        Ok(RealInode {
369            layer: self.layer.clone(),
370            in_upper_layer: true,
371            inode: entry.attr.ino,
372            whiteout: false,
373            opaque: false,
374            stat: Some(ReplyAttr {
375                ttl: entry.ttl,
376                attr: entry.attr,
377            }),
378        })
379    }
380
381    async fn create(
382        &self,
383        ctx: Request,
384        name: &str,
385        mode: u32,
386        flags: u32,
387    ) -> Result<(RealInode, Option<u64>)> {
388        if !self.in_upper_layer {
389            return Err(Error::from_raw_os_error(libc::EROFS));
390        }
391        let name = OsStr::new(name);
392        let create_rep = self
393            .layer
394            .create(ctx, self.inode, name, mode, flags)
395            .await?;
396
397        Ok((
398            RealInode {
399                layer: self.layer.clone(),
400                in_upper_layer: true,
401                inode: create_rep.attr.ino,
402                whiteout: false,
403                opaque: false,
404                stat: Some(ReplyAttr {
405                    ttl: create_rep.ttl,
406                    attr: create_rep.attr,
407                }),
408            },
409            Some(create_rep.fh),
410        ))
411    }
412
413    async fn mknod(
414        &self,
415        ctx: Request,
416        name: &str,
417        mode: u32,
418        rdev: u32,
419        _umask: u32,
420    ) -> Result<RealInode> {
421        if !self.in_upper_layer {
422            return Err(Error::from_raw_os_error(libc::EROFS));
423        }
424        let name = OsStr::new(name);
425        let rep = self.layer.mknod(ctx, self.inode, name, mode, rdev).await?;
426        Ok(RealInode {
427            layer: self.layer.clone(),
428            in_upper_layer: true,
429            inode: rep.attr.ino,
430            whiteout: false,
431            opaque: false,
432            stat: Some(ReplyAttr {
433                ttl: rep.ttl,
434                attr: rep.attr,
435            }),
436        })
437    }
438
439    async fn link(&self, ctx: Request, ino: u64, name: &str) -> Result<RealInode> {
440        if !self.in_upper_layer {
441            return Err(Error::from_raw_os_error(libc::EROFS));
442        }
443        let name = OsStr::new(name);
444        let entry = self.layer.link(ctx, ino, self.inode, name).await?;
445
446        let opaque = if utils::is_dir(&entry.attr.kind) {
447            self.layer.is_opaque(ctx, entry.attr.ino).await?
448        } else {
449            false
450        };
451        Ok(RealInode {
452            layer: self.layer.clone(),
453            in_upper_layer: true,
454            inode: entry.attr.ino,
455            whiteout: false,
456            opaque,
457            stat: Some(ReplyAttr {
458                ttl: entry.ttl,
459                attr: entry.attr,
460            }),
461        })
462    }
463
464    // Create a symlink in self directory.
465    async fn symlink(&self, ctx: Request, link_name: &str, filename: &str) -> Result<RealInode> {
466        if !self.in_upper_layer {
467            return Err(Error::from_raw_os_error(libc::EROFS));
468        }
469        let link_name = OsStr::new(link_name);
470        let filename = OsStr::new(filename);
471        let entry = self
472            .layer
473            .symlink(ctx, self.inode, filename, link_name)
474            .await?;
475
476        Ok(RealInode {
477            layer: self.layer.clone(),
478            in_upper_layer: true,
479            inode: entry.attr.ino,
480            whiteout: false,
481            opaque: false,
482            stat: Some(ReplyAttr {
483                ttl: entry.ttl,
484                attr: entry.attr,
485            }),
486        })
487    }
488}
489
490impl Drop for RealInode {
491    fn drop(&mut self) {
492        let layer = Arc::clone(&self.layer);
493        let inode = self.inode;
494        tokio::spawn(async move {
495            let ctx = Request::default();
496            layer.forget(ctx, inode, 1).await;
497        });
498    }
499}
500
501impl OverlayInode {
502    pub fn new() -> Self {
503        Self {
504            childrens: Mutex::new(HashMap::new()),
505            parent: Mutex::new(Weak::new()),
506            real_inodes: Mutex::new(vec![]),
507            inode: 0,
508            path: RwLock::new(String::new()),
509            name: RwLock::new(String::new()),
510            lookups: AtomicU64::new(0),
511            whiteout: AtomicBool::new(false),
512            loaded: AtomicBool::new(false),
513        }
514    }
515    // Allocate new OverlayInode based on one RealInode,
516    // inode number is always 0 since only OverlayFs has global unique inode allocator.
517    pub async fn new_from_real_inode(
518        name: &str,
519        ino: u64,
520        path: String,
521        real_inode: RealInode,
522    ) -> Self {
523        let mut new = OverlayInode::new();
524        new.inode = ino;
525        new.path = path.into();
526        new.name = name.to_string().into();
527        new.whiteout.store(real_inode.whiteout, Ordering::Relaxed);
528        new.lookups = AtomicU64::new(1);
529        new.real_inodes = Mutex::new(vec![real_inode.into()]);
530        new
531    }
532
533    pub async fn new_from_real_inodes(
534        name: &str,
535        ino: u64,
536        path: String,
537        real_inodes: Vec<RealInode>,
538    ) -> Result<Self> {
539        if real_inodes.is_empty() {
540            error!("BUG: new_from_real_inodes() called with empty real_inodes");
541            return Err(Error::from_raw_os_error(libc::EINVAL));
542        }
543
544        let mut first = true;
545        let mut new = Self::new();
546        for ri in real_inodes {
547            let whiteout = ri.whiteout;
548            let opaque = ri.opaque;
549            let stat = match &ri.stat {
550                Some(v) => v.clone(),
551                None => ri.stat64(&Request::default()).await?,
552            };
553
554            if first {
555                first = false;
556                new = Self::new_from_real_inode(name, ino, path.clone(), ri).await;
557
558                // This is whiteout, no need to check lower layers.
559                if whiteout {
560                    break;
561                }
562
563                // A non-directory file shadows all lower layers as default.
564                if !utils::is_dir(&stat.attr.kind) {
565                    break;
566                }
567
568                // Opaque directory shadows all lower layers.
569                if opaque {
570                    break;
571                }
572            } else {
573                // This is whiteout, no need to record this, break directly.
574                if ri.whiteout {
575                    break;
576                }
577
578                // Only directory have multiple real inodes, so if this is non-first real-inode
579                // and it's not directory, it should indicates some invalid layout. @weizhang555
580                if !utils::is_dir(&stat.attr.kind) {
581                    error!("invalid layout: non-directory has multiple real inodes");
582                    break;
583                }
584
585                // Valid directory.
586                new.real_inodes.lock().await.push(ri.into());
587                // Opaque directory shadows all lower layers.
588                if opaque {
589                    break;
590                }
591            }
592        }
593        Ok(new)
594    }
595
596    pub async fn stat64(&self, ctx: Request) -> Result<ReplyAttr> {
597        // try layers in order or just take stat from first layer?
598        for l in self.real_inodes.lock().await.iter() {
599            if let Some(v) = l.stat64_ignore_enoent(&ctx).await? {
600                return Ok(v);
601            }
602        }
603
604        // not in any layer
605        Err(Error::from_raw_os_error(libc::ENOENT))
606    }
607
608    pub async fn is_dir(&self, ctx: Request) -> Result<bool> {
609        let st = self.stat64(ctx).await?;
610        Ok(utils::is_dir(&st.attr.kind))
611    }
612
613    pub async fn count_entries_and_whiteout(&self, ctx: Request) -> Result<(u64, u64)> {
614        let mut count = 0;
615        let mut whiteouts = 0;
616
617        let st = self.stat64(ctx).await?;
618
619        // must be directory
620        if !utils::is_dir(&st.attr.kind) {
621            return Err(Error::from_raw_os_error(libc::ENOTDIR));
622        }
623
624        for (_, child) in self.childrens.lock().await.iter() {
625            if child.whiteout.load(Ordering::Relaxed) {
626                whiteouts += 1;
627            } else {
628                count += 1;
629            }
630        }
631        Ok((count, whiteouts))
632    }
633
634    pub async fn open(
635        &self,
636        ctx: Request,
637        flags: u32,
638        _fuse_flags: u32,
639    ) -> Result<(Arc<BoxedLayer>, ReplyOpen)> {
640        let (layer, _, inode) = self.first_layer_inode().await;
641        let ro = layer.as_ref().open(ctx, inode, flags).await?;
642        Ok((layer, ro))
643    }
644
645    // Self is directory, fill all childrens.
646    pub async fn scan_childrens(self: &Arc<Self>, ctx: Request) -> Result<Vec<OverlayInode>> {
647        let st = self.stat64(ctx).await?;
648        if !utils::is_dir(&st.attr.kind) {
649            return Err(Error::from_raw_os_error(libc::ENOTDIR));
650        }
651
652        let mut all_layer_inodes: HashMap<String, Vec<RealInode>> = HashMap::new();
653        // read out directories from each layer
654        // Scan from upper layer to lower layer.
655        for ri in self.real_inodes.lock().await.iter() {
656            if ri.whiteout {
657                // Node is deleted from some upper layer, skip it.
658                debug!("directory is whiteout");
659                break;
660            }
661
662            let stat = match &ri.stat {
663                Some(v) => v.clone(),
664                None => ri.stat64(&ctx).await?,
665            };
666
667            if !utils::is_dir(&stat.attr.kind) {
668                debug!("{} is not a directory", self.path.read().await);
669                // not directory
670                break;
671            }
672
673            // Read all entries from one layer.
674            let entries: HashMap<String, RealInode> = ri.readdir(ctx).await?;
675
676            // Merge entries from one layer to all_layer_inodes.
677            for (name, inode) in entries {
678                match all_layer_inodes.get_mut(&name) {
679                    Some(v) => {
680                        // Append additional RealInode to the end of vector.
681                        v.push(inode)
682                    }
683                    None => {
684                        all_layer_inodes.insert(name, vec![inode]);
685                    }
686                }
687            }
688
689            // if opaque, stop here
690            if ri.opaque {
691                debug!("directory {} is opaque", self.path.read().await);
692                break;
693            }
694        }
695
696        // Construct OverlayInode for each entry.
697        let mut childrens = vec![];
698        for (name, real_inodes) in all_layer_inodes {
699            // Inode numbers are not allocated yet.
700            let path = format!("{}/{}", self.path.read().await, name);
701            let new = Self::new_from_real_inodes(name.as_str(), 0, path, real_inodes).await?;
702            childrens.push(new);
703        }
704
705        Ok(childrens)
706    }
707
708    /// Create a new directory in upper layer for node, node must be directory.
709    ///
710    /// Recursively ensures a directory path exists in the upper layer.
711    ///
712    /// This function is a critical part of the copy-up process. When a file or directory
713    /// needs to be copied up, this function is called on its parent to ensure the entire
714    /// directory hierarchy exists in the upper layer first. It works recursively:
715    /// 1. If the current directory is already in the upper layer, it does nothing.
716    /// 2. If not, it first calls itself on its own parent directory.
717    /// 3. Once the parent is guaranteed to be in the upper layer, it creates the current
718    ///    directory within the parent's upper-layer representation.
719    ///
720    /// Crucially, it preserves the original directory's ownership (UID/GID) and permissions
721    /// by using the [`do_getattr_helper`][crate::passthrough::PassthroughFs::do_getattr_helper] and
722    /// [`do_mkdir_helper`][crate::passthrough::PassthroughFs::do_mkdir_helper] functions.
723    pub async fn create_upper_dir(
724        self: Arc<Self>,
725        ctx: Request,
726        mode_umask: Option<(u32, u32)>,
727    ) -> Result<()> {
728        // To preserve original ownership, we must get the raw, unmapped host attributes.
729        // We achieve this by calling `do_getattr_helper`, which is specifically designed
730        // to bypass the ID mapping logic. This is safe and does not affect other
731        // functionalities because `do_getattr_helper` and the standard `stat64()` call
732        // both rely on the same underlying `stat` system call; they only differ in
733        // whether the resulting `uid` and `gid` are mapped.
734        let (self_layer, _, self_inode) = self.first_layer_inode().await;
735        let re = self_layer.do_getattr_helper(self_inode, None).await?;
736        let st = ReplyAttr {
737            ttl: re.1,
738            attr: convert_stat64_to_file_attr(re.0),
739        };
740        if !utils::is_dir(&st.attr.kind) {
741            return Err(Error::from_raw_os_error(libc::ENOTDIR));
742        }
743
744        // If node already has upper layer, we can just return here.
745        if self.in_upper_layer().await {
746            return Ok(());
747        }
748
749        // not in upper layer, check parent.
750        let pnode = if let Some(n) = self.parent.lock().await.upgrade() {
751            Arc::clone(&n)
752        } else {
753            return Err(Error::other("no parent?"));
754        };
755
756        if !pnode.in_upper_layer().await {
757            Box::pin(pnode.clone().create_upper_dir(ctx, None)).await?; // recursive call
758        }
759        let child: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
760        let c_name = self.name.read().await.clone();
761        let _ = pnode
762            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
763                match parent_upper_inode {
764                    Some(parent_ri) => {
765                        let ri = match mode_umask {
766                            // We manually unfold the `mkdir` logic here instead of calling the `mkdir` method directly.
767                            // This is necessary to preserve the original directory's UID and GID during the copy-up process.
768                            Some((mode, umask)) => {
769                                if !parent_ri.in_upper_layer {
770                                    return Err(Error::from_raw_os_error(libc::EROFS));
771                                }
772                                let name_osstr = OsStr::new(&c_name);
773                                let entry = parent_ri
774                                    .layer
775                                    .do_mkdir_helper(
776                                        ctx,
777                                        parent_ri.inode,
778                                        name_osstr,
779                                        mode,
780                                        umask,
781                                        st.attr.uid,
782                                        st.attr.gid,
783                                    )
784                                    .await?;
785                                RealInode {
786                                    layer: parent_ri.layer.clone(),
787                                    in_upper_layer: true,
788                                    inode: entry.attr.ino,
789                                    whiteout: false,
790                                    opaque: false,
791                                    stat: Some(ReplyAttr {
792                                        ttl: entry.ttl,
793                                        attr: entry.attr,
794                                    }),
795                                }
796                            }
797                            None => {
798                                if !parent_ri.in_upper_layer {
799                                    return Err(Error::from_raw_os_error(libc::EROFS));
800                                }
801                                let name_osstr = OsStr::new(&c_name);
802                                let entry = parent_ri
803                                    .layer
804                                    .do_mkdir_helper(
805                                        ctx,
806                                        parent_ri.inode,
807                                        name_osstr,
808                                        mode_from_kind_and_perm(st.attr.kind, st.attr.perm),
809                                        0,
810                                        st.attr.uid,
811                                        st.attr.gid,
812                                    )
813                                    .await?;
814                                RealInode {
815                                    layer: parent_ri.layer.clone(),
816                                    in_upper_layer: true,
817                                    inode: entry.attr.ino,
818                                    whiteout: false,
819                                    opaque: false,
820                                    stat: Some(ReplyAttr {
821                                        ttl: entry.ttl,
822                                        attr: entry.attr,
823                                    }),
824                                }
825                            }
826                        };
827                        // create directory here
828                        child.lock().await.replace(ri);
829                    }
830                    None => {
831                        error!(
832                            "BUG: parent {} has no upper inode after create_upper_dir",
833                            pnode.inode
834                        );
835                        return Err(Error::from_raw_os_error(libc::EINVAL));
836                    }
837                }
838                Ok(false)
839            })
840            .await?;
841
842        if let Some(ri) = child.lock().await.take() {
843            // Push the new real inode to the front of vector.
844            self.add_upper_inode(ri, false).await;
845        }
846
847        Ok(())
848    }
849
850    // Add new upper RealInode to OverlayInode, clear all lower RealInodes if 'clear_lowers' is true.
851    async fn add_upper_inode(self: &Arc<Self>, ri: RealInode, clear_lowers: bool) {
852        let mut inodes = self.real_inodes.lock().await;
853        // Update self according to upper attribute.
854        self.whiteout.store(ri.whiteout, Ordering::Relaxed);
855
856        // Push the new real inode to the front of vector.
857        let mut new = vec![Arc::new(ri)];
858        // Drain lower RealInodes.
859        let lowers = inodes.drain(..).collect::<Vec<Arc<RealInode>>>();
860        if !clear_lowers {
861            // If not clear lowers, append them to the end of vector.
862            new.extend(lowers);
863        }
864        inodes.extend(new);
865    }
866
867    // return the uppder layer fs.
868    pub async fn in_upper_layer(&self) -> bool {
869        let all_inodes = self.real_inodes.lock().await;
870        let first = all_inodes.first();
871        match first {
872            Some(v) => v.in_upper_layer,
873            None => false,
874        }
875    }
876
877    pub async fn upper_layer_only(&self) -> bool {
878        let real_inodes = self.real_inodes.lock().await;
879        let first = real_inodes.first();
880        match first {
881            Some(v) => {
882                if !v.in_upper_layer {
883                    false
884                } else {
885                    real_inodes.len() == 1
886                }
887            }
888            None => false,
889        }
890    }
891
892    pub async fn first_layer_inode(&self) -> (Arc<BoxedLayer>, bool, u64) {
893        let all_inodes = self.real_inodes.lock().await;
894        let first = all_inodes.first();
895        match first {
896            Some(v) => (v.layer.clone(), v.in_upper_layer, v.inode),
897            None => panic!("BUG: dangling OverlayInode"),
898        }
899    }
900
901    pub async fn child(&self, name: &str) -> Option<Arc<OverlayInode>> {
902        self.childrens.lock().await.get(name).cloned()
903    }
904
905    pub async fn remove_child(&self, name: &str) -> Option<Arc<OverlayInode>> {
906        self.childrens.lock().await.remove(name)
907    }
908
909    pub async fn insert_child(&self, name: &str, node: Arc<OverlayInode>) {
910        self.childrens.lock().await.insert(name.to_string(), node);
911    }
912
913    /// Handles operations on the upper layer inode of an `OverlayInode` in a thread-safe manner.
914    ///
915    /// This function locks the `real_inodes` field of the `OverlayInode` and retrieves the first
916    /// real inode (if any). If the first inode exists and belongs to the upper layer (`in_upper_layer` is true),
917    /// the provided callback `f` is invoked with the inode wrapped in `Some`. Otherwise, `f` is invoked with `None`.
918    ///
919    /// # Arguments
920    /// * `f`: A closure that takes an `Option<RealInode>` and returns a future. The future resolves to a `Result<bool>`.
921    ///
922    /// # Returns
923    /// * `Ok(bool)`: The result of invoking the callback `f`.
924    /// * `Err(Erron)`: An error is returned if:
925    ///   - There are no backend inodes (`real_inodes` is empty), indicating a dangling `OverlayInode`.
926    ///   - The callback `f` itself returns an error.
927    ///
928    /// # Behavior
929    /// 1. Locks the `real_inodes` field to ensure thread safety.
930    /// 2. Checks if the first inode exists:
931    ///    - If it exists and is in the upper layer, invokes `f(Some(inode))`.
932    ///    - If it exists but is not in the upper layer, invokes `f(None)`.
933    /// 3. If no inodes exist, returns an error indicating a dangling `OverlayInode`.
934    ///
935    /// # Example Use Case
936    /// This function is typically used to perform operations on the upper layer inode of an `OverlayInode`,
937    /// such as creating, modifying, or deleting files/directories in the overlay filesystem's upper layer.
938    pub async fn handle_upper_inode_locked<F, Fut>(&self, f: F) -> Result<bool>
939    where
940        // Can pass a &RealInode (or None) to f for any lifetime 'a
941        F: FnOnce(Option<Arc<RealInode>>) -> Fut,
942        // f returns a Future that must live at least as long as 'a
943        Fut: Future<Output = Result<bool>>,
944    {
945        let all_inodes = self.real_inodes.lock().await;
946        let first = all_inodes.first();
947        match first {
948            Some(v) => {
949                if v.in_upper_layer {
950                    f(Some(v.clone())).await
951                } else {
952                    f(None).await
953                }
954            }
955            None => Err(Error::other(format!(
956                "BUG: dangling OverlayInode {} without any backend inode",
957                self.inode
958            ))),
959        }
960    }
961}
962#[allow(unused)]
963fn entry_type_from_mode(mode: libc::mode_t) -> u8 {
964    match mode & libc::S_IFMT {
965        libc::S_IFBLK => libc::DT_BLK,
966        libc::S_IFCHR => libc::DT_CHR,
967        libc::S_IFDIR => libc::DT_DIR,
968        libc::S_IFIFO => libc::DT_FIFO,
969        libc::S_IFLNK => libc::DT_LNK,
970        libc::S_IFREG => libc::DT_REG,
971        libc::S_IFSOCK => libc::DT_SOCK,
972        _ => libc::DT_UNKNOWN,
973    }
974}
975impl OverlayFs {
976    pub fn new(
977        upper: Option<Arc<BoxedLayer>>,
978        lowers: Vec<Arc<BoxedLayer>>,
979        params: Config,
980        root_inode: u64,
981    ) -> Result<Self> {
982        // load root inode
983        Ok(OverlayFs {
984            config: params,
985            lower_layers: lowers,
986            upper_layer: upper,
987            inodes: RwLock::new(InodeStore::new()),
988            handles: Mutex::new(HashMap::new()),
989            next_handle: AtomicU64::new(1),
990            writeback: AtomicBool::new(false),
991            no_open: AtomicBool::new(false),
992            no_opendir: AtomicBool::new(false),
993            killpriv_v2: AtomicBool::new(false),
994            perfile_dax: AtomicBool::new(false),
995            root_inodes: root_inode,
996        })
997    }
998
999    pub fn root_inode(&self) -> Inode {
1000        self.root_inodes
1001    }
1002
1003    async fn alloc_inode(&self, path: &str) -> Result<u64> {
1004        self.inodes.write().await.alloc_inode(path)
1005    }
1006
1007    /// Add a file layer and stack and merge the previous file layers.
1008    pub async fn push_layer(&mut self, layer: Arc<BoxedLayer>) -> Result<()> {
1009        let upper = self.upper_layer.take();
1010        if let Some(upper) = upper {
1011            self.lower_layers.push(upper);
1012        }
1013        self.upper_layer = Some(layer);
1014        // TODO: merge previous file layers. need optimization
1015        self.import().await?;
1016        Ok(())
1017    }
1018
1019    pub async fn import(&self) -> Result<()> {
1020        let mut root = OverlayInode::new();
1021        root.inode = self.root_inode();
1022        root.path = String::from("").into();
1023        root.name = String::from("").into();
1024        root.lookups = AtomicU64::new(2);
1025        root.real_inodes = Mutex::new(vec![]);
1026        let ctx = Request::default();
1027
1028        // Update upper inode
1029        if let Some(layer) = self.upper_layer.as_ref() {
1030            let ino = layer.root_inode();
1031            let real = RealInode::new(
1032                layer.clone(),
1033                true,
1034                ino,
1035                false,
1036                layer.is_opaque(ctx, ino).await?,
1037            )
1038            .await;
1039            root.real_inodes.lock().await.push(real.into());
1040        }
1041
1042        // Update lower inodes.
1043        for layer in self.lower_layers.iter() {
1044            let ino = layer.root_inode();
1045            let real: RealInode = RealInode::new(
1046                layer.clone(),
1047                false,
1048                ino,
1049                false,
1050                layer.is_opaque(ctx, ino).await?,
1051            )
1052            .await;
1053            root.real_inodes.lock().await.push(real.into());
1054        }
1055        let root_node = Arc::new(root);
1056
1057        // insert root inode into hash
1058        self.insert_inode(self.root_inode(), Arc::clone(&root_node))
1059            .await;
1060
1061        info!("loading root directory");
1062        self.load_directory(ctx, &root_node).await?;
1063        info!("loaded root directory");
1064
1065        Ok(())
1066    }
1067
1068    async fn root_node(&self) -> Arc<OverlayInode> {
1069        // Root node must exist.
1070        self.get_active_inode(self.root_inode()).await.unwrap()
1071    }
1072
1073    async fn insert_inode(&self, inode: u64, node: Arc<OverlayInode>) {
1074        self.inodes.write().await.insert_inode(inode, node).await;
1075    }
1076
1077    async fn get_active_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1078        self.inodes.read().await.get_inode(inode)
1079    }
1080
1081    // Get inode which is active or deleted.
1082    async fn get_all_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1083        let inode_store = self.inodes.read().await;
1084        match inode_store.get_inode(inode) {
1085            Some(n) => Some(n),
1086            None => inode_store.get_deleted_inode(inode),
1087        }
1088    }
1089
1090    // Return the inode only if it's permanently deleted from both self.inodes and self.deleted_inodes.
1091    async fn remove_inode(
1092        &self,
1093        inode: u64,
1094        path_removed: Option<String>,
1095    ) -> Option<Arc<OverlayInode>> {
1096        self.inodes
1097            .write()
1098            .await
1099            .remove_inode(inode, path_removed)
1100            .await
1101    }
1102
1103    // Lookup child OverlayInode with <name> under <parent> directory.
1104    // If name is empty, return parent itself.
1105    // Parent dir will be loaded, but returned OverlayInode won't.
1106    async fn lookup_node(
1107        &self,
1108        ctx: Request,
1109        parent: Inode,
1110        name: &str,
1111    ) -> Result<Arc<OverlayInode>> {
1112        if name.contains(SLASH_ASCII) {
1113            return Err(Error::from_raw_os_error(libc::EINVAL));
1114        }
1115
1116        // Parent inode is expected to be loaded before this function is called.
1117        // TODO: Is this correct?
1118        let pnode = match self.get_active_inode(parent).await {
1119            Some(v) => v,
1120            None => {
1121                match self.get_all_inode(parent).await {
1122                    Some(v) => {
1123                        trace!(
1124                            "overlayfs:mod.rs:1031:lookup_node: parent inode {parent} is deleted"
1125                        );
1126                        v
1127                    }
1128                    None => {
1129                        trace!(
1130                            "overlayfs:mod.rs:1034:lookup_node: parent inode {parent} not found"
1131                        );
1132                        // Parent inode is not found, return ENOENT.
1133                        return Err(Error::from_raw_os_error(libc::ENOENT));
1134                    }
1135                }
1136            }
1137        };
1138
1139        // Parent is whiteout-ed, return ENOENT.
1140        if pnode.whiteout.load(Ordering::Relaxed) {
1141            return Err(Error::from_raw_os_error(libc::ENOENT));
1142        }
1143
1144        let st = pnode.stat64(ctx).await?;
1145        if utils::is_dir(&st.attr.kind) && !pnode.loaded.load(Ordering::Relaxed) {
1146            // Parent is expected to be directory, load it first.
1147            self.load_directory(ctx, &pnode).await?;
1148        }
1149
1150        // Current file or dir.
1151        if name.eq(".")  
1152            // Root directory has no parent.
1153            || (parent == self.root_inode() && name.eq("..")) 
1154            // Special convention: empty name indicates current dir.
1155            || name.is_empty()
1156        {
1157            return Ok(Arc::clone(&pnode));
1158        }
1159
1160        match pnode.child(name).await {
1161            // Child is found.
1162            Some(v) => Ok(v),
1163            None => {
1164                trace!("lookup_node: child {name} not found");
1165                Err(Error::from_raw_os_error(libc::ENOENT))
1166            }
1167        }
1168    }
1169
1170    async fn lookup_node_ignore_enoent(
1171        &self,
1172        ctx: Request,
1173        parent: u64,
1174        name: &str,
1175    ) -> Result<Option<Arc<OverlayInode>>> {
1176        match self.lookup_node(ctx, parent, name).await {
1177            Ok(n) => Ok(Some(Arc::clone(&n))),
1178            Err(e) => {
1179                if let Some(raw_error) = e.raw_os_error()
1180                    && raw_error == libc::ENOENT
1181                {
1182                    return Ok(None);
1183                }
1184                Err(e)
1185            }
1186        }
1187    }
1188
1189    // Load entries of the directory from all layers, if node is not directory, return directly.
1190    async fn load_directory(&self, ctx: Request, node: &Arc<OverlayInode>) -> Result<()> {
1191        if node.loaded.load(Ordering::Relaxed) {
1192            return Ok(());
1193        }
1194
1195        // We got all childrens without inode.
1196        // info!("before scan childrens, ctx: {:?}, node: {:?}", ctx, node.inode);
1197        let childrens = node.scan_childrens(ctx).await?;
1198        // info!("scanned children");
1199
1200        // =============== Start Lock Area ===================
1201        // Lock OverlayFs inodes.
1202        let mut inode_store = self.inodes.write().await;
1203        // Lock the OverlayInode and its childrens.
1204        let mut node_children = node.childrens.lock().await;
1205
1206        // Check again in case another 'load_directory' function call gets locks and want to do duplicated work.
1207        if node.loaded.load(Ordering::Relaxed) {
1208            return Ok(());
1209        }
1210
1211        // Now we have two locks' protection, Fs inodes lock and OverlayInode's childrens lock.
1212        // info!("before iter childrens");
1213        for mut child in childrens.into_iter() {
1214            // Allocate inode for each child.
1215            let ino = inode_store.alloc_inode(&child.path.read().await)?;
1216
1217            let name = child.name.read().await.clone();
1218            child.inode = ino;
1219            // Create bi-directional link between parent and child.
1220            child.parent = Mutex::new(Arc::downgrade(node));
1221
1222            let arc_child = Arc::new(child);
1223            node_children.insert(name, arc_child.clone());
1224            // Record overlay inode in whole OverlayFs.
1225            inode_store.insert_inode(ino, arc_child).await;
1226        }
1227        // info!("after iter childrens");
1228
1229        node.loaded.store(true, Ordering::Relaxed);
1230
1231        Ok(())
1232    }
1233
1234    async fn forget_one(&self, inode: Inode, count: u64) {
1235        if inode == self.root_inode() || inode == 0 {
1236            return;
1237        }
1238
1239        let v = match self.get_all_inode(inode).await {
1240            Some(n) => n,
1241            None => {
1242                trace!("forget unknown inode: {inode}");
1243                return;
1244            }
1245        };
1246
1247        // Use fetch_update to atomically update lookups in a loop until it succeeds
1248        v.lookups
1249            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
1250                // If count is larger than current lookups, return 0
1251                // Otherwise subtract count from current lookups
1252                if current < count {
1253                    Some(0)
1254                } else {
1255                    Some(current - count)
1256                }
1257            })
1258            .expect("fetch_update failed");
1259
1260        let lookups = v.lookups.load(Ordering::Relaxed);
1261        trace!(
1262            "forget inode: {}, name {}, lookups: {}",
1263            inode,
1264            v.name.read().await,
1265            lookups
1266        );
1267        if lookups == 0 {
1268            debug!(
1269                "inode is forgotten: {}, name {}",
1270                inode,
1271                v.name.read().await
1272            );
1273            let _ = self.remove_inode(inode, None).await;
1274            let parent = v.parent.lock().await;
1275
1276            if let Some(p) = parent.upgrade() {
1277                // remove it from hashmap
1278                p.remove_child(&v.name.read().await).await;
1279            }
1280        }
1281    }
1282
1283    async fn do_lookup(&self, ctx: Request, parent: Inode, name: &str) -> Result<ReplyEntry> {
1284        let node = self.lookup_node(ctx, parent, name).await?;
1285        debug!("do_lookup: {name:?}, found");
1286
1287        if node.whiteout.load(Ordering::Relaxed) {
1288            eprintln!("Error: node.whiteout.load() called.");
1289            return Err(Error::from_raw_os_error(libc::ENOENT));
1290        }
1291
1292        let mut st = node.stat64(ctx).await?;
1293        st.attr.ino = node.inode;
1294        if utils::is_dir(&st.attr.kind) && !node.loaded.load(Ordering::Relaxed) {
1295            self.load_directory(ctx, &node).await?;
1296        }
1297
1298        // FIXME: can forget happen between found and increase reference counter?
1299        let tmp = node.lookups.fetch_add(1, Ordering::Relaxed);
1300        trace!("lookup count: {}", tmp + 1);
1301        Ok(ReplyEntry {
1302            ttl: st.ttl,
1303            attr: st.attr,
1304            generation: 0,
1305        })
1306    }
1307
1308    async fn do_statvfs(&self, ctx: Request, inode: Inode) -> Result<ReplyStatFs> {
1309        match self.get_active_inode(inode).await {
1310            Some(ovi) => {
1311                let all_inodes = ovi.real_inodes.lock().await;
1312                let real_inode = all_inodes
1313                    .first()
1314                    .ok_or(Error::other("backend inode not found"))?;
1315                Ok(real_inode.layer.statfs(ctx, real_inode.inode).await?)
1316            }
1317            None => Err(Error::from_raw_os_error(libc::ENOENT)),
1318        }
1319    }
1320
1321    #[allow(clippy::too_many_arguments)]
1322    async fn do_readdir<'a>(
1323        &self,
1324        ctx: Request,
1325        inode: Inode,
1326        handle: u64,
1327        offset: u64,
1328    ) -> Result<
1329        impl futures_util::stream::Stream<Item = std::result::Result<DirectoryEntry, Errno>> + Send + 'a,
1330    > {
1331        // lookup the directory
1332        let ovl_inode = match self.handles.lock().await.get(&handle) {
1333            Some(dir) => dir.node.clone(),
1334            None => {
1335                // Try to get data with inode.
1336                let node = self.lookup_node(ctx, inode, ".").await?;
1337
1338                let st = node.stat64(ctx).await?;
1339                if !utils::is_dir(&st.attr.kind) {
1340                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1341                }
1342
1343                node.clone()
1344            }
1345        };
1346        self.load_directory(ctx, &ovl_inode).await?;
1347        let mut childrens = Vec::new();
1348        //add myself as "."
1349        childrens.push((".".to_string(), ovl_inode.clone()));
1350
1351        //add parent
1352        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1353            Some(p) => p.clone(),
1354            None => self.root_node().await,
1355        };
1356        childrens.push(("..".to_string(), parent_node));
1357
1358        for (name, child) in ovl_inode.childrens.lock().await.iter() {
1359            // skip whiteout node
1360            if child.whiteout.load(Ordering::Relaxed) {
1361                continue;
1362            }
1363            childrens.push((name.clone(), child.clone()));
1364        }
1365
1366        if offset >= childrens.len() as u64 {
1367            return Ok(iter(vec![].into_iter()));
1368        }
1369        let mut d: Vec<std::result::Result<DirectoryEntry, Errno>> = Vec::new();
1370
1371        for (index, (name, child)) in (0_u64..).zip(childrens.into_iter()) {
1372            // make struct DireEntry and Entry
1373            let st = child.stat64(ctx).await?;
1374            let dir_entry = DirectoryEntry {
1375                inode: child.inode,
1376                kind: st.attr.kind,
1377                name: name.into(),
1378                offset: (index + 1) as i64,
1379            };
1380            d.push(Ok(dir_entry));
1381        }
1382
1383        Ok(iter(d.into_iter()))
1384    }
1385
1386    #[allow(clippy::too_many_arguments)]
1387    async fn do_readdirplus<'a>(
1388        &self,
1389        ctx: Request,
1390        inode: Inode,
1391        handle: u64,
1392        offset: u64,
1393    ) -> Result<
1394        impl futures_util::stream::Stream<Item = std::result::Result<DirectoryEntryPlus, Errno>>
1395        + Send
1396        + 'a,
1397    > {
1398        // lookup the directory
1399        let ovl_inode = match self.handles.lock().await.get(&handle) {
1400            Some(dir) => {
1401                trace!(
1402                    "do_readdirplus: handle {} found, inode {}",
1403                    handle, dir.node.inode
1404                );
1405                dir.node.clone()
1406            }
1407            None => {
1408                trace!("do_readdirplus: handle {handle} not found, lookup inode {inode}");
1409                // Try to get data with inode.
1410                let node = self.lookup_node(ctx, inode, ".").await?;
1411
1412                let st = node.stat64(ctx).await?;
1413                if !utils::is_dir(&st.attr.kind) {
1414                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1415                }
1416
1417                node.clone()
1418            }
1419        };
1420        self.load_directory(ctx, &ovl_inode).await?;
1421
1422        let mut childrens = Vec::new();
1423        //add myself as "."
1424        childrens.push((".".to_string(), ovl_inode.clone()));
1425
1426        //add parent
1427        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1428            Some(p) => p.clone(),
1429            None => self.root_node().await,
1430        };
1431        childrens.push(("..".to_string(), parent_node));
1432
1433        for (name, child) in ovl_inode.childrens.lock().await.iter() {
1434            // skip whiteout node
1435            if child.whiteout.load(Ordering::Relaxed) {
1436                continue;
1437            }
1438            childrens.push((name.clone(), child.clone()));
1439        }
1440
1441        if offset >= childrens.len() as u64 {
1442            return Ok(iter(vec![].into_iter()));
1443        }
1444        let mut d: Vec<std::result::Result<DirectoryEntryPlus, Errno>> = Vec::new();
1445
1446        for (index, (name, child)) in (0_u64..).zip(childrens.into_iter()) {
1447            if index >= offset {
1448                // make struct DireEntry and Entry
1449                let mut st = child.stat64(ctx).await?;
1450                child.lookups.fetch_add(1, Ordering::Relaxed);
1451                st.attr.ino = child.inode;
1452                let dir_entry = DirectoryEntryPlus {
1453                    inode: child.inode,
1454                    generation: 0,
1455                    kind: st.attr.kind,
1456                    name: name.into(),
1457                    offset: (index + 1) as i64,
1458                    attr: st.attr,
1459                    entry_ttl: st.ttl,
1460                    attr_ttl: st.ttl,
1461                };
1462                d.push(Ok(dir_entry));
1463            }
1464        }
1465
1466        Ok(iter(d.into_iter()))
1467    }
1468
    /// Creates directory `name` under `parent_node` in the upper layer and registers the new
    /// overlay inode with both the inode store and the parent's children.
    ///
    /// If a whiteout node already exists under `name`:
    /// * its on-disk whiteout is deleted first when that node is in the upper layer (a failure
    ///   of that deletion is ignored), and
    /// * the new directory is marked opaque unless that node was backed by the upper layer
    ///   only.
    ///
    /// # Errors
    /// * `EROFS` if there is no upper layer.
    /// * `ENOENT` if `parent_node` is a whiteout.
    /// * `EEXIST` if a non-whiteout node named `name` already exists.
    /// * `EINVAL` if the parent has no upper real inode after being copied up.
    /// * Any error from the lookup, `copy_node_up`, inode allocation, `mkdir` or `set_opaque`.
    async fn do_mkdir(
        &self,
        ctx: Request,
        parent_node: Arc<OverlayInode>,
        name: &str,
        mode: u32,
        umask: u32,
    ) -> Result<()> {
        // Directories can only be created in a writable (upper) layer.
        if self.upper_layer.is_none() {
            return Err(Error::from_raw_os_error(libc::EROFS));
        }

        // Parent node was deleted.
        if parent_node.whiteout.load(Ordering::Relaxed) {
            return Err(Error::from_raw_os_error(libc::ENOENT));
        }

        // Decided from the existing node (if any); consumed inside the closure below.
        let mut delete_whiteout = false;
        let mut set_opaque = false;
        if let Some(n) = self
            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
            .await?
        {
            // Node with same name exists, let's check if it's whiteout.
            if !n.whiteout.load(Ordering::Relaxed) {
                return Err(Error::from_raw_os_error(libc::EEXIST));
            }

            // Only a whiteout that lives in the upper layer has something to delete there.
            if n.in_upper_layer().await {
                delete_whiteout = true;
            }

            // Set opaque if child dir has lower layers.
            if !n.upper_layer_only().await {
                set_opaque = true;
            }
        }

        // Copy parent node up if necessary.
        let pnode = self.copy_node_up(ctx, parent_node).await?;

        let path = format!("{}/{}", pnode.path.read().await, name);
        let path_ref = &path;
        // Slot through which the closure hands the freshly built overlay inode back out.
        let new_node = Arc::new(Mutex::new(None));
        pnode
            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
                let parent_real_inode = match parent_real_inode {
                    Some(inode) => inode,
                    None => {
                        error!("BUG: parent doesn't have upper inode after copied up");
                        return Err(Error::from_raw_os_error(libc::EINVAL));
                    }
                };
                let osstr = OsStr::new(name);
                if delete_whiteout {
                    // Best effort: the result of removing the whiteout is deliberately dropped.
                    let _ = parent_real_inode
                        .layer
                        .delete_whiteout(ctx, parent_real_inode.inode, osstr)
                        .await;
                }

                // Allocate inode number.
                let ino = self.alloc_inode(path_ref).await?;
                let child_dir = parent_real_inode.mkdir(ctx, name, mode, umask).await?;
                // Set opaque if child dir has lower layers.
                if set_opaque {
                    parent_real_inode
                        .layer
                        .set_opaque(ctx, child_dir.inode)
                        .await?;
                }
                let ovi =
                    OverlayInode::new_from_real_inode(name, ino, path_ref.clone(), child_dir).await;
                new_node.lock().await.replace(ovi);
                Ok(false)
            })
            .await?;

        // new_node is always 'Some'
        let nn = new_node.lock().await.take();
        let arc_node = Arc::new(nn.unwrap());
        // Make the new directory reachable by inode number and by name under its parent.
        self.insert_inode(arc_node.inode, arc_node.clone()).await;
        pnode.insert_child(name, arc_node).await;
        Ok(())
    }
1554
    /// Creates the node `name` under `parent_node` in the upper layer via the parent real
    /// inode's `mknod`.
    ///
    /// Two cases are handled:
    /// * A whiteout node already exists under `name`: the existing overlay inode is reused.
    ///   Its on-disk whiteout is deleted first when it is in the upper layer (a failure of
    ///   that deletion is ignored), then the new real inode replaces all of its real inodes.
    /// * Nothing exists under `name`: a new overlay inode is allocated, built from the new
    ///   real inode, and registered with the inode store and the parent's children.
    ///
    /// # Errors
    /// * `EROFS` if there is no upper layer.
    /// * `ENOENT` if `parent_node` is a whiteout.
    /// * `EEXIST` if a non-whiteout node named `name` already exists.
    /// * `EINVAL` if the parent has no upper real inode after being copied up.
    /// * Any error from the lookup, `copy_node_up`, inode allocation or `mknod`.
    async fn do_mknod(
        &self,
        ctx: Request,
        parent_node: &Arc<OverlayInode>,
        name: &str,
        mode: u32,
        rdev: u32,
        umask: u32,
    ) -> Result<()> {
        // Nodes can only be created in a writable (upper) layer.
        if self.upper_layer.is_none() {
            return Err(Error::from_raw_os_error(libc::EROFS));
        }

        // Parent node was deleted.
        if parent_node.whiteout.load(Ordering::Relaxed) {
            return Err(Error::from_raw_os_error(libc::ENOENT));
        }

        match self
            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
            .await?
        {
            Some(n) => {
                // Node with same name exists, let's check if it's whiteout.
                if !n.whiteout.load(Ordering::Relaxed) {
                    return Err(Error::from_raw_os_error(libc::EEXIST));
                }

                // Copy parent node up if necessary.
                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
                pnode
                    .handle_upper_inode_locked(
                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
                            let parent_real_inode = match parent_real_inode {
                                Some(inode) => inode,
                                None => {
                                    error!("BUG: parent doesn't have upper inode after copied up");
                                    return Err(Error::from_raw_os_error(libc::EINVAL));
                                }
                            };
                            let osstr = OsStr::new(name);
                            // Only a whiteout in the upper layer has something to delete there;
                            // the result of the deletion is deliberately dropped.
                            if n.in_upper_layer().await {
                                let _ = parent_real_inode
                                    .layer
                                    .delete_whiteout(ctx, parent_real_inode.inode, osstr)
                                    .await;
                            }

                            let child_ri = parent_real_inode
                                .mknod(ctx, name, mode, rdev, umask)
                                .await?;

                            // Replace existing real inodes with new one.
                            n.add_upper_inode(child_ri, true).await;
                            Ok(false)
                        },
                    )
                    .await?;
            }
            None => {
                // Copy parent node up if necessary.
                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
                // Slot through which the closure hands the freshly built overlay inode back out.
                let new_node = Arc::new(Mutex::new(None));
                let path = format!("{}/{}", pnode.path.read().await, name);
                pnode
                    .handle_upper_inode_locked(
                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
                            let parent_real_inode = match parent_real_inode {
                                Some(inode) => inode,
                                None => {
                                    error!("BUG: parent doesn't have upper inode after copied up");
                                    return Err(Error::from_raw_os_error(libc::EINVAL));
                                }
                            };

                            // Allocate inode number.
                            let ino = self.alloc_inode(&path).await?;
                            let child_ri = parent_real_inode
                                .mknod(ctx, name, mode, rdev, umask)
                                .await?;
                            let ovi = OverlayInode::new_from_real_inode(
                                name,
                                ino,
                                path.clone(),
                                child_ri,
                            )
                            .await;

                            new_node.lock().await.replace(ovi);
                            Ok(false)
                        },
                    )
                    .await?;

                // The closure returned Ok, so the slot has been filled.
                let nn = new_node.lock().await.take();
                let arc_node = Arc::new(nn.unwrap());
                // Make the new node reachable by inode number and by name under its parent.
                self.insert_inode(arc_node.inode, arc_node.clone()).await;
                pnode.insert_child(name, arc_node).await;
            }
        }

        Ok(())
    }
1658
1659    async fn do_create(
1660        &self,
1661        ctx: Request,
1662        parent_node: &Arc<OverlayInode>,
1663        name: &OsStr,
1664        mode: u32,
1665        flags: u32,
1666    ) -> Result<Option<u64>> {
1667        let name_str = name.to_str().unwrap();
1668        let upper = self
1669            .upper_layer
1670            .as_ref()
1671            .cloned()
1672            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
1673
1674        // Parent node was deleted.
1675        if parent_node.whiteout.load(Ordering::Relaxed) {
1676            return Err(Error::from_raw_os_error(libc::ENOENT));
1677        }
1678
1679        let handle: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1680        let real_ino: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1681        let new_ovi = match self
1682            .lookup_node_ignore_enoent(ctx, parent_node.inode, name_str)
1683            .await?
1684        {
1685            Some(n) => {
1686                // Node with same name exists, let's check if it's whiteout.
1687                if !n.whiteout.load(Ordering::Relaxed) {
1688                    return Err(Error::from_raw_os_error(libc::EEXIST));
1689                }
1690
1691                // Copy parent node up if necessary.
1692                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1693                pnode
1694                    .handle_upper_inode_locked(
1695                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1696                            let parent_real_inode = match parent_real_inode {
1697                                Some(inode) => inode,
1698                                None => {
1699                                    error!("BUG: parent doesn't have upper inode after copied up");
1700                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1701                                }
1702                            };
1703
1704                            if n.in_upper_layer().await {
1705                                let _ = parent_real_inode
1706                                    .layer
1707                                    .delete_whiteout(ctx, parent_real_inode.inode, name)
1708                                    .await;
1709                            }
1710
1711                            let (child_ri, hd) =
1712                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1713                            real_ino.lock().await.replace(child_ri.inode);
1714                            handle.lock().await.replace(hd.unwrap());
1715
1716                            // Replace existing real inodes with new one.
1717                            n.add_upper_inode(child_ri, true).await;
1718                            Ok(false)
1719                        },
1720                    )
1721                    .await?;
1722                n.clone()
1723            }
1724            None => {
1725                // Copy parent node up if necessary.
1726                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1727                let new_node = Arc::new(Mutex::new(None));
1728                let path = format!("{}/{}", pnode.path.read().await, name_str);
1729                pnode
1730                    .handle_upper_inode_locked(
1731                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1732                            let parent_real_inode = match parent_real_inode {
1733                                Some(inode) => inode,
1734                                None => {
1735                                    error!("BUG: parent doesn't have upper inode after copied up");
1736                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1737                                }
1738                            };
1739
1740                            let (child_ri, hd) =
1741                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1742                            real_ino.lock().await.replace(child_ri.inode);
1743                            handle.lock().await.replace(hd.unwrap());
1744                            // Allocate inode number.
1745                            let ino = self.alloc_inode(&path).await?;
1746                            let ovi = OverlayInode::new_from_real_inode(
1747                                name_str,
1748                                ino,
1749                                path.clone(),
1750                                child_ri,
1751                            )
1752                            .await;
1753
1754                            new_node.lock().await.replace(ovi);
1755                            Ok(false)
1756                        },
1757                    )
1758                    .await?;
1759
1760                // new_node is always 'Some'
1761                let nn = new_node.lock().await.take();
1762                let arc_node = Arc::new(nn.unwrap());
1763                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1764                pnode.insert_child(name_str, arc_node.clone()).await;
1765                arc_node
1766            }
1767        };
1768
1769        let final_handle = match *handle.lock().await {
1770            Some(hd) => {
1771                if self.no_open.load(Ordering::Relaxed) {
1772                    None
1773                } else {
1774                    let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1775                    let handle_data = HandleData {
1776                        node: new_ovi,
1777                        real_handle: Some(RealHandle {
1778                            layer: upper.clone(),
1779                            in_upper_layer: true,
1780                            inode: real_ino.lock().await.unwrap(),
1781                            handle: AtomicU64::new(hd),
1782                        }),
1783                    };
1784                    self.handles
1785                        .lock()
1786                        .await
1787                        .insert(handle, Arc::new(handle_data));
1788                    Some(handle)
1789                }
1790            }
1791            None => None,
1792        };
1793        Ok(final_handle)
1794    }
1795
1796    async fn do_rename(
1797        &self,
1798        req: Request,
1799        parent: Inode,
1800        name: &OsStr,
1801        new_parent: Inode,
1802        new_name: &OsStr,
1803    ) -> Result<()> {
1804        let name_str = name.to_str().unwrap();
1805        let new_name_str = new_name.to_str().unwrap();
1806
1807        let parent_node = self.lookup_node(req, parent, "").await?;
1808        let new_parent_node = self.lookup_node(req, new_parent, "").await?;
1809        let src_node = self.lookup_node(req, parent, name_str).await?;
1810        let dest_node_opt = self
1811            .lookup_node_ignore_enoent(req, new_parent, new_name_str)
1812            .await?;
1813        // trace!("parent_node: {}, new_parent_node: {}, src_node: {}, dest_node_opt: {:?}", parent_node.inode, new_parent_node.inode, src_node.inode, dest_node_opt.as_ref().map(|n| n.inode));
1814
1815        if let Some(dest_node) = &dest_node_opt {
1816            let src_is_dir = src_node.is_dir(req).await?;
1817            let dest_is_dir = dest_node.is_dir(req).await?;
1818            if src_is_dir != dest_is_dir {
1819                return Err(Error::from_raw_os_error(libc::EISDIR));
1820            }
1821            if dest_is_dir {
1822                self.copy_directory_up(req, dest_node.clone()).await?;
1823                let (count, _) = dest_node.count_entries_and_whiteout(req).await?;
1824                if count > 0 {
1825                    return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
1826                }
1827            }
1828        }
1829
1830        let pnode = self.copy_node_up(req, parent_node).await?;
1831        let new_pnode = self.copy_node_up(req, new_parent_node).await?;
1832        let s_node = self.copy_node_up(req, src_node).await?;
1833
1834        let need_whiteout = !s_node.upper_layer_only().await;
1835
1836        let (p_layer, _, p_inode) = pnode.first_layer_inode().await;
1837        let (new_p_layer, _, new_p_inode) = new_pnode.first_layer_inode().await;
1838        assert!(Arc::ptr_eq(&p_layer, &new_p_layer));
1839
1840        p_layer
1841            .rename(req, p_inode, name, new_p_inode, new_name)
1842            .await?;
1843
1844        // Handle the replaced destination node (if any).
1845        if let Some(dest_node) = dest_node_opt {
1846            let path = dest_node.path.read().await.clone();
1847            self.remove_inode(dest_node.inode, Some(path)).await;
1848        }
1849
1850        // Update the moved source node's state.
1851
1852        // Remove from old parent.
1853        pnode.remove_child(name_str).await;
1854        self.remove_inode(s_node.inode, s_node.path.read().await.clone().into())
1855            .await;
1856        let new_path = format!("{}/{}", new_pnode.path.read().await, new_name_str);
1857        *s_node.path.write().await = new_path;
1858        *s_node.name.write().await = new_name_str.to_string();
1859        *s_node.parent.lock().await = Arc::downgrade(&new_pnode);
1860        new_pnode.insert_child(new_name_str, s_node.clone()).await;
1861        self.insert_inode(s_node.inode, s_node).await;
1862
1863        // Create whiteout at the old location if necessary.
1864        if need_whiteout {
1865            p_layer.create_whiteout(req, p_inode, name).await?;
1866        }
1867
1868        Ok(())
1869    }
1870
1871    async fn do_link(
1872        &self,
1873        ctx: Request,
1874        src_node: &Arc<OverlayInode>,
1875        new_parent: &Arc<OverlayInode>,
1876        name: &str,
1877    ) -> Result<()> {
1878        if self.upper_layer.is_none() {
1879            return Err(Error::from_raw_os_error(libc::EROFS));
1880        }
1881
1882        // Node is whiteout.
1883        if src_node.whiteout.load(Ordering::Relaxed) || new_parent.whiteout.load(Ordering::Relaxed)
1884        {
1885            return Err(Error::from_raw_os_error(libc::ENOENT));
1886        }
1887
1888        let st = src_node.stat64(ctx).await?;
1889        if utils::is_dir(&st.attr.kind) {
1890            // Directory can't be hardlinked.
1891            return Err(Error::from_raw_os_error(libc::EPERM));
1892        }
1893
1894        let src_node = self.copy_node_up(ctx, Arc::clone(src_node)).await?;
1895        let new_parent = self.copy_node_up(ctx, Arc::clone(new_parent)).await?;
1896        let src_ino = src_node.first_layer_inode().await.2;
1897
1898        if let Some(existing_node) = self
1899            .lookup_node_ignore_enoent(ctx, new_parent.inode, name)
1900            .await?
1901        {
1902            // If it's not a whiteout, it's an error
1903            if !existing_node.whiteout.load(Ordering::Relaxed) {
1904                return Err(Error::from_raw_os_error(libc::EEXIST));
1905            }
1906            // If it is a whiteout, we will overwrite it.
1907            // First, remove the physical whiteout file in the upper layer.
1908            new_parent
1909                .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1910                    let parent_ri = parent_real_inode.ok_or_else(|| {
1911                        error!("BUG: parent doesn't have upper inode after copied up");
1912                        Error::from_raw_os_error(libc::EINVAL)
1913                    })?;
1914                    // Only delete if the whiteout is in the upper layer
1915                    if existing_node.in_upper_layer().await {
1916                        let _ = parent_ri
1917                            .layer
1918                            .delete_whiteout(ctx, parent_ri.inode, OsStr::new(name))
1919                            .await;
1920                    }
1921                    Ok(false)
1922                })
1923                .await?;
1924        }
1925
1926        new_parent
1927            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1928                let parent_real_inode = match parent_real_inode {
1929                    Some(inode) => inode,
1930                    None => {
1931                        error!("BUG: parent doesn't have upper inode after copied up");
1932                        return Err(Error::from_raw_os_error(libc::EINVAL));
1933                    }
1934                };
1935
1936                parent_real_inode.link(ctx, src_ino, name).await?;
1937
1938                Ok(false)
1939            })
1940            .await?;
1941
1942        self.insert_inode(src_node.inode, src_node.clone()).await;
1943        new_parent.insert_child(name, src_node).await;
1944
1945        Ok(())
1946    }
1947
1948    async fn do_symlink(
1949        &self,
1950        ctx: Request,
1951        linkname: &str,
1952        parent_node: &Arc<OverlayInode>,
1953        name: &str,
1954    ) -> Result<()> {
1955        let name_os = OsStr::new(name);
1956        if self.upper_layer.is_none() {
1957            return Err(Error::from_raw_os_error(libc::EROFS));
1958        }
1959
1960        // parent was deleted.
1961        if parent_node.whiteout.load(Ordering::Relaxed) {
1962            return Err(Error::from_raw_os_error(libc::ENOENT));
1963        }
1964
1965        match self
1966            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1967            .await?
1968        {
1969            Some(n) => {
1970                // Node with same name exists, let's check if it's whiteout.
1971                if !n.whiteout.load(Ordering::Relaxed) {
1972                    return Err(Error::from_raw_os_error(libc::EEXIST));
1973                }
1974
1975                // Copy parent node up if necessary.
1976                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1977                pnode
1978                    .handle_upper_inode_locked(
1979                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1980                            let parent_real_inode = match parent_real_inode {
1981                                Some(inode) => inode,
1982                                None => {
1983                                    error!("BUG: parent doesn't have upper inode after copied up");
1984                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1985                                }
1986                            };
1987
1988                            if n.in_upper_layer().await {
1989                                let _ = parent_real_inode
1990                                    .layer
1991                                    .delete_whiteout(ctx, parent_real_inode.inode, name_os)
1992                                    .await;
1993                            }
1994
1995                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
1996
1997                            // Replace existing real inodes with new one.
1998                            n.add_upper_inode(child_ri, true).await;
1999                            Ok(false)
2000                        },
2001                    )
2002                    .await?;
2003            }
2004            None => {
2005                // Copy parent node up if necessary.
2006                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
2007                let new_node: Arc<Mutex<Option<OverlayInode>>> = Arc::new(Mutex::new(None));
2008                let path = format!("{}/{}", pnode.path.read().await, name);
2009                pnode
2010                    .handle_upper_inode_locked(
2011                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
2012                            let parent_real_inode = match parent_real_inode {
2013                                Some(inode) => inode,
2014                                None => {
2015                                    error!("BUG: parent doesn't have upper inode after copied up");
2016                                    return Err(Error::from_raw_os_error(libc::EINVAL));
2017                                }
2018                            };
2019
2020                            // Allocate inode number.
2021                            let ino = self.alloc_inode(&path).await?;
2022                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
2023                            let ovi = OverlayInode::new_from_real_inode(
2024                                name,
2025                                ino,
2026                                path.clone(),
2027                                child_ri,
2028                            )
2029                            .await;
2030
2031                            new_node.lock().await.replace(ovi);
2032                            Ok(false)
2033                        },
2034                    )
2035                    .await?;
2036
2037                // new_node is always 'Some'
2038                let arc_node = Arc::new(new_node.lock().await.take().unwrap());
2039                self.insert_inode(arc_node.inode, arc_node.clone()).await;
2040                pnode.insert_child(name, arc_node).await;
2041            }
2042        }
2043
2044        Ok(())
2045    }
2046
2047    /// Copies a symbolic link from a lower layer to the upper layer.
2048    ///
2049    /// This function is a part of the copy-up process, triggered when a symlink that
2050    /// only exists in a lower layer is modified. It reads the link target and attributes
2051    /// from the lower layer and creates an identical symlink in the upper layer, crucially
2052    /// preserving the original host UID and GID.
2053    async fn copy_symlink_up(
2054        &self,
2055        ctx: Request,
2056        node: Arc<OverlayInode>,
2057    ) -> Result<Arc<OverlayInode>> {
2058        if node.in_upper_layer().await {
2059            return Ok(node);
2060        }
2061
2062        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2063            Arc::clone(n)
2064        } else {
2065            return Err(Error::other("no parent?"));
2066        };
2067
2068        // To preserve original ownership, we must get the raw, unmapped host attributes.
2069        // We achieve this by calling `do_getattr_helper`, which is specifically designed
2070        // to bypass the ID mapping logic. This is safe and does not affect other
2071        // functionalities because `do_getattr_helper` and the standard `stat64()` call
2072        // both rely on the same underlying `stat` system call; they only differ in
2073        // whether the resulting `uid` and `gid` are mapped.
2074        let (self_layer, _, self_inode) = node.first_layer_inode().await;
2075        let re = self_layer.do_getattr_helper(self_inode, None).await?;
2076        let st = ReplyAttr {
2077            ttl: re.1,
2078            attr: convert_stat64_to_file_attr(re.0),
2079        };
2080
2081        if !parent_node.in_upper_layer().await {
2082            parent_node.clone().create_upper_dir(ctx, None).await?;
2083        }
2084
2085        // Read the linkname from lower layer.
2086        let reply_data = self_layer.readlink(ctx, self_inode).await?;
2087        // Convert path to &str.
2088        let path = std::str::from_utf8(&reply_data.data)
2089            .map_err(|_| Error::from_raw_os_error(libc::EINVAL))?;
2090
2091        let new_upper_real: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
2092        parent_node
2093            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2094                // We already create upper dir for parent_node above.
2095                let parent_real_inode =
2096                    parent_upper_inode.ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2097                // We manually unfold the `symlink` logic here instead of calling the `symlink` method directly.
2098                // This is necessary to preserve the original file's UID and GID during the copy-up process.
2099                if !parent_real_inode.in_upper_layer {
2100                    return Err(Error::from_raw_os_error(libc::EROFS));
2101                }
2102                let link_name = OsStr::new(path);
2103                let filename = node.name.read().await;
2104                let filename = OsStr::new(filename.as_str());
2105                let entry = parent_real_inode
2106                    .layer
2107                    .do_symlink_helper(
2108                        ctx,
2109                        parent_real_inode.inode,
2110                        filename,
2111                        link_name,
2112                        st.attr.uid,
2113                        st.attr.gid,
2114                    )
2115                    .await?;
2116                let ri = RealInode {
2117                    layer: parent_real_inode.layer.clone(),
2118                    in_upper_layer: true,
2119                    inode: entry.attr.ino,
2120                    whiteout: false,
2121                    opaque: false,
2122                    stat: Some(ReplyAttr {
2123                        ttl: entry.ttl,
2124                        attr: entry.attr,
2125                    }),
2126                };
2127                new_upper_real.lock().await.replace(ri);
2128                Ok(false)
2129            })
2130            .await?;
2131
2132        if let Some(real_inode) = new_upper_real.lock().await.take() {
2133            // update upper_inode and first_inode()
2134            node.add_upper_inode(real_inode, true).await;
2135        }
2136
2137        Ok(node)
2138    }
2139
2140    /// Copies a regular file and its contents from a lower layer to the upper layer.
2141    ///
2142    /// This function is a core part of the copy-up process, triggered when a regular file
2143    /// that only exists in a lower layer is written to. It creates an empty file in the
2144    /// upper layer with the original file's attributes (mode, UID, GID), and then copies
2145    /// the entire content from the lower layer file to the new upper layer file.
2146    async fn copy_regfile_up(
2147        &self,
2148        ctx: Request,
2149        node: Arc<OverlayInode>,
2150    ) -> Result<Arc<OverlayInode>> {
2151        if node.in_upper_layer().await {
2152            return Ok(node);
2153        }
2154
2155        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2156            Arc::clone(n)
2157        } else {
2158            return Err(Error::other("no parent?"));
2159        };
2160
2161        // To preserve original ownership, we must get the raw, unmapped host attributes.
2162        // We achieve this by calling `do_getattr_helper`, which is specifically designed
2163        // to bypass the ID mapping logic. This is safe and does not affect other
2164        // functionalities because `do_getattr_helper` and the standard `stat64()` call
2165        // both rely on the same underlying `stat` system call; they only differ in
2166        // whether the resulting `uid` and `gid` are mapped.
2167        let (lower_layer, _, lower_inode) = node.first_layer_inode().await;
2168        let re = lower_layer.do_getattr_helper(lower_inode, None).await?;
2169        let st = ReplyAttr {
2170            ttl: re.1,
2171            attr: convert_stat64_to_file_attr(re.0),
2172        };
2173        trace!(
2174            "copy_regfile_up: node {} in lower layer's inode {}",
2175            node.inode, lower_inode
2176        );
2177
2178        if !parent_node.in_upper_layer().await {
2179            parent_node.clone().create_upper_dir(ctx, None).await?;
2180        }
2181
2182        // create the file in upper layer using information from lower layer
2183
2184        let flags = libc::O_WRONLY;
2185        let mode = mode_from_kind_and_perm(st.attr.kind, st.attr.perm);
2186
2187        let upper_handle = Arc::new(Mutex::new(0));
2188        let upper_real_inode = Arc::new(Mutex::new(None));
2189        parent_node
2190            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2191                // We already create upper dir for parent_node.
2192                let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2193                    error!("parent {} has no upper inode", parent_node.inode);
2194                    Error::from_raw_os_error(libc::EINVAL)
2195                })?;
2196                // We manually unfold the `create` logic here instead of calling the `create` method directly.
2197                // This is necessary to preserve the original file's UID and GID during the copy-up process.
2198                if !parent_real_inode.in_upper_layer {
2199                    return Err(Error::from_raw_os_error(libc::EROFS));
2200                }
2201                let name = node.name.read().await;
2202                let name = OsStr::new(name.as_str());
2203                let create_rep = parent_real_inode
2204                    .layer
2205                    .do_create_helper(
2206                        ctx,
2207                        parent_real_inode.inode,
2208                        name,
2209                        mode,
2210                        flags.try_into().unwrap(),
2211                        st.attr.uid,
2212                        st.attr.gid,
2213                    )
2214                    .await?;
2215
2216                let (inode, h) = (
2217                    RealInode {
2218                        layer: parent_real_inode.layer.clone(),
2219                        in_upper_layer: true,
2220                        inode: create_rep.attr.ino,
2221                        whiteout: false,
2222                        opaque: false,
2223                        stat: Some(ReplyAttr {
2224                            ttl: create_rep.ttl,
2225                            attr: create_rep.attr,
2226                        }),
2227                    },
2228                    Some(create_rep.fh),
2229                );
2230                trace!(
2231                    "copy_regfile_up: created upper file {name:?} with inode {}",
2232                    inode.inode
2233                );
2234                *upper_handle.lock().await = h.unwrap_or(0);
2235                upper_real_inode.lock().await.replace(inode);
2236                Ok(false)
2237            })
2238            .await?;
2239
2240        let rep = lower_layer
2241            .open(ctx, lower_inode, libc::O_RDONLY as u32)
2242            .await?;
2243
2244        let lower_handle = rep.fh;
2245
2246        // need to use work directory and then rename file to
2247        // final destination for atomic reasons.. not deal with it for now,
2248        // use stupid copy at present.
2249        // FIXME: this need a lot of work here, ntimes, xattr, etc.
2250
2251        // Copy from lower real inode to upper real inode.
2252        // TODO: use sendfile here.
2253
2254        let u_handle = *upper_handle.lock().await;
2255        let ri = upper_real_inode.lock().await.take();
2256        if let Some(ri) = ri {
2257            let mut offset: usize = 0;
2258            let size = 4 * 1024 * 1024;
2259
2260            loop {
2261                let ret = lower_layer
2262                    .read(ctx, lower_inode, lower_handle, offset as u64, size)
2263                    .await?;
2264
2265                let len = ret.data.len();
2266                if len == 0 {
2267                    break;
2268                }
2269
2270                let ret = ri
2271                    .layer
2272                    .write(ctx, ri.inode, u_handle, offset as u64, &ret.data, 0, 0)
2273                    .await?;
2274
2275                assert_eq!(ret.written as usize, len);
2276                offset += ret.written as usize;
2277            }
2278
2279            if let Err(e) = ri.layer.release(ctx, ri.inode, u_handle, 0, 0, true).await {
2280                let e: std::io::Error = e.into();
2281                // Ignore ENOSYS.
2282                if e.raw_os_error() != Some(libc::ENOSYS) {
2283                    return Err(e);
2284                }
2285            }
2286            node.add_upper_inode(ri, true).await;
2287        } else {
2288            error!("BUG: upper real inode is None after copy up");
2289        }
2290
2291        lower_layer
2292            .release(ctx, lower_inode, lower_handle, 0, 0, true)
2293            .await?;
2294
2295        Ok(Arc::clone(&node))
2296    }
2297
2298    /// Copies the specified node to the upper layer of the filesystem
2299    ///
2300    /// Performs different operations based on the node type:
2301    /// - **Directory**: Creates a corresponding directory in the upper layer
2302    /// - **Symbolic link**: Recursively copies to the upper layer
2303    /// - **Regular file**: Copies file content to the upper layer
2304    ///
2305    /// # Parameters
2306    /// * `ctx`: FUSE request context
2307    /// * `node`: Reference to the node to be copied
2308    ///
2309    /// # Returns
2310    /// Returns a reference to the upper-layer node on success, or an error on failure
2311    async fn copy_node_up(
2312        &self,
2313        ctx: Request,
2314        node: Arc<OverlayInode>,
2315    ) -> Result<Arc<OverlayInode>> {
2316        if node.in_upper_layer().await {
2317            return Ok(node);
2318        }
2319
2320        let st = node.stat64(ctx).await?;
2321        match st.attr.kind {
2322            FileType::Directory => {
2323                node.clone().create_upper_dir(ctx, None).await?;
2324                Ok(node)
2325            }
2326            FileType::Symlink => {
2327                // For symlink.
2328                self.copy_symlink_up(ctx, node).await
2329            }
2330            FileType::RegularFile => {
2331                // For regular file.
2332                self.copy_regfile_up(ctx, node).await
2333            }
2334            _ => {
2335                // For other file types. return error.
2336                Err(Error::from_raw_os_error(libc::EINVAL))
2337            }
2338        }
2339    }
2340
2341    /// recursively copy directory and all its contents to upper layer
2342    async fn copy_directory_up(
2343        &self,
2344        ctx: Request,
2345        node: Arc<OverlayInode>,
2346    ) -> Result<Arc<OverlayInode>> {
2347        // Ensure the directory itself is copied up first
2348        self.copy_node_up(ctx, node.clone()).await?;
2349
2350        // load directory to cache
2351        self.load_directory(ctx, &node).await?;
2352
2353        // go through all children
2354        let children = node.childrens.lock().await.clone();
2355        for (_name, child) in children.iter() {
2356            if _name == "." || _name == ".." {
2357                continue;
2358            }
2359            // jump over whiteout
2360            if child.whiteout.load(Ordering::Relaxed) {
2361                continue;
2362            }
2363            let st = child.stat64(ctx).await?;
2364            if !child.in_upper_layer().await {
2365                match st.attr.kind {
2366                    FileType::Directory => {
2367                        // recursively copy subdirectory
2368                        Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2369                    }
2370                    FileType::Symlink | FileType::RegularFile => {
2371                        // copy node up symlink or regular file
2372                        Box::pin(self.copy_node_up(ctx, child.clone())).await?;
2373                    }
2374                    _ => {
2375                        // other file types are ignored
2376                    }
2377                }
2378            } else if utils::is_dir(&st.attr.kind) {
2379                // If it is already in the upper layer, but the directory is not loaded,
2380                // ensure that its contents are also copied up recursively.
2381                Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2382            }
2383        }
2384
2385        Ok(node)
2386    }
2387
2388    async fn do_rm(&self, ctx: Request, parent: u64, name: &OsStr, dir: bool) -> Result<()> {
2389        // 1. Read-only mount guard
2390        if self.upper_layer.is_none() {
2391            return Err(Error::from_raw_os_error(libc::EROFS));
2392        }
2393
2394        // 2. Locate the parent Overlay Inode.
2395        // Find parent Overlay Inode.
2396        let pnode = self.lookup_node(ctx, parent, "").await?;
2397        if pnode.whiteout.load(Ordering::Relaxed) {
2398            return Err(Error::from_raw_os_error(libc::ENOENT));
2399        }
2400        let to_name = name.to_str().unwrap();
2401
2402        // 3. Locate the child Overlay Inode for the given name
2403        // Find the Overlay Inode for child with <name>.
2404        let node = self.lookup_node(ctx, parent, to_name).await?;
2405        if node.whiteout.load(Ordering::Relaxed) {
2406            // already deleted.
2407            return Err(Error::from_raw_os_error(libc::ENOENT));
2408        }
2409
2410        // 4. If removing a directory, ensure it is empty of real entries
2411        if dir {
2412            self.load_directory(ctx, &node).await?;
2413            let (count, whiteouts) = node.count_entries_and_whiteout(ctx).await?;
2414            trace!("entries: {count}, whiteouts: {whiteouts}\n");
2415            if count > 0 {
2416                return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
2417            }
2418
2419            // Delete all whiteouts.
2420            if whiteouts > 0 && node.in_upper_layer().await {
2421                self.empty_node_directory(ctx, Arc::clone(&node)).await?;
2422            }
2423
2424            trace!("whiteouts deleted!\n");
2425        }
2426
2427        // 5. Decide whether we need to create a whiteout entry
2428        // We'll filp this off if upper-layer unlink suffices or parent is opaque
2429        let need_whiteout = AtomicBool::new(true);
2430        let pnode = self.copy_node_up(ctx, Arc::clone(&pnode)).await?;
2431
2432        if node.upper_layer_only().await {
2433            need_whiteout.store(false, Ordering::Relaxed);
2434        }
2435
2436        let mut df = |parent_upper_inode: Option<Arc<RealInode>>| async {
2437            let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2438                error!(
2439                    "BUG: parent {} has no upper inode after copy up",
2440                    pnode.inode
2441                );
2442                Error::from_raw_os_error(libc::EINVAL)
2443            })?;
2444
2445            // Parent is opaque, it shadows everything in lower layers so no need to create extra whiteouts.
2446            if parent_real_inode.opaque {
2447                need_whiteout.store(false, Ordering::Relaxed);
2448            }
2449            if dir {
2450                parent_real_inode
2451                    .layer
2452                    .rmdir(ctx, parent_real_inode.inode, name)
2453                    .await?;
2454            } else {
2455                parent_real_inode
2456                    .layer
2457                    .unlink(ctx, parent_real_inode.inode, name)
2458                    .await?;
2459            }
2460
2461            Ok(false)
2462        };
2463
2464        // 6. Perform the unlink/rmdir operation and memory cleanup
2465        if node.in_upper_layer().await {
2466            pnode.handle_upper_inode_locked(&mut df).await?;
2467        }
2468        pnode.remove_child(name.to_str().unwrap()).await;
2469        let path = node.path.read().await.clone();
2470        self.remove_inode(node.inode, Some(path)).await;
2471
2472        // 7. If needed, create a entry in the upper layer to mask lower-layer files
2473        if need_whiteout.load(Ordering::Relaxed) {
2474            trace!("do_rm: creating whiteout\n");
2475            // pnode is copied up, so it has upper layer.
2476            pnode
2477                .handle_upper_inode_locked(
2478                    &mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2479                        let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2480                            error!(
2481                                "BUG: parent {} has no upper inode after copy up",
2482                                pnode.inode
2483                            );
2484                            Error::from_raw_os_error(libc::EINVAL)
2485                        })?;
2486
2487                        let child_ri = parent_real_inode.create_whiteout(ctx, to_name).await?; //FIXME..............
2488                        let path = format!("{}/{}", pnode.path.read().await, to_name);
2489                        let ino: u64 = self.alloc_inode(&path).await?;
2490                        let ovi = Arc::new(
2491                            OverlayInode::new_from_real_inode(to_name, ino, path.clone(), child_ri)
2492                                .await,
2493                        );
2494
2495                        self.insert_inode(ino, ovi.clone()).await;
2496                        pnode.insert_child(to_name, ovi.clone()).await;
2497                        Ok(false)
2498                    },
2499                )
2500                .await?;
2501        }
2502
2503        Ok(())
2504    }
2505
2506    async fn do_fsync(
2507        &self,
2508        ctx: Request,
2509        inode: Inode,
2510        datasync: bool,
2511        handle: Handle,
2512        syncdir: bool,
2513    ) -> Result<()> {
2514        // Use O_RDONLY flags which indicates no copy up.
2515        let data = self
2516            .get_data(ctx, Some(handle), inode, libc::O_RDONLY as u32)
2517            .await?;
2518
2519        trace!("do_fsync: got data for handle: {handle}, inode:{inode}");
2520
2521        match data.real_handle {
2522            // FIXME: need to test if inode matches corresponding handle?
2523            None => {
2524                trace!("do_fsync: no real handle found for handle: {handle}, inode:{inode}");
2525                Err(Error::from_raw_os_error(libc::ENOENT))
2526            }
2527            Some(ref rh) => {
2528                let real_handle = rh.handle.load(Ordering::Relaxed);
2529                // TODO: check if it's in upper layer? @weizhang555
2530                if syncdir {
2531                    trace!(
2532                        "do_fsync: layer.fsyncdir called for handle: {}, inode:{}; rh.inode: {}, real_handle: {}",
2533                        handle, inode, rh.inode, real_handle
2534                    );
2535                    rh.layer
2536                        .fsyncdir(ctx, rh.inode, real_handle, datasync)
2537                        .await
2538                        .map_err(|e| e.into())
2539                } else {
2540                    rh.layer
2541                        .fsync(ctx, rh.inode, real_handle, datasync)
2542                        .await
2543                        .map_err(|e| e.into())
2544                }
2545            }
2546        }
2547    }
2548
2549    // Delete everything in the directory only on upper layer, ignore lower layers.
2550    async fn empty_node_directory(&self, ctx: Request, node: Arc<OverlayInode>) -> Result<()> {
2551        let st = node.stat64(ctx).await?;
2552        if !utils::is_dir(&st.attr.kind) {
2553            // This function can only be called on directories.
2554            return Err(Error::from_raw_os_error(libc::ENOTDIR));
2555        }
2556
2557        let (layer, in_upper, inode) = node.first_layer_inode().await;
2558        if !in_upper {
2559            return Ok(());
2560        }
2561
2562        // Copy node.childrens Hashmap to Vector, the Vector is also used as temp storage,
2563        // Without this, Rust won't allow us to remove them from node.childrens.
2564        let iter = node
2565            .childrens
2566            .lock()
2567            .await
2568            .values()
2569            .cloned()
2570            .collect::<Vec<_>>();
2571
2572        for child in iter {
2573            // We only care about upper layer, ignore lower layers.
2574            if child.in_upper_layer().await {
2575                let child_name = child.name.read().await.clone();
2576                let child_name_os = OsStr::new(&child_name);
2577                if child.whiteout.load(Ordering::Relaxed) {
2578                    layer.delete_whiteout(ctx, inode, child_name_os).await?
2579                } else {
2580                    let s = child.stat64(ctx).await?;
2581                    let cname: &OsStr = OsStr::new(&child_name_os);
2582                    if utils::is_dir(&s.attr.kind) {
2583                        let (count, whiteouts) = child.count_entries_and_whiteout(ctx).await?;
2584                        if count + whiteouts > 0 {
2585                            let cb = child.clone();
2586                            Box::pin(async move { self.empty_node_directory(ctx, cb).await })
2587                                .await?;
2588                        }
2589                        layer.rmdir(ctx, inode, cname).await?
2590                    } else {
2591                        layer.unlink(ctx, inode, cname).await?;
2592                    }
2593                }
2594
2595                let cpath = child.path.read().await.clone();
2596                // delete the child
2597                self.remove_inode(child.inode, Some(cpath)).await;
2598                node.remove_child(&child_name).await;
2599            }
2600        }
2601
2602        Ok(())
2603    }
2604
2605    async fn find_real_info_from_handle(
2606        &self,
2607        handle: Handle,
2608    ) -> Result<(Arc<BoxedLayer>, Inode, Handle)> {
2609        match self.handles.lock().await.get(&handle) {
2610            Some(h) => match h.real_handle {
2611                Some(ref rhd) => {
2612                    trace!(
2613                        "find_real_info_from_handle: layer in upper: {}",
2614                        rhd.in_upper_layer
2615                    );
2616                    Ok((
2617                        rhd.layer.clone(),
2618                        rhd.inode,
2619                        rhd.handle.load(Ordering::Relaxed),
2620                    ))
2621                }
2622                None => Err(Error::from_raw_os_error(libc::ENOENT)),
2623            },
2624
2625            None => Err(Error::from_raw_os_error(libc::ENOENT)),
2626        }
2627    }
2628
2629    async fn find_real_inode(&self, inode: Inode) -> Result<(Arc<BoxedLayer>, Inode)> {
2630        if let Some(n) = self.get_active_inode(inode).await {
2631            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2632            return Ok((first_layer, first_inode));
2633        } else if let Some(n) = self.get_all_inode(inode).await {
2634            trace!("find_real_inode: found inode by get_all_inode: {}", n.inode);
2635            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2636            return Ok((first_layer, first_inode));
2637        }
2638
2639        Err(Error::from_raw_os_error(libc::ENOENT))
2640    }
2641
2642    async fn get_data(
2643        &self,
2644        ctx: Request,
2645        handle: Option<Handle>,
2646        inode: Inode,
2647        flags: u32,
2648    ) -> Result<Arc<HandleData>> {
2649        let no_open = self.no_open.load(Ordering::Relaxed);
2650        if !no_open {
2651            if let Some(h) = handle
2652                && let Some(v) = self.handles.lock().await.get(&h)
2653                && v.node.inode == inode
2654            {
2655                // trace!("get_data: found handle");
2656                return Ok(Arc::clone(v));
2657            }
2658        } else {
2659            let readonly: bool = flags
2660                & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
2661                    as u32
2662                == 0;
2663
2664            // lookup node
2665            let node = self.lookup_node(ctx, inode, "").await?;
2666
2667            // whiteout node
2668            if node.whiteout.load(Ordering::Relaxed) {
2669                return Err(Error::from_raw_os_error(libc::ENOENT));
2670            }
2671
2672            if !readonly {
2673                // Check if upper layer exists, return EROFS is not exists.
2674                self.upper_layer
2675                    .as_ref()
2676                    .cloned()
2677                    .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2678                // copy up to upper layer
2679                self.copy_node_up(ctx, Arc::clone(&node)).await?;
2680            }
2681
2682            let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
2683            let handle_data = HandleData {
2684                node: Arc::clone(&node),
2685                real_handle: Some(RealHandle {
2686                    layer,
2687                    in_upper_layer,
2688                    inode,
2689                    handle: AtomicU64::new(0),
2690                }),
2691            };
2692            return Ok(Arc::new(handle_data));
2693        }
2694
2695        Err(Error::from_raw_os_error(libc::ENOENT))
2696    }
2697
2698    // extend or init the inodes number to one overlay if the current number is done.
2699    pub async fn extend_inode_alloc(&self, key: u64) {
2700        let next_inode = key * INODE_ALLOC_BATCH;
2701        let limit_inode = next_inode + INODE_ALLOC_BATCH - 1;
2702        self.inodes
2703            .write()
2704            .await
2705            .extend_inode_number(next_inode, limit_inode);
2706    }
2707}
2708
/// Bundles the parameters needed to mount an overlay filesystem with [`mount_fs`].
#[derive(Debug, Clone)]
pub struct OverlayArgs<
    P: AsRef<Path>,
    Q: AsRef<Path>,
    R: AsRef<Path>,
    M: AsRef<str>,
    N: Into<String>,
    I: IntoIterator<Item = R>,
> {
    /// Path at which the overlay is mounted.
    pub mountpoint: P,
    /// Root directory of the upper layer.
    pub upperdir: Q,
    /// Root directories of the lower layers, in iteration order.
    pub lowerdir: I,
    /// Selects the privileged mount path when `true`, the unprivileged one otherwise.
    pub privileged: bool,
    /// Optional mapping string handed to every passthrough layer.
    pub mapping: Option<M>,
    /// Optional filesystem name set in the mount options.
    pub name: Option<N>,
    /// Value of the `allow_other` mount option.
    pub allow_other: bool,
}
2728
2729/// Mounts the filesystem using the given parameters and returns the mount handle.
2730///
2731/// # Parameters
2732/// - `mountpoint`: Path to the mount point.
2733/// - `upperdir`: Path to the upper directory.
2734/// - `lowerdir`: Paths to the lower directories.
2735/// - `privileged`: If true, use privileged mount; otherwise, unprivileged mount.
2736/// - `mapping`: Optional user/group ID mapping for unprivileged mounts.
2737/// - `name`: Optional name for the filesystem.
2738/// - `allow_other`: If true, allows other users to access the filesystem.
2739///
2740/// # Returns
2741/// A mount handle on success.
2742pub async fn mount_fs<P, Q, R, M, N, I>(
2743    args: OverlayArgs<P, Q, R, M, N, I>,
2744) -> rfuse3::raw::MountHandle
2745where
2746    P: AsRef<Path>,
2747    Q: AsRef<Path>,
2748    R: AsRef<Path>,
2749    M: AsRef<str>,
2750    N: Into<String>,
2751    I: IntoIterator<Item = R>,
2752{
2753    // Create lower layers
2754    let mut lower_layers = Vec::new();
2755    for lower in args.lowerdir {
2756        let layer = new_passthroughfs_layer(PassthroughArgs {
2757            root_dir: lower,
2758            mapping: args.mapping.as_ref().map(|m| m.as_ref()),
2759        })
2760        .await
2761        .expect("Failed to create lower filesystem layer");
2762        lower_layers.push(Arc::new(layer));
2763    }
2764    // Create upper layer
2765    let upper_layer = Arc::new(
2766        new_passthroughfs_layer(PassthroughArgs {
2767            root_dir: args.upperdir,
2768            mapping: args.mapping.as_ref().map(|m| m.as_ref()),
2769        })
2770        .await
2771        .expect("Failed to create upper filesystem layer"),
2772    );
2773
2774    // Configure overlay filesystem
2775    let config = Config {
2776        mountpoint: args.mountpoint.as_ref().to_path_buf(),
2777        do_import: true,
2778        ..Default::default()
2779    };
2780    let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1)
2781        .expect("Failed to initialize OverlayFs");
2782    let logfs = LoggingFileSystem::new(overlayfs);
2783
2784    let mount_path: OsString = OsString::from(args.mountpoint.as_ref().as_os_str());
2785
2786    // Obtain the current user's uid and gid
2787    let uid = unsafe { libc::getuid() };
2788    let gid = unsafe { libc::getgid() };
2789
2790    let mut mount_options = MountOptions::default();
2791    mount_options
2792        .force_readdir_plus(true)
2793        .uid(uid)
2794        .gid(gid)
2795        .allow_other(args.allow_other);
2796    if let Some(name) = args.name {
2797        mount_options.fs_name(name);
2798    }
2799
2800    // Mount filesystem based on privilege flag and return the mount handle
2801    if !args.privileged {
2802        debug!("Mounting with unprivileged mode");
2803        Session::new(mount_options)
2804            .mount_with_unprivileged(logfs, mount_path)
2805            .await
2806            .expect("Unprivileged mount failed")
2807    } else {
2808        debug!("Mounting with privileged mode");
2809        Session::new(mount_options)
2810            .mount(logfs, mount_path)
2811            .await
2812            .expect("Privileged mount failed")
2813    }
2814}