libfuse_fs/unionfs/
mod.rs

1// Copyright (C) 2023 Ant Group. All rights reserved.
2//  2024 From [fuse_backend_rs](https://github.com/cloud-hypervisor/fuse-backend-rs)
3// SPDX-License-Identifier: Apache-2.0
4
5#![allow(missing_docs)]
6mod async_io;
7pub mod config;
8mod inode_store;
9pub mod layer;
10mod utils;
11
12//mod tempfile;
13use core::panic;
14use std::collections::HashMap;
15use std::ffi::{OsStr, OsString};
16use std::future::Future;
17use std::io::{Error, Result};
18use std::path::Path;
19
20use config::Config;
21use futures::StreamExt as _;
22use rfuse3::raw::reply::{
23    DirectoryEntry, DirectoryEntryPlus, ReplyAttr, ReplyEntry, ReplyOpen, ReplyStatFs,
24};
25use rfuse3::raw::{DirectoryPlusStream, DirectoryStream, Request, Session};
26use std::sync::{Arc, Weak};
27use tracing::debug;
28use tracing::error;
29use tracing::info;
30use tracing::trace;
31
32use rfuse3::{Errno, FileType, MountOptions, mode_from_kind_and_perm};
33const SLASH_ASCII: char = '/';
34use futures::future::join_all;
35use futures::stream::iter;
36
37use crate::passthrough::newlogfs::LoggingFileSystem;
38use crate::passthrough::{PassthroughArgs, new_passthroughfs_layer};
39use crate::util::convert_stat64_to_file_attr;
40use inode_store::InodeStore;
41use layer::Layer;
42use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
43
44use tokio::sync::{Mutex, RwLock};
45
// Inode numbers handled by this module are plain `u64` values.
pub type Inode = u64;
// File/directory handles handled by this module are plain `u64` values.
pub type Handle = u64;

// A layer is always used through the `Layer` trait object; the rest of the
// module stores it as `Arc<BoxedLayer>`.
pub(crate) type BoxedLayer = dyn Layer;
//type BoxedFileSystem = Box<dyn FileSystem<Inode = Inode, Handle = Handle> + Send + Sync>;
// 2^32. NOTE(review): presumably the size of one inode-number allocation batch;
// its use is not visible in this part of the file - confirm against the allocator.
const INODE_ALLOC_BATCH: u64 = 0x1_0000_0000;
// RealInode represents one inode object in one specific layer.
// Each RealInode corresponds to one looked-up Entry in that layer; dropping the
// RealInode issues a `forget` for it (see the `Drop` impl).
// Important note: do not impl Clone trait for it or refcount will be messed up.
pub(crate) struct RealInode {
    // Layer that owns `inode`.
    pub layer: Arc<BoxedLayer>,
    // True when `layer` is the writable upper layer; mutating operations
    // (mkdir, create, mknod, link, symlink, create_whiteout) fail with EROFS otherwise.
    pub in_upper_layer: bool,
    // Inode number inside `layer`. `stat64` treats 0 as "no such inode" (ENOENT).
    pub inode: u64,
    // File is whiteouted, we need to hide it.
    pub whiteout: bool,
    // Directory is opaque, we need to hide all entries inside it.
    pub opaque: bool,
    // Attributes captured when the inode was looked up/created; `None` means
    // they must be fetched on demand with `stat64`.
    pub stat: Option<ReplyAttr>,
}
65
// OverlayInode is the merged view of one path across layers.
// It must be protected by locks because it can be operated on by multiple tasks;
// every mutable field is therefore wrapped in a Mutex/RwLock or is an atomic.
// #[derive(Default)]
pub(crate) struct OverlayInode {
    // Inode hash table, map from 'name' to 'OverlayInode'.
    pub childrens: Mutex<HashMap<String, Arc<OverlayInode>>>,
    // Weak back-reference to the parent directory node (weak, so the
    // parent/child links do not form a strong reference cycle).
    pub parent: Mutex<Weak<OverlayInode>>,
    // Backend inodes from all layers. The first element is the one consulted
    // first (see `in_upper_layer` / `add_upper_inode`, which put the upper inode in front).
    pub real_inodes: Mutex<Vec<Arc<RealInode>>>,
    // Inode number.
    pub inode: u64,
    // Path of this node; children are built as "<parent path>/<name>".
    pub path: RwLock<String>,
    // Name of this node inside its parent directory.
    pub name: RwLock<String>,
    // Lookup counter; starts at 1 for nodes built from a real inode.
    pub lookups: AtomicU64,
    // Node is whiteout-ed.
    pub whiteout: AtomicBool,
    // Directory is loaded.
    pub loaded: AtomicBool,
}
84
/// Caching policy selector.
///
/// `Auto` is the default variant. The type is a plain field-less enum, so it
/// is cheap to copy and can be compared and printed for diagnostics.
// NOTE(review): the exact effect of each variant is decided by code outside
// this part of the file.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum CachePolicy {
    Never,
    #[default]
    Auto,
    Always,
}
// Top-level state of the overlay (union) filesystem.
pub struct OverlayFs {
    config: Config,
    // Lower layers. NOTE(review): presumably read-only and ordered from
    // topmost to bottommost - confirm against the constructor.
    lower_layers: Vec<Arc<BoxedLayer>>,
    // Optional upper layer; `None` means there is no upper layer configured.
    upper_layer: Option<Arc<BoxedLayer>>,
    // All inodes in FS.
    inodes: RwLock<InodeStore>,
    // Open file handles.
    handles: Mutex<HashMap<u64, Arc<HandleData>>>,
    // Counter used to hand out handle numbers.
    // NOTE(review): allocation logic is outside this part of the file.
    next_handle: AtomicU64,
    // Feature flags. NOTE(review): presumably mirror negotiated FUSE options
    // of the same names - confirm where they are set.
    writeback: AtomicBool,
    no_open: AtomicBool,
    no_opendir: AtomicBool,
    killpriv_v2: AtomicBool,
    perfile_dax: AtomicBool,
    // NOTE(review): presumably the inode number of the filesystem root - confirm.
    root_inodes: u64,
}
108
// This is a wrapper of one opened inode in a specific layer. It can't impl the Clone trait.
struct RealHandle {
    // Layer the handle was opened in.
    layer: Arc<BoxedLayer>,
    // True when `layer` is the upper layer.
    in_upper_layer: bool,
    // Inode number inside `layer`.
    inode: u64,
    // Handle value returned by the layer; atomic so it can be updated through a shared reference.
    handle: AtomicU64,
}
116
// Per-open-file bookkeeping stored in `OverlayFs::handles`.
struct HandleData {
    // Overlay node the handle refers to.
    node: Arc<OverlayInode>,
    //offset: libc::off_t,
    // Handle in the backing layer; `None` when no layer-level handle exists.
    real_handle: Option<RealHandle>,
}
122
123// RealInode is a wrapper of one inode in specific layer.
124// All layer operations returning Entry should be wrapped in RealInode implementation
125// so that we can increase the refcount(lookup count) of each inode and decrease it after Drop.
126// Important: do not impl 'Copy' trait for it or refcount will be messed up.
127impl RealInode {
128    async fn new(
129        layer: Arc<BoxedLayer>,
130        in_upper_layer: bool,
131        inode: u64,
132        whiteout: bool,
133        opaque: bool,
134    ) -> Self {
135        let mut ri = RealInode {
136            layer,
137            in_upper_layer,
138            inode,
139            whiteout,
140            opaque,
141            stat: None,
142        };
143        match ri.stat64_ignore_enoent(&Request::default()).await {
144            Ok(v) => {
145                ri.stat = v;
146            }
147            Err(e) => {
148                error!("stat64 failed during RealInode creation: {e}");
149            }
150        }
151        ri
152    }
153
154    async fn stat64(&self, req: &Request) -> Result<ReplyAttr> {
155        let layer = self.layer.as_ref();
156        if self.inode == 0 {
157            return Err(Error::from_raw_os_error(libc::ENOENT));
158        }
159        // trace!("stat64: trying to getattr req: {:?}", req);
160        layer
161            .getattr(*req, self.inode, None, 0)
162            .await
163            .map_err(|e| e.into())
164    }
165
166    async fn stat64_ignore_enoent(&self, req: &Request) -> Result<Option<ReplyAttr>> {
167        match self.stat64(req).await {
168            Ok(v1) => Ok(Some(v1)),
169            Err(e) => match e.raw_os_error() {
170                Some(raw_error) => {
171                    if raw_error == libc::ENOENT || raw_error == libc::ENAMETOOLONG {
172                        return Ok(None);
173                    }
174                    Err(e)
175                }
176                None => Err(e),
177            },
178        }
179    }
180
181    // Do real lookup action in specific layer, this call will increase Entry refcount which must be released later.
182    async fn lookup_child_ignore_enoent(
183        &self,
184        ctx: Request,
185        name: &str,
186    ) -> Result<Option<ReplyEntry>> {
187        let cname = OsStr::new(name);
188        // Real inode must have a layer.
189        let layer = self.layer.as_ref();
190        match layer.lookup(ctx, self.inode, cname).await {
191            Ok(v) => {
192                // Negative entry also indicates missing entry.
193                if v.attr.ino == 0 {
194                    return Ok(None);
195                }
196                Ok(Some(v))
197            }
198            Err(e) => {
199                let ioerror: std::io::Error = e.into();
200                if let Some(raw_error) = ioerror.raw_os_error()
201                    && (raw_error == libc::ENOENT || raw_error == libc::ENAMETOOLONG)
202                {
203                    return Ok(None);
204                }
205
206                Err(e.into())
207            }
208        }
209    }
210
211    // Find child inode in same layer under this directory(Self).
212    // Return None if not found.
213    async fn lookup_child(&self, ctx: Request, name: &str) -> Result<Option<RealInode>> {
214        if self.whiteout {
215            return Ok(None);
216        }
217
218        let layer = self.layer.as_ref();
219
220        // Find child Entry with <name> under directory with inode <self.inode>.
221        match self.lookup_child_ignore_enoent(ctx, name).await? {
222            Some(v) => {
223                // The Entry must be forgotten in each layer, which will be done automatically by Drop operation.
224                let (whiteout, opaque) = if v.attr.kind == FileType::Directory {
225                    (false, layer.is_opaque(ctx, v.attr.ino).await?)
226                } else {
227                    (layer.is_whiteout(ctx, v.attr.ino).await?, false)
228                };
229
230                Ok(Some(RealInode {
231                    layer: self.layer.clone(),
232                    in_upper_layer: self.in_upper_layer,
233                    inode: v.attr.ino,
234                    whiteout,
235                    opaque,
236                    stat: Some(ReplyAttr {
237                        ttl: v.ttl,
238                        attr: v.attr,
239                    }),
240                }))
241            }
242            None => Ok(None),
243        }
244    }
245
246    // Read directory entries from specific RealInode, error out if it's not directory.
247    async fn readdir(&self, ctx: Request) -> Result<HashMap<String, RealInode>> {
248        // Deleted inode should not be read.
249        if self.whiteout {
250            return Err(Error::from_raw_os_error(libc::ENOENT));
251        }
252        // trace!("readdir: before stat");
253        let stat = match self.stat.clone() {
254            Some(v) => v,
255            None => self.stat64(&ctx).await?,
256        };
257
258        // Must be directory.
259        if stat.attr.kind != FileType::Directory {
260            return Err(Error::from_raw_os_error(libc::ENOTDIR));
261        }
262
263        // Open the directory and load each entry.
264        let opendir_res = self
265            .layer
266            .opendir(ctx, self.inode, libc::O_RDONLY as u32)
267            .await;
268        // trace!("readdir: after opendir");
269        let handle = match opendir_res {
270            Ok(handle) => handle,
271
272            // opendir may not be supported if no_opendir is set, so we can ignore this error.
273            Err(e) => {
274                let ioerror: std::io::Error = e.into();
275                match ioerror.raw_os_error() {
276                    Some(raw_error) => {
277                        if raw_error == libc::ENOSYS {
278                            // We can still call readdir with inode if opendir is not supported in this layer.
279                            ReplyOpen { fh: 0, flags: 0 }
280                        } else {
281                            return Err(e.into());
282                        }
283                    }
284                    None => {
285                        return Err(e.into());
286                    }
287                }
288            }
289        };
290
291        let child_names = self.layer.readdir(ctx, self.inode, handle.fh, 0).await?;
292        // Non-zero handle indicates successful 'open', we should 'release' it.
293        if handle.fh > 0 {
294            self.layer
295                .releasedir(ctx, self.inode, handle.fh, handle.flags)
296                .await?
297            //DIFF
298        }
299
300        // Lookup all child and construct "RealInode"s.
301        let child_real_inodes = Arc::new(Mutex::new(HashMap::new()));
302        // trace!("readdir: before iter childrens");
303        let a_map = child_names.entries.map(|entery| async {
304            match entery {
305                Ok(dire) => {
306                    let dname = dire.name.into_string().unwrap();
307                    if dname == "." || dname == ".." {
308                        // Skip . and .. entries.
309                        return Ok(());
310                    }
311                    // trace!("readdir: before lookup child: dname={}", dname);
312                    if let Some(child) = self.lookup_child(ctx, &dname).await? {
313                        child_real_inodes.lock().await.insert(dname, child);
314                    }
315                    Ok(())
316                }
317                Err(err) => Err(err),
318            }
319        });
320        let k = join_all(a_map.collect::<Vec<_>>().await).await;
321        drop(k);
322        // Now into_inner func is safety.
323        let re = Arc::try_unwrap(child_real_inodes)
324            .map_err(|_| Errno::new_not_exist())?
325            .into_inner();
326        // trace!("readdir: return");
327        Ok(re)
328    }
329
330    async fn create_whiteout(&self, ctx: Request, name: &str) -> Result<RealInode> {
331        if !self.in_upper_layer {
332            return Err(Error::from_raw_os_error(libc::EROFS));
333        }
334
335        // from &str to &OsStr
336        let name_osstr = OsStr::new(name);
337        let entry = self
338            .layer
339            .create_whiteout(ctx, self.inode, name_osstr)
340            .await?;
341
342        // Wrap whiteout to RealInode.
343        Ok(RealInode {
344            layer: self.layer.clone(),
345            in_upper_layer: true,
346            inode: entry.attr.ino,
347            whiteout: true,
348            opaque: false,
349            stat: Some(ReplyAttr {
350                ttl: entry.ttl,
351                attr: entry.attr,
352            }),
353        })
354    }
355
356    async fn mkdir(&self, ctx: Request, name: &str, mode: u32, umask: u32) -> Result<RealInode> {
357        if !self.in_upper_layer {
358            return Err(Error::from_raw_os_error(libc::EROFS));
359        }
360
361        let name_osstr = OsStr::new(name);
362        let entry = self
363            .layer
364            .mkdir(ctx, self.inode, name_osstr, mode, umask)
365            .await?;
366
367        // update node's first_layer
368        Ok(RealInode {
369            layer: self.layer.clone(),
370            in_upper_layer: true,
371            inode: entry.attr.ino,
372            whiteout: false,
373            opaque: false,
374            stat: Some(ReplyAttr {
375                ttl: entry.ttl,
376                attr: entry.attr,
377            }),
378        })
379    }
380
381    async fn create(
382        &self,
383        ctx: Request,
384        name: &str,
385        mode: u32,
386        flags: u32,
387    ) -> Result<(RealInode, Option<u64>)> {
388        if !self.in_upper_layer {
389            return Err(Error::from_raw_os_error(libc::EROFS));
390        }
391        let name = OsStr::new(name);
392        let create_rep = self
393            .layer
394            .create(ctx, self.inode, name, mode, flags)
395            .await?;
396
397        Ok((
398            RealInode {
399                layer: self.layer.clone(),
400                in_upper_layer: true,
401                inode: create_rep.attr.ino,
402                whiteout: false,
403                opaque: false,
404                stat: Some(ReplyAttr {
405                    ttl: create_rep.ttl,
406                    attr: create_rep.attr,
407                }),
408            },
409            Some(create_rep.fh),
410        ))
411    }
412
413    async fn mknod(
414        &self,
415        ctx: Request,
416        name: &str,
417        mode: u32,
418        rdev: u32,
419        _umask: u32,
420    ) -> Result<RealInode> {
421        if !self.in_upper_layer {
422            return Err(Error::from_raw_os_error(libc::EROFS));
423        }
424        let name = OsStr::new(name);
425        let rep = self.layer.mknod(ctx, self.inode, name, mode, rdev).await?;
426        Ok(RealInode {
427            layer: self.layer.clone(),
428            in_upper_layer: true,
429            inode: rep.attr.ino,
430            whiteout: false,
431            opaque: false,
432            stat: Some(ReplyAttr {
433                ttl: rep.ttl,
434                attr: rep.attr,
435            }),
436        })
437    }
438
439    async fn link(&self, ctx: Request, ino: u64, name: &str) -> Result<RealInode> {
440        if !self.in_upper_layer {
441            return Err(Error::from_raw_os_error(libc::EROFS));
442        }
443        let name = OsStr::new(name);
444        let entry = self.layer.link(ctx, ino, self.inode, name).await?;
445
446        let opaque = if utils::is_dir(&entry.attr.kind) {
447            self.layer.is_opaque(ctx, entry.attr.ino).await?
448        } else {
449            false
450        };
451        Ok(RealInode {
452            layer: self.layer.clone(),
453            in_upper_layer: true,
454            inode: entry.attr.ino,
455            whiteout: false,
456            opaque,
457            stat: Some(ReplyAttr {
458                ttl: entry.ttl,
459                attr: entry.attr,
460            }),
461        })
462    }
463
464    // Create a symlink in self directory.
465    async fn symlink(&self, ctx: Request, link_name: &str, filename: &str) -> Result<RealInode> {
466        if !self.in_upper_layer {
467            return Err(Error::from_raw_os_error(libc::EROFS));
468        }
469        let link_name = OsStr::new(link_name);
470        let filename = OsStr::new(filename);
471        let entry = self
472            .layer
473            .symlink(ctx, self.inode, filename, link_name)
474            .await?;
475
476        Ok(RealInode {
477            layer: self.layer.clone(),
478            in_upper_layer: true,
479            inode: entry.attr.ino,
480            whiteout: false,
481            opaque: false,
482            stat: Some(ReplyAttr {
483                ttl: entry.ttl,
484                attr: entry.attr,
485            }),
486        })
487    }
488}
489
490impl Drop for RealInode {
491    fn drop(&mut self) {
492        let layer = Arc::clone(&self.layer);
493        let inode = self.inode;
494        tokio::spawn(async move {
495            let ctx = Request::default();
496            layer.forget(ctx, inode, 1).await;
497        });
498    }
499}
500
501impl OverlayInode {
502    pub fn new() -> Self {
503        Self {
504            childrens: Mutex::new(HashMap::new()),
505            parent: Mutex::new(Weak::new()),
506            real_inodes: Mutex::new(vec![]),
507            inode: 0,
508            path: RwLock::new(String::new()),
509            name: RwLock::new(String::new()),
510            lookups: AtomicU64::new(0),
511            whiteout: AtomicBool::new(false),
512            loaded: AtomicBool::new(false),
513        }
514    }
515    // Allocate new OverlayInode based on one RealInode,
516    // inode number is always 0 since only OverlayFs has global unique inode allocator.
517    pub async fn new_from_real_inode(
518        name: &str,
519        ino: u64,
520        path: String,
521        real_inode: RealInode,
522    ) -> Self {
523        let mut new = OverlayInode::new();
524        new.inode = ino;
525        new.path = path.into();
526        new.name = name.to_string().into();
527        new.whiteout.store(real_inode.whiteout, Ordering::Relaxed);
528        new.lookups = AtomicU64::new(1);
529        new.real_inodes = Mutex::new(vec![real_inode.into()]);
530        new
531    }
532
533    pub async fn new_from_real_inodes(
534        name: &str,
535        ino: u64,
536        path: String,
537        real_inodes: Vec<RealInode>,
538    ) -> Result<Self> {
539        if real_inodes.is_empty() {
540            error!("BUG: new_from_real_inodes() called with empty real_inodes");
541            return Err(Error::from_raw_os_error(libc::EINVAL));
542        }
543
544        let mut first = true;
545        let mut new = Self::new();
546        for ri in real_inodes {
547            let whiteout = ri.whiteout;
548            let opaque = ri.opaque;
549            let stat = match &ri.stat {
550                Some(v) => v.clone(),
551                None => ri.stat64(&Request::default()).await?,
552            };
553
554            if first {
555                first = false;
556                new = Self::new_from_real_inode(name, ino, path.clone(), ri).await;
557
558                // This is whiteout, no need to check lower layers.
559                if whiteout {
560                    break;
561                }
562
563                // A non-directory file shadows all lower layers as default.
564                if !utils::is_dir(&stat.attr.kind) {
565                    break;
566                }
567
568                // Opaque directory shadows all lower layers.
569                if opaque {
570                    break;
571                }
572            } else {
573                // This is whiteout, no need to record this, break directly.
574                if ri.whiteout {
575                    break;
576                }
577
578                // Only directory have multiple real inodes, so if this is non-first real-inode
579                // and it's not directory, it should indicates some invalid layout. @weizhang555
580                if !utils::is_dir(&stat.attr.kind) {
581                    error!("invalid layout: non-directory has multiple real inodes");
582                    break;
583                }
584
585                // Valid directory.
586                new.real_inodes.lock().await.push(ri.into());
587                // Opaque directory shadows all lower layers.
588                if opaque {
589                    break;
590                }
591            }
592        }
593        Ok(new)
594    }
595
596    pub async fn stat64(&self, ctx: Request) -> Result<ReplyAttr> {
597        // try layers in order or just take stat from first layer?
598        for l in self.real_inodes.lock().await.iter() {
599            if let Some(v) = l.stat64_ignore_enoent(&ctx).await? {
600                return Ok(v);
601            }
602        }
603
604        // not in any layer
605        Err(Error::from_raw_os_error(libc::ENOENT))
606    }
607
608    pub async fn is_dir(&self, ctx: Request) -> Result<bool> {
609        let st = self.stat64(ctx).await?;
610        Ok(utils::is_dir(&st.attr.kind))
611    }
612
613    pub async fn count_entries_and_whiteout(&self, ctx: Request) -> Result<(u64, u64)> {
614        let mut count = 0;
615        let mut whiteouts = 0;
616
617        let st = self.stat64(ctx).await?;
618
619        // must be directory
620        if !utils::is_dir(&st.attr.kind) {
621            return Err(Error::from_raw_os_error(libc::ENOTDIR));
622        }
623
624        for (_, child) in self.childrens.lock().await.iter() {
625            if child.whiteout.load(Ordering::Relaxed) {
626                whiteouts += 1;
627            } else {
628                count += 1;
629            }
630        }
631        Ok((count, whiteouts))
632    }
633
634    pub async fn open(
635        &self,
636        ctx: Request,
637        flags: u32,
638        _fuse_flags: u32,
639    ) -> Result<(Arc<BoxedLayer>, ReplyOpen)> {
640        let (layer, _, inode) = self.first_layer_inode().await;
641        let ro = layer.as_ref().open(ctx, inode, flags).await?;
642        Ok((layer, ro))
643    }
644
645    // Self is directory, fill all childrens.
646    pub async fn scan_childrens(self: &Arc<Self>, ctx: Request) -> Result<Vec<OverlayInode>> {
647        let st = self.stat64(ctx).await?;
648        if !utils::is_dir(&st.attr.kind) {
649            return Err(Error::from_raw_os_error(libc::ENOTDIR));
650        }
651
652        let mut all_layer_inodes: HashMap<String, Vec<RealInode>> = HashMap::new();
653        // read out directories from each layer
654        // Scan from upper layer to lower layer.
655        for ri in self.real_inodes.lock().await.iter() {
656            if ri.whiteout {
657                // Node is deleted from some upper layer, skip it.
658                debug!("directory is whiteout");
659                break;
660            }
661
662            let stat = match &ri.stat {
663                Some(v) => v.clone(),
664                None => ri.stat64(&ctx).await?,
665            };
666
667            if !utils::is_dir(&stat.attr.kind) {
668                debug!("{} is not a directory", self.path.read().await);
669                // not directory
670                break;
671            }
672
673            // Read all entries from one layer.
674            let entries: HashMap<String, RealInode> = ri.readdir(ctx).await?;
675
676            // Merge entries from one layer to all_layer_inodes.
677            for (name, inode) in entries {
678                match all_layer_inodes.get_mut(&name) {
679                    Some(v) => {
680                        // Append additional RealInode to the end of vector.
681                        v.push(inode)
682                    }
683                    None => {
684                        all_layer_inodes.insert(name, vec![inode]);
685                    }
686                }
687            }
688
689            // if opaque, stop here
690            if ri.opaque {
691                debug!("directory {} is opaque", self.path.read().await);
692                break;
693            }
694        }
695
696        // Construct OverlayInode for each entry.
697        let mut childrens = vec![];
698        for (name, real_inodes) in all_layer_inodes {
699            // Inode numbers are not allocated yet.
700            let path = format!("{}/{}", self.path.read().await, name);
701            let new = Self::new_from_real_inodes(name.as_str(), 0, path, real_inodes).await?;
702            childrens.push(new);
703        }
704
705        Ok(childrens)
706    }
707
708    /// Create a new directory in upper layer for node, node must be directory.
709    ///
710    /// Recursively ensures a directory path exists in the upper layer.
711    ///
712    /// This function is a critical part of the copy-up process. When a file or directory
713    /// needs to be copied up, this function is called on its parent to ensure the entire
714    /// directory hierarchy exists in the upper layer first. It works recursively:
715    /// 1. If the current directory is already in the upper layer, it does nothing.
716    /// 2. If not, it first calls itself on its own parent directory.
717    /// 3. Once the parent is guaranteed to be in the upper layer, it creates the current
718    ///    directory within the parent's upper-layer representation.
719    ///
720    /// Crucially, it preserves the original directory's ownership (UID/GID) and permissions
721    /// by using [`do_getattr_helper`][crate::unionfs::layer::Layer::do_getattr_helper] and
722    /// [`mkdir_with_context`][crate::unionfs::layer::Layer::mkdir_with_context] with [`OperationContext`][crate::context::OperationContext].
723    pub async fn create_upper_dir(
724        self: Arc<Self>,
725        ctx: Request,
726        mode_umask: Option<(u32, u32)>,
727    ) -> Result<()> {
728        // To preserve original ownership, we must get the raw, unmapped host attributes.
729        // We achieve this by calling `do_getattr_helper`, which is specifically designed
730        // to bypass the ID mapping logic. This is safe and does not affect other
731        // functionalities because `do_getattr_helper` and the standard `stat64()` call
732        // both rely on the same underlying `stat` system call; they only differ in
733        // whether the resulting `uid` and `gid` are mapped.
734        let (self_layer, _, self_inode) = self.first_layer_inode().await;
735        let re = self_layer.do_getattr_helper(self_inode, None).await?;
736        let st = ReplyAttr {
737            ttl: re.1,
738            attr: convert_stat64_to_file_attr(re.0),
739        };
740        if !utils::is_dir(&st.attr.kind) {
741            return Err(Error::from_raw_os_error(libc::ENOTDIR));
742        }
743
744        // If node already has upper layer, we can just return here.
745        if self.in_upper_layer().await {
746            return Ok(());
747        }
748
749        // not in upper layer, check parent.
750        let pnode = if let Some(n) = self.parent.lock().await.upgrade() {
751            Arc::clone(&n)
752        } else {
753            return Err(Error::other("no parent?"));
754        };
755
756        if !pnode.in_upper_layer().await {
757            Box::pin(pnode.clone().create_upper_dir(ctx, None)).await?; // recursive call
758        }
759        let child: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
760        let c_name = self.name.read().await.clone();
761        let _ = pnode
762            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
763                match parent_upper_inode {
764                    Some(parent_ri) => {
765                        let ri = match mode_umask {
766                            // We manually unfold the `mkdir` logic here instead of calling the `mkdir` method directly.
767                            // This is necessary to preserve the original directory's UID and GID during the copy-up process.
768                            Some((mode, umask)) => {
769                                if !parent_ri.in_upper_layer {
770                                    return Err(Error::from_raw_os_error(libc::EROFS));
771                                }
772                                let name_osstr = OsStr::new(&c_name);
773                                let op_ctx = crate::context::OperationContext::with_credentials(
774                                    ctx,
775                                    st.attr.uid,
776                                    st.attr.gid,
777                                );
778                                let entry = parent_ri
779                                    .layer
780                                    .mkdir_with_context(
781                                        op_ctx,
782                                        parent_ri.inode,
783                                        name_osstr,
784                                        mode,
785                                        umask,
786                                    )
787                                    .await?;
788                                RealInode {
789                                    layer: parent_ri.layer.clone(),
790                                    in_upper_layer: true,
791                                    inode: entry.attr.ino,
792                                    whiteout: false,
793                                    opaque: false,
794                                    stat: Some(ReplyAttr {
795                                        ttl: entry.ttl,
796                                        attr: entry.attr,
797                                    }),
798                                }
799                            }
800                            None => {
801                                if !parent_ri.in_upper_layer {
802                                    return Err(Error::from_raw_os_error(libc::EROFS));
803                                }
804                                let name_osstr = OsStr::new(&c_name);
805                                let op_ctx = crate::context::OperationContext::with_credentials(
806                                    ctx,
807                                    st.attr.uid,
808                                    st.attr.gid,
809                                );
810                                let entry = parent_ri
811                                    .layer
812                                    .mkdir_with_context(
813                                        op_ctx,
814                                        parent_ri.inode,
815                                        name_osstr,
816                                        mode_from_kind_and_perm(st.attr.kind, st.attr.perm),
817                                        0,
818                                    )
819                                    .await?;
820                                RealInode {
821                                    layer: parent_ri.layer.clone(),
822                                    in_upper_layer: true,
823                                    inode: entry.attr.ino,
824                                    whiteout: false,
825                                    opaque: false,
826                                    stat: Some(ReplyAttr {
827                                        ttl: entry.ttl,
828                                        attr: entry.attr,
829                                    }),
830                                }
831                            }
832                        };
833                        // create directory here
834                        child.lock().await.replace(ri);
835                    }
836                    None => {
837                        error!(
838                            "BUG: parent {} has no upper inode after create_upper_dir",
839                            pnode.inode
840                        );
841                        return Err(Error::from_raw_os_error(libc::EINVAL));
842                    }
843                }
844                Ok(false)
845            })
846            .await?;
847
848        if let Some(ri) = child.lock().await.take() {
849            // Push the new real inode to the front of vector.
850            self.add_upper_inode(ri, false).await;
851        }
852
853        Ok(())
854    }
855
856    // Add new upper RealInode to OverlayInode, clear all lower RealInodes if 'clear_lowers' is true.
857    async fn add_upper_inode(self: &Arc<Self>, ri: RealInode, clear_lowers: bool) {
858        let mut inodes = self.real_inodes.lock().await;
859        // Update self according to upper attribute.
860        self.whiteout.store(ri.whiteout, Ordering::Relaxed);
861
862        // Push the new real inode to the front of vector.
863        let mut new = vec![Arc::new(ri)];
864        // Drain lower RealInodes.
865        let lowers = inodes.drain(..).collect::<Vec<Arc<RealInode>>>();
866        if !clear_lowers {
867            // If not clear lowers, append them to the end of vector.
868            new.extend(lowers);
869        }
870        inodes.extend(new);
871    }
872
873    // return the uppder layer fs.
874    pub async fn in_upper_layer(&self) -> bool {
875        let all_inodes = self.real_inodes.lock().await;
876        let first = all_inodes.first();
877        match first {
878            Some(v) => v.in_upper_layer,
879            None => false,
880        }
881    }
882
883    pub async fn upper_layer_only(&self) -> bool {
884        let real_inodes = self.real_inodes.lock().await;
885        let first = real_inodes.first();
886        match first {
887            Some(v) => {
888                if !v.in_upper_layer {
889                    false
890                } else {
891                    real_inodes.len() == 1
892                }
893            }
894            None => false,
895        }
896    }
897
898    pub async fn first_layer_inode(&self) -> (Arc<BoxedLayer>, bool, u64) {
899        let all_inodes = self.real_inodes.lock().await;
900        let first = all_inodes.first();
901        match first {
902            Some(v) => (v.layer.clone(), v.in_upper_layer, v.inode),
903            None => panic!("BUG: dangling OverlayInode"),
904        }
905    }
906
907    pub async fn child(&self, name: &str) -> Option<Arc<OverlayInode>> {
908        self.childrens.lock().await.get(name).cloned()
909    }
910
911    pub async fn remove_child(&self, name: &str) -> Option<Arc<OverlayInode>> {
912        self.childrens.lock().await.remove(name)
913    }
914
915    pub async fn insert_child(&self, name: &str, node: Arc<OverlayInode>) {
916        self.childrens.lock().await.insert(name.to_string(), node);
917    }
918
919    /// Handles operations on the upper layer inode of an `OverlayInode` in a thread-safe manner.
920    ///
921    /// This function locks the `real_inodes` field of the `OverlayInode` and retrieves the first
922    /// real inode (if any). If the first inode exists and belongs to the upper layer (`in_upper_layer` is true),
923    /// the provided callback `f` is invoked with the inode wrapped in `Some`. Otherwise, `f` is invoked with `None`.
924    ///
925    /// # Arguments
926    /// * `f`: A closure that takes an `Option<RealInode>` and returns a future. The future resolves to a `Result<bool>`.
927    ///
928    /// # Returns
929    /// * `Ok(bool)`: The result of invoking the callback `f`.
930    /// * `Err(Erron)`: An error is returned if:
931    ///   - There are no backend inodes (`real_inodes` is empty), indicating a dangling `OverlayInode`.
932    ///   - The callback `f` itself returns an error.
933    ///
934    /// # Behavior
935    /// 1. Locks the `real_inodes` field to ensure thread safety.
936    /// 2. Checks if the first inode exists:
937    ///    - If it exists and is in the upper layer, invokes `f(Some(inode))`.
938    ///    - If it exists but is not in the upper layer, invokes `f(None)`.
939    /// 3. If no inodes exist, returns an error indicating a dangling `OverlayInode`.
940    ///
941    /// # Example Use Case
942    /// This function is typically used to perform operations on the upper layer inode of an `OverlayInode`,
943    /// such as creating, modifying, or deleting files/directories in the overlay filesystem's upper layer.
944    pub async fn handle_upper_inode_locked<F, Fut>(&self, f: F) -> Result<bool>
945    where
946        // Can pass a &RealInode (or None) to f for any lifetime 'a
947        F: FnOnce(Option<Arc<RealInode>>) -> Fut,
948        // f returns a Future that must live at least as long as 'a
949        Fut: Future<Output = Result<bool>>,
950    {
951        let all_inodes = self.real_inodes.lock().await;
952        let first = all_inodes.first();
953        match first {
954            Some(v) => {
955                if v.in_upper_layer {
956                    f(Some(v.clone())).await
957                } else {
958                    f(None).await
959                }
960            }
961            None => Err(Error::other(format!(
962                "BUG: dangling OverlayInode {} without any backend inode",
963                self.inode
964            ))),
965        }
966    }
967}
968#[allow(unused)]
969fn entry_type_from_mode(mode: libc::mode_t) -> u8 {
970    match mode & libc::S_IFMT {
971        libc::S_IFBLK => libc::DT_BLK,
972        libc::S_IFCHR => libc::DT_CHR,
973        libc::S_IFDIR => libc::DT_DIR,
974        libc::S_IFIFO => libc::DT_FIFO,
975        libc::S_IFLNK => libc::DT_LNK,
976        libc::S_IFREG => libc::DT_REG,
977        libc::S_IFSOCK => libc::DT_SOCK,
978        _ => libc::DT_UNKNOWN,
979    }
980}
981impl OverlayFs {
982    pub fn new(
983        upper: Option<Arc<BoxedLayer>>,
984        lowers: Vec<Arc<BoxedLayer>>,
985        params: Config,
986        root_inode: u64,
987    ) -> Result<Self> {
988        // load root inode
989        Ok(OverlayFs {
990            config: params,
991            lower_layers: lowers,
992            upper_layer: upper,
993            inodes: RwLock::new(InodeStore::new()),
994            handles: Mutex::new(HashMap::new()),
995            next_handle: AtomicU64::new(1),
996            writeback: AtomicBool::new(false),
997            no_open: AtomicBool::new(false),
998            no_opendir: AtomicBool::new(false),
999            killpriv_v2: AtomicBool::new(false),
1000            perfile_dax: AtomicBool::new(false),
1001            root_inodes: root_inode,
1002        })
1003    }
1004
1005    pub fn root_inode(&self) -> Inode {
1006        self.root_inodes
1007    }
1008
1009    async fn alloc_inode(&self, path: &str) -> Result<u64> {
1010        self.inodes.write().await.alloc_inode(path)
1011    }
1012
1013    /// Add a file layer and stack and merge the previous file layers.
1014    pub async fn push_layer(&mut self, layer: Arc<BoxedLayer>) -> Result<()> {
1015        let upper = self.upper_layer.take();
1016        if let Some(upper) = upper {
1017            self.lower_layers.push(upper);
1018        }
1019        self.upper_layer = Some(layer);
1020        // TODO: merge previous file layers. need optimization
1021        self.import().await?;
1022        Ok(())
1023    }
1024
1025    pub async fn import(&self) -> Result<()> {
1026        let mut root = OverlayInode::new();
1027        root.inode = self.root_inode();
1028        root.path = String::from("").into();
1029        root.name = String::from("").into();
1030        root.lookups = AtomicU64::new(2);
1031        root.real_inodes = Mutex::new(vec![]);
1032        let ctx = Request::default();
1033
1034        // Update upper inode
1035        if let Some(layer) = self.upper_layer.as_ref() {
1036            let ino = layer.root_inode();
1037            let real = RealInode::new(
1038                layer.clone(),
1039                true,
1040                ino,
1041                false,
1042                layer.is_opaque(ctx, ino).await?,
1043            )
1044            .await;
1045            root.real_inodes.lock().await.push(real.into());
1046        }
1047
1048        // Update lower inodes.
1049        for layer in self.lower_layers.iter() {
1050            let ino = layer.root_inode();
1051            let real: RealInode = RealInode::new(
1052                layer.clone(),
1053                false,
1054                ino,
1055                false,
1056                layer.is_opaque(ctx, ino).await?,
1057            )
1058            .await;
1059            root.real_inodes.lock().await.push(real.into());
1060        }
1061        let root_node = Arc::new(root);
1062
1063        // insert root inode into hash
1064        self.insert_inode(self.root_inode(), Arc::clone(&root_node))
1065            .await;
1066
1067        info!("loading root directory");
1068        self.load_directory(ctx, &root_node).await?;
1069        info!("loaded root directory");
1070
1071        Ok(())
1072    }
1073
1074    async fn root_node(&self) -> Arc<OverlayInode> {
1075        // Root node must exist.
1076        self.get_active_inode(self.root_inode()).await.unwrap()
1077    }
1078
1079    async fn insert_inode(&self, inode: u64, node: Arc<OverlayInode>) {
1080        self.inodes.write().await.insert_inode(inode, node).await;
1081    }
1082
1083    async fn get_active_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1084        self.inodes.read().await.get_inode(inode)
1085    }
1086
1087    // Get inode which is active or deleted.
1088    async fn get_all_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1089        let inode_store = self.inodes.read().await;
1090        match inode_store.get_inode(inode) {
1091            Some(n) => Some(n),
1092            None => inode_store.get_deleted_inode(inode),
1093        }
1094    }
1095
1096    // Return the inode only if it's permanently deleted from both self.inodes and self.deleted_inodes.
1097    async fn remove_inode(
1098        &self,
1099        inode: u64,
1100        path_removed: Option<String>,
1101    ) -> Option<Arc<OverlayInode>> {
1102        self.inodes
1103            .write()
1104            .await
1105            .remove_inode(inode, path_removed)
1106            .await
1107    }
1108
1109    // Lookup child OverlayInode with <name> under <parent> directory.
1110    // If name is empty, return parent itself.
1111    // Parent dir will be loaded, but returned OverlayInode won't.
1112    async fn lookup_node(
1113        &self,
1114        ctx: Request,
1115        parent: Inode,
1116        name: &str,
1117    ) -> Result<Arc<OverlayInode>> {
1118        if name.contains(SLASH_ASCII) {
1119            return Err(Error::from_raw_os_error(libc::EINVAL));
1120        }
1121
1122        // Parent inode is expected to be loaded before this function is called.
1123        // TODO: Is this correct?
1124        let pnode = match self.get_active_inode(parent).await {
1125            Some(v) => v,
1126            None => {
1127                match self.get_all_inode(parent).await {
1128                    Some(v) => {
1129                        trace!(
1130                            "overlayfs:mod.rs:1031:lookup_node: parent inode {parent} is deleted"
1131                        );
1132                        v
1133                    }
1134                    None => {
1135                        trace!(
1136                            "overlayfs:mod.rs:1034:lookup_node: parent inode {parent} not found"
1137                        );
1138                        // Parent inode is not found, return ENOENT.
1139                        return Err(Error::from_raw_os_error(libc::ENOENT));
1140                    }
1141                }
1142            }
1143        };
1144
1145        // Parent is whiteout-ed, return ENOENT.
1146        if pnode.whiteout.load(Ordering::Relaxed) {
1147            return Err(Error::from_raw_os_error(libc::ENOENT));
1148        }
1149
1150        let st = pnode.stat64(ctx).await?;
1151        if utils::is_dir(&st.attr.kind) && !pnode.loaded.load(Ordering::Relaxed) {
1152            // Parent is expected to be directory, load it first.
1153            self.load_directory(ctx, &pnode).await?;
1154        }
1155
1156        // Current file or dir.
1157        if name.eq(".")  
1158            // Root directory has no parent.
1159            || (parent == self.root_inode() && name.eq("..")) 
1160            // Special convention: empty name indicates current dir.
1161            || name.is_empty()
1162        {
1163            return Ok(Arc::clone(&pnode));
1164        }
1165
1166        match pnode.child(name).await {
1167            // Child is found.
1168            Some(v) => Ok(v),
1169            None => {
1170                trace!("lookup_node: child {name} not found");
1171                Err(Error::from_raw_os_error(libc::ENOENT))
1172            }
1173        }
1174    }
1175
1176    async fn lookup_node_ignore_enoent(
1177        &self,
1178        ctx: Request,
1179        parent: u64,
1180        name: &str,
1181    ) -> Result<Option<Arc<OverlayInode>>> {
1182        match self.lookup_node(ctx, parent, name).await {
1183            Ok(n) => Ok(Some(Arc::clone(&n))),
1184            Err(e) => {
1185                if let Some(raw_error) = e.raw_os_error()
1186                    && raw_error == libc::ENOENT
1187                {
1188                    return Ok(None);
1189                }
1190                Err(e)
1191            }
1192        }
1193    }
1194
1195    // Load entries of the directory from all layers, if node is not directory, return directly.
1196    async fn load_directory(&self, ctx: Request, node: &Arc<OverlayInode>) -> Result<()> {
1197        if node.loaded.load(Ordering::Relaxed) {
1198            return Ok(());
1199        }
1200
1201        // We got all childrens without inode.
1202        // info!("before scan childrens, ctx: {:?}, node: {:?}", ctx, node.inode);
1203        let childrens = node.scan_childrens(ctx).await?;
1204        // info!("scanned children");
1205
1206        // =============== Start Lock Area ===================
1207        // Lock OverlayFs inodes.
1208        let mut inode_store = self.inodes.write().await;
1209        // Lock the OverlayInode and its childrens.
1210        let mut node_children = node.childrens.lock().await;
1211
1212        // Check again in case another 'load_directory' function call gets locks and want to do duplicated work.
1213        if node.loaded.load(Ordering::Relaxed) {
1214            return Ok(());
1215        }
1216
1217        // Now we have two locks' protection, Fs inodes lock and OverlayInode's childrens lock.
1218        // info!("before iter childrens");
1219        for mut child in childrens.into_iter() {
1220            // Allocate inode for each child.
1221            let ino = inode_store.alloc_inode(&child.path.read().await)?;
1222
1223            let name = child.name.read().await.clone();
1224            child.inode = ino;
1225            // Create bi-directional link between parent and child.
1226            child.parent = Mutex::new(Arc::downgrade(node));
1227
1228            let arc_child = Arc::new(child);
1229            node_children.insert(name, arc_child.clone());
1230            // Record overlay inode in whole OverlayFs.
1231            inode_store.insert_inode(ino, arc_child).await;
1232        }
1233        // info!("after iter childrens");
1234
1235        node.loaded.store(true, Ordering::Relaxed);
1236
1237        Ok(())
1238    }
1239
1240    async fn forget_one(&self, inode: Inode, count: u64) {
1241        if inode == self.root_inode() || inode == 0 {
1242            return;
1243        }
1244
1245        let v = match self.get_all_inode(inode).await {
1246            Some(n) => n,
1247            None => {
1248                trace!("forget unknown inode: {inode}");
1249                return;
1250            }
1251        };
1252
1253        // Use fetch_update to atomically update lookups in a loop until it succeeds
1254        v.lookups
1255            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
1256                // If count is larger than current lookups, return 0
1257                // Otherwise subtract count from current lookups
1258                if current < count {
1259                    Some(0)
1260                } else {
1261                    Some(current - count)
1262                }
1263            })
1264            .expect("fetch_update failed");
1265
1266        let lookups = v.lookups.load(Ordering::Relaxed);
1267        trace!(
1268            "forget inode: {}, name {}, lookups: {}",
1269            inode,
1270            v.name.read().await,
1271            lookups
1272        );
1273        if lookups == 0 {
1274            debug!(
1275                "inode is forgotten: {}, name {}",
1276                inode,
1277                v.name.read().await
1278            );
1279            let _ = self.remove_inode(inode, None).await;
1280            let parent = v.parent.lock().await;
1281
1282            if let Some(p) = parent.upgrade() {
1283                // remove it from hashmap
1284                p.remove_child(&v.name.read().await).await;
1285            }
1286        }
1287    }
1288
1289    async fn do_lookup(&self, ctx: Request, parent: Inode, name: &str) -> Result<ReplyEntry> {
1290        let node = self.lookup_node(ctx, parent, name).await?;
1291        debug!("do_lookup: {name:?}, found");
1292
1293        if node.whiteout.load(Ordering::Relaxed) {
1294            eprintln!("Error: node.whiteout.load() called.");
1295            return Err(Error::from_raw_os_error(libc::ENOENT));
1296        }
1297
1298        let mut st = node.stat64(ctx).await?;
1299        st.attr.ino = node.inode;
1300        if utils::is_dir(&st.attr.kind) && !node.loaded.load(Ordering::Relaxed) {
1301            self.load_directory(ctx, &node).await?;
1302        }
1303
1304        // FIXME: can forget happen between found and increase reference counter?
1305        let tmp = node.lookups.fetch_add(1, Ordering::Relaxed);
1306        trace!("lookup count: {}", tmp + 1);
1307        Ok(ReplyEntry {
1308            ttl: st.ttl,
1309            attr: st.attr,
1310            generation: 0,
1311        })
1312    }
1313
1314    async fn do_statvfs(&self, ctx: Request, inode: Inode) -> Result<ReplyStatFs> {
1315        match self.get_active_inode(inode).await {
1316            Some(ovi) => {
1317                let all_inodes = ovi.real_inodes.lock().await;
1318                let real_inode = all_inodes
1319                    .first()
1320                    .ok_or(Error::other("backend inode not found"))?;
1321                Ok(real_inode.layer.statfs(ctx, real_inode.inode).await?)
1322            }
1323            None => Err(Error::from_raw_os_error(libc::ENOENT)),
1324        }
1325    }
1326
1327    #[allow(clippy::too_many_arguments)]
1328    async fn do_readdir<'a>(
1329        &self,
1330        ctx: Request,
1331        inode: Inode,
1332        handle: u64,
1333        offset: u64,
1334    ) -> Result<DirectoryStream<'a>> {
1335        // lookup the directory
1336        let ovl_inode = match self.handles.lock().await.get(&handle) {
1337            Some(dir) => dir.node.clone(),
1338            None => {
1339                // Try to get data with inode.
1340                let node = self.lookup_node(ctx, inode, ".").await?;
1341
1342                let st = node.stat64(ctx).await?;
1343                if !utils::is_dir(&st.attr.kind) {
1344                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1345                }
1346
1347                node.clone()
1348            }
1349        };
1350        self.load_directory(ctx, &ovl_inode).await?;
1351        let mut childrens = Vec::new();
1352        //add myself as "."
1353        childrens.push((".".to_string(), ovl_inode.clone()));
1354
1355        //add parent
1356        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1357            Some(p) => p.clone(),
1358            None => self.root_node().await,
1359        };
1360        childrens.push(("..".to_string(), parent_node));
1361
1362        for (name, child) in ovl_inode.childrens.lock().await.iter() {
1363            // skip whiteout node
1364            if child.whiteout.load(Ordering::Relaxed) {
1365                continue;
1366            }
1367            childrens.push((name.clone(), child.clone()));
1368        }
1369
1370        if offset >= childrens.len() as u64 {
1371            let empty: Vec<std::result::Result<DirectoryEntry, Errno>> = Vec::new();
1372            return Ok(Box::pin(iter(empty.into_iter())) as DirectoryStream<'a>);
1373        }
1374        let mut d: Vec<std::result::Result<DirectoryEntry, Errno>> = Vec::new();
1375
1376        for (index, (name, child)) in (0_u64..).zip(childrens.into_iter()) {
1377            // make struct DireEntry and Entry
1378            let st = child.stat64(ctx).await?;
1379            let dir_entry = DirectoryEntry {
1380                inode: child.inode,
1381                kind: st.attr.kind,
1382                name: name.into(),
1383                offset: (index + 1) as i64,
1384            };
1385            d.push(Ok(dir_entry));
1386        }
1387
1388        Ok(Box::pin(iter(d.into_iter())) as DirectoryStream<'a>)
1389    }
1390
1391    #[allow(clippy::too_many_arguments)]
1392    async fn do_readdirplus<'a>(
1393        &self,
1394        ctx: Request,
1395        inode: Inode,
1396        handle: u64,
1397        offset: u64,
1398    ) -> Result<DirectoryPlusStream<'a>> {
1399        // lookup the directory
1400        let ovl_inode = match self.handles.lock().await.get(&handle) {
1401            Some(dir) => {
1402                trace!(
1403                    "do_readdirplus: handle {} found, inode {}",
1404                    handle, dir.node.inode
1405                );
1406                dir.node.clone()
1407            }
1408            None => {
1409                trace!("do_readdirplus: handle {handle} not found, lookup inode {inode}");
1410                // Try to get data with inode.
1411                let node = self.lookup_node(ctx, inode, ".").await?;
1412
1413                let st = node.stat64(ctx).await?;
1414                if !utils::is_dir(&st.attr.kind) {
1415                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1416                }
1417
1418                node.clone()
1419            }
1420        };
1421        self.load_directory(ctx, &ovl_inode).await?;
1422
1423        let mut childrens = Vec::new();
1424        //add myself as "."
1425        childrens.push((".".to_string(), ovl_inode.clone()));
1426
1427        //add parent
1428        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1429            Some(p) => p.clone(),
1430            None => self.root_node().await,
1431        };
1432        childrens.push(("..".to_string(), parent_node));
1433
1434        for (name, child) in ovl_inode.childrens.lock().await.iter() {
1435            // skip whiteout node
1436            if child.whiteout.load(Ordering::Relaxed) {
1437                continue;
1438            }
1439            childrens.push((name.clone(), child.clone()));
1440        }
1441
1442        if offset >= childrens.len() as u64 {
1443            let empty: Vec<std::result::Result<DirectoryEntryPlus, Errno>> = Vec::new();
1444            return Ok(Box::pin(iter(empty.into_iter())) as DirectoryPlusStream<'a>);
1445        }
1446        let mut d: Vec<std::result::Result<DirectoryEntryPlus, Errno>> = Vec::new();
1447
1448        for (index, (name, child)) in (0_u64..).zip(childrens.into_iter()) {
1449            if index >= offset {
1450                // make struct DireEntry and Entry
1451                let mut st = child.stat64(ctx).await?;
1452                child.lookups.fetch_add(1, Ordering::Relaxed);
1453                st.attr.ino = child.inode;
1454                let dir_entry = DirectoryEntryPlus {
1455                    inode: child.inode,
1456                    generation: 0,
1457                    kind: st.attr.kind,
1458                    name: name.into(),
1459                    offset: (index + 1) as i64,
1460                    attr: st.attr,
1461                    entry_ttl: st.ttl,
1462                    attr_ttl: st.ttl,
1463                };
1464                d.push(Ok(dir_entry));
1465            }
1466        }
1467
1468        Ok(Box::pin(iter(d.into_iter())) as DirectoryPlusStream<'a>)
1469    }
1470
1471    async fn do_mkdir(
1472        &self,
1473        ctx: Request,
1474        parent_node: Arc<OverlayInode>,
1475        name: &str,
1476        mode: u32,
1477        umask: u32,
1478    ) -> Result<()> {
1479        if self.upper_layer.is_none() {
1480            return Err(Error::from_raw_os_error(libc::EROFS));
1481        }
1482
1483        // Parent node was deleted.
1484        if parent_node.whiteout.load(Ordering::Relaxed) {
1485            return Err(Error::from_raw_os_error(libc::ENOENT));
1486        }
1487
1488        let mut delete_whiteout = false;
1489        let mut set_opaque = false;
1490        if let Some(n) = self
1491            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1492            .await?
1493        {
1494            // Node with same name exists, let's check if it's whiteout.
1495            if !n.whiteout.load(Ordering::Relaxed) {
1496                return Err(Error::from_raw_os_error(libc::EEXIST));
1497            }
1498
1499            if n.in_upper_layer().await {
1500                delete_whiteout = true;
1501            }
1502
1503            // Set opaque if child dir has lower layers.
1504            if !n.upper_layer_only().await {
1505                set_opaque = true;
1506            }
1507        }
1508
1509        // Copy parent node up if necessary.
1510        let pnode = self.copy_node_up(ctx, parent_node).await?;
1511
1512        let path = format!("{}/{}", pnode.path.read().await, name);
1513        let path_ref = &path;
1514        let new_node = Arc::new(Mutex::new(None));
1515        pnode
1516            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1517                let parent_real_inode = match parent_real_inode {
1518                    Some(inode) => inode,
1519                    None => {
1520                        error!("BUG: parent doesn't have upper inode after copied up");
1521                        return Err(Error::from_raw_os_error(libc::EINVAL));
1522                    }
1523                };
1524                let osstr = OsStr::new(name);
1525                if delete_whiteout {
1526                    let _ = parent_real_inode
1527                        .layer
1528                        .delete_whiteout(ctx, parent_real_inode.inode, osstr)
1529                        .await;
1530                }
1531
1532                // Allocate inode number.
1533                let ino = self.alloc_inode(path_ref).await?;
1534                let child_dir = parent_real_inode.mkdir(ctx, name, mode, umask).await?;
1535                // Set opaque if child dir has lower layers.
1536                if set_opaque {
1537                    parent_real_inode
1538                        .layer
1539                        .set_opaque(ctx, child_dir.inode)
1540                        .await?;
1541                }
1542                let ovi =
1543                    OverlayInode::new_from_real_inode(name, ino, path_ref.clone(), child_dir).await;
1544                new_node.lock().await.replace(ovi);
1545                Ok(false)
1546            })
1547            .await?;
1548
1549        // new_node is always 'Some'
1550        let nn = new_node.lock().await.take();
1551        let arc_node = Arc::new(nn.unwrap());
1552        self.insert_inode(arc_node.inode, arc_node.clone()).await;
1553        pnode.insert_child(name, arc_node).await;
1554        Ok(())
1555    }
1556
1557    async fn do_mknod(
1558        &self,
1559        ctx: Request,
1560        parent_node: &Arc<OverlayInode>,
1561        name: &str,
1562        mode: u32,
1563        rdev: u32,
1564        umask: u32,
1565    ) -> Result<()> {
1566        if self.upper_layer.is_none() {
1567            return Err(Error::from_raw_os_error(libc::EROFS));
1568        }
1569
1570        // Parent node was deleted.
1571        if parent_node.whiteout.load(Ordering::Relaxed) {
1572            return Err(Error::from_raw_os_error(libc::ENOENT));
1573        }
1574
1575        match self
1576            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1577            .await?
1578        {
1579            Some(n) => {
1580                // Node with same name exists, let's check if it's whiteout.
1581                if !n.whiteout.load(Ordering::Relaxed) {
1582                    return Err(Error::from_raw_os_error(libc::EEXIST));
1583                }
1584
1585                // Copy parent node up if necessary.
1586                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1587                pnode
1588                    .handle_upper_inode_locked(
1589                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1590                            let parent_real_inode = match parent_real_inode {
1591                                Some(inode) => inode,
1592                                None => {
1593                                    error!("BUG: parent doesn't have upper inode after copied up");
1594                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1595                                }
1596                            };
1597                            let osstr = OsStr::new(name);
1598                            if n.in_upper_layer().await {
1599                                let _ = parent_real_inode
1600                                    .layer
1601                                    .delete_whiteout(ctx, parent_real_inode.inode, osstr)
1602                                    .await;
1603                            }
1604
1605                            let child_ri = parent_real_inode
1606                                .mknod(ctx, name, mode, rdev, umask)
1607                                .await?;
1608
1609                            // Replace existing real inodes with new one.
1610                            n.add_upper_inode(child_ri, true).await;
1611                            Ok(false)
1612                        },
1613                    )
1614                    .await?;
1615            }
1616            None => {
1617                // Copy parent node up if necessary.
1618                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1619                let new_node = Arc::new(Mutex::new(None));
1620                let path = format!("{}/{}", pnode.path.read().await, name);
1621                pnode
1622                    .handle_upper_inode_locked(
1623                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1624                            let parent_real_inode = match parent_real_inode {
1625                                Some(inode) => inode,
1626                                None => {
1627                                    error!("BUG: parent doesn't have upper inode after copied up");
1628                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1629                                }
1630                            };
1631
1632                            // Allocate inode number.
1633                            let ino = self.alloc_inode(&path).await?;
1634                            let child_ri = parent_real_inode
1635                                .mknod(ctx, name, mode, rdev, umask)
1636                                .await?;
1637                            let ovi = OverlayInode::new_from_real_inode(
1638                                name,
1639                                ino,
1640                                path.clone(),
1641                                child_ri,
1642                            )
1643                            .await;
1644
1645                            new_node.lock().await.replace(ovi);
1646                            Ok(false)
1647                        },
1648                    )
1649                    .await?;
1650
1651                let nn = new_node.lock().await.take();
1652                let arc_node = Arc::new(nn.unwrap());
1653                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1654                pnode.insert_child(name, arc_node).await;
1655            }
1656        }
1657
1658        Ok(())
1659    }
1660
1661    async fn do_create(
1662        &self,
1663        ctx: Request,
1664        parent_node: &Arc<OverlayInode>,
1665        name: &OsStr,
1666        mode: u32,
1667        flags: u32,
1668    ) -> Result<Option<u64>> {
1669        let name_str = name.to_str().unwrap();
1670        let upper = self
1671            .upper_layer
1672            .as_ref()
1673            .cloned()
1674            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
1675
1676        // Parent node was deleted.
1677        if parent_node.whiteout.load(Ordering::Relaxed) {
1678            return Err(Error::from_raw_os_error(libc::ENOENT));
1679        }
1680
1681        let handle: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1682        let real_ino: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1683        let new_ovi = match self
1684            .lookup_node_ignore_enoent(ctx, parent_node.inode, name_str)
1685            .await?
1686        {
1687            Some(n) => {
1688                // Node with same name exists, let's check if it's whiteout.
1689                if !n.whiteout.load(Ordering::Relaxed) {
1690                    return Err(Error::from_raw_os_error(libc::EEXIST));
1691                }
1692
1693                // Copy parent node up if necessary.
1694                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1695                pnode
1696                    .handle_upper_inode_locked(
1697                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1698                            let parent_real_inode = match parent_real_inode {
1699                                Some(inode) => inode,
1700                                None => {
1701                                    error!("BUG: parent doesn't have upper inode after copied up");
1702                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1703                                }
1704                            };
1705
1706                            if n.in_upper_layer().await {
1707                                let _ = parent_real_inode
1708                                    .layer
1709                                    .delete_whiteout(ctx, parent_real_inode.inode, name)
1710                                    .await;
1711                            }
1712
1713                            let (child_ri, hd) =
1714                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1715                            real_ino.lock().await.replace(child_ri.inode);
1716                            handle.lock().await.replace(hd.unwrap());
1717
1718                            // Replace existing real inodes with new one.
1719                            n.add_upper_inode(child_ri, true).await;
1720                            Ok(false)
1721                        },
1722                    )
1723                    .await?;
1724                n.clone()
1725            }
1726            None => {
1727                // Copy parent node up if necessary.
1728                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1729                let new_node = Arc::new(Mutex::new(None));
1730                let path = format!("{}/{}", pnode.path.read().await, name_str);
1731                pnode
1732                    .handle_upper_inode_locked(
1733                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1734                            let parent_real_inode = match parent_real_inode {
1735                                Some(inode) => inode,
1736                                None => {
1737                                    error!("BUG: parent doesn't have upper inode after copied up");
1738                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1739                                }
1740                            };
1741
1742                            let (child_ri, hd) =
1743                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1744                            real_ino.lock().await.replace(child_ri.inode);
1745                            handle.lock().await.replace(hd.unwrap());
1746                            // Allocate inode number.
1747                            let ino = self.alloc_inode(&path).await?;
1748                            let ovi = OverlayInode::new_from_real_inode(
1749                                name_str,
1750                                ino,
1751                                path.clone(),
1752                                child_ri,
1753                            )
1754                            .await;
1755
1756                            new_node.lock().await.replace(ovi);
1757                            Ok(false)
1758                        },
1759                    )
1760                    .await?;
1761
1762                // new_node is always 'Some'
1763                let nn = new_node.lock().await.take();
1764                let arc_node = Arc::new(nn.unwrap());
1765                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1766                pnode.insert_child(name_str, arc_node.clone()).await;
1767                arc_node
1768            }
1769        };
1770
1771        let final_handle = match *handle.lock().await {
1772            Some(hd) => {
1773                if self.no_open.load(Ordering::Relaxed) {
1774                    None
1775                } else {
1776                    let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1777                    let handle_data = HandleData {
1778                        node: new_ovi,
1779                        real_handle: Some(RealHandle {
1780                            layer: upper.clone(),
1781                            in_upper_layer: true,
1782                            inode: real_ino.lock().await.unwrap(),
1783                            handle: AtomicU64::new(hd),
1784                        }),
1785                    };
1786                    self.handles
1787                        .lock()
1788                        .await
1789                        .insert(handle, Arc::new(handle_data));
1790                    Some(handle)
1791                }
1792            }
1793            None => None,
1794        };
1795        Ok(final_handle)
1796    }
1797
1798    async fn do_rename(
1799        &self,
1800        req: Request,
1801        parent: Inode,
1802        name: &OsStr,
1803        new_parent: Inode,
1804        new_name: &OsStr,
1805    ) -> Result<()> {
1806        let name_str = name.to_str().unwrap();
1807        let new_name_str = new_name.to_str().unwrap();
1808
1809        let parent_node = self.lookup_node(req, parent, "").await?;
1810        let new_parent_node = self.lookup_node(req, new_parent, "").await?;
1811        let src_node = self.lookup_node(req, parent, name_str).await?;
1812        let dest_node_opt = self
1813            .lookup_node_ignore_enoent(req, new_parent, new_name_str)
1814            .await?;
1815        // trace!("parent_node: {}, new_parent_node: {}, src_node: {}, dest_node_opt: {:?}", parent_node.inode, new_parent_node.inode, src_node.inode, dest_node_opt.as_ref().map(|n| n.inode));
1816
1817        if let Some(dest_node) = &dest_node_opt {
1818            let src_is_dir = src_node.is_dir(req).await?;
1819            let dest_is_dir = dest_node.is_dir(req).await?;
1820            if src_is_dir != dest_is_dir {
1821                return Err(Error::from_raw_os_error(libc::EISDIR));
1822            }
1823            if dest_is_dir {
1824                self.copy_directory_up(req, dest_node.clone()).await?;
1825                let (count, _) = dest_node.count_entries_and_whiteout(req).await?;
1826                if count > 0 {
1827                    return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
1828                }
1829            }
1830        }
1831
1832        let pnode = self.copy_node_up(req, parent_node).await?;
1833        let new_pnode = self.copy_node_up(req, new_parent_node).await?;
1834        let s_node = self.copy_node_up(req, src_node).await?;
1835
1836        let need_whiteout = !s_node.upper_layer_only().await;
1837
1838        let (p_layer, _, p_inode) = pnode.first_layer_inode().await;
1839        let (new_p_layer, _, new_p_inode) = new_pnode.first_layer_inode().await;
1840        assert!(Arc::ptr_eq(&p_layer, &new_p_layer));
1841
1842        p_layer
1843            .rename(req, p_inode, name, new_p_inode, new_name)
1844            .await?;
1845
1846        // Handle the replaced destination node (if any).
1847        if let Some(dest_node) = dest_node_opt {
1848            let path = dest_node.path.read().await.clone();
1849            self.remove_inode(dest_node.inode, Some(path)).await;
1850        }
1851
1852        // Update the moved source node's state.
1853
1854        // Remove from old parent.
1855        pnode.remove_child(name_str).await;
1856        self.remove_inode(s_node.inode, s_node.path.read().await.clone().into())
1857            .await;
1858        let new_path = format!("{}/{}", new_pnode.path.read().await, new_name_str);
1859        *s_node.path.write().await = new_path;
1860        *s_node.name.write().await = new_name_str.to_string();
1861        *s_node.parent.lock().await = Arc::downgrade(&new_pnode);
1862        new_pnode.insert_child(new_name_str, s_node.clone()).await;
1863        self.insert_inode(s_node.inode, s_node).await;
1864
1865        // Create whiteout at the old location if necessary.
1866        if need_whiteout {
1867            p_layer.create_whiteout(req, p_inode, name).await?;
1868        }
1869
1870        Ok(())
1871    }
1872
1873    async fn do_link(
1874        &self,
1875        ctx: Request,
1876        src_node: &Arc<OverlayInode>,
1877        new_parent: &Arc<OverlayInode>,
1878        name: &str,
1879    ) -> Result<()> {
1880        if self.upper_layer.is_none() {
1881            return Err(Error::from_raw_os_error(libc::EROFS));
1882        }
1883
1884        // Node is whiteout.
1885        if src_node.whiteout.load(Ordering::Relaxed) || new_parent.whiteout.load(Ordering::Relaxed)
1886        {
1887            return Err(Error::from_raw_os_error(libc::ENOENT));
1888        }
1889
1890        let st = src_node.stat64(ctx).await?;
1891        if utils::is_dir(&st.attr.kind) {
1892            // Directory can't be hardlinked.
1893            return Err(Error::from_raw_os_error(libc::EPERM));
1894        }
1895
1896        let src_node = self.copy_node_up(ctx, Arc::clone(src_node)).await?;
1897        let new_parent = self.copy_node_up(ctx, Arc::clone(new_parent)).await?;
1898        let src_ino = src_node.first_layer_inode().await.2;
1899
1900        if let Some(existing_node) = self
1901            .lookup_node_ignore_enoent(ctx, new_parent.inode, name)
1902            .await?
1903        {
1904            // If it's not a whiteout, it's an error
1905            if !existing_node.whiteout.load(Ordering::Relaxed) {
1906                return Err(Error::from_raw_os_error(libc::EEXIST));
1907            }
1908            // If it is a whiteout, we will overwrite it.
1909            // First, remove the physical whiteout file in the upper layer.
1910            new_parent
1911                .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1912                    let parent_ri = parent_real_inode.ok_or_else(|| {
1913                        error!("BUG: parent doesn't have upper inode after copied up");
1914                        Error::from_raw_os_error(libc::EINVAL)
1915                    })?;
1916                    // Only delete if the whiteout is in the upper layer
1917                    if existing_node.in_upper_layer().await {
1918                        let _ = parent_ri
1919                            .layer
1920                            .delete_whiteout(ctx, parent_ri.inode, OsStr::new(name))
1921                            .await;
1922                    }
1923                    Ok(false)
1924                })
1925                .await?;
1926        }
1927
1928        new_parent
1929            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1930                let parent_real_inode = match parent_real_inode {
1931                    Some(inode) => inode,
1932                    None => {
1933                        error!("BUG: parent doesn't have upper inode after copied up");
1934                        return Err(Error::from_raw_os_error(libc::EINVAL));
1935                    }
1936                };
1937
1938                parent_real_inode.link(ctx, src_ino, name).await?;
1939
1940                Ok(false)
1941            })
1942            .await?;
1943
1944        self.insert_inode(src_node.inode, src_node.clone()).await;
1945        new_parent.insert_child(name, src_node).await;
1946
1947        Ok(())
1948    }
1949
1950    async fn do_symlink(
1951        &self,
1952        ctx: Request,
1953        linkname: &str,
1954        parent_node: &Arc<OverlayInode>,
1955        name: &str,
1956    ) -> Result<()> {
1957        let name_os = OsStr::new(name);
1958        if self.upper_layer.is_none() {
1959            return Err(Error::from_raw_os_error(libc::EROFS));
1960        }
1961
1962        // parent was deleted.
1963        if parent_node.whiteout.load(Ordering::Relaxed) {
1964            return Err(Error::from_raw_os_error(libc::ENOENT));
1965        }
1966
1967        match self
1968            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1969            .await?
1970        {
1971            Some(n) => {
1972                // Node with same name exists, let's check if it's whiteout.
1973                if !n.whiteout.load(Ordering::Relaxed) {
1974                    return Err(Error::from_raw_os_error(libc::EEXIST));
1975                }
1976
1977                // Copy parent node up if necessary.
1978                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1979                pnode
1980                    .handle_upper_inode_locked(
1981                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1982                            let parent_real_inode = match parent_real_inode {
1983                                Some(inode) => inode,
1984                                None => {
1985                                    error!("BUG: parent doesn't have upper inode after copied up");
1986                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1987                                }
1988                            };
1989
1990                            if n.in_upper_layer().await {
1991                                let _ = parent_real_inode
1992                                    .layer
1993                                    .delete_whiteout(ctx, parent_real_inode.inode, name_os)
1994                                    .await;
1995                            }
1996
1997                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
1998
1999                            // Replace existing real inodes with new one.
2000                            n.add_upper_inode(child_ri, true).await;
2001                            Ok(false)
2002                        },
2003                    )
2004                    .await?;
2005            }
2006            None => {
2007                // Copy parent node up if necessary.
2008                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
2009                let new_node: Arc<Mutex<Option<OverlayInode>>> = Arc::new(Mutex::new(None));
2010                let path = format!("{}/{}", pnode.path.read().await, name);
2011                pnode
2012                    .handle_upper_inode_locked(
2013                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
2014                            let parent_real_inode = match parent_real_inode {
2015                                Some(inode) => inode,
2016                                None => {
2017                                    error!("BUG: parent doesn't have upper inode after copied up");
2018                                    return Err(Error::from_raw_os_error(libc::EINVAL));
2019                                }
2020                            };
2021
2022                            // Allocate inode number.
2023                            let ino = self.alloc_inode(&path).await?;
2024                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
2025                            let ovi = OverlayInode::new_from_real_inode(
2026                                name,
2027                                ino,
2028                                path.clone(),
2029                                child_ri,
2030                            )
2031                            .await;
2032
2033                            new_node.lock().await.replace(ovi);
2034                            Ok(false)
2035                        },
2036                    )
2037                    .await?;
2038
2039                // new_node is always 'Some'
2040                let arc_node = Arc::new(new_node.lock().await.take().unwrap());
2041                self.insert_inode(arc_node.inode, arc_node.clone()).await;
2042                pnode.insert_child(name, arc_node).await;
2043            }
2044        }
2045
2046        Ok(())
2047    }
2048
2049    /// Copies a symbolic link from a lower layer to the upper layer.
2050    ///
2051    /// This function is a part of the copy-up process, triggered when a symlink that
2052    /// only exists in a lower layer is modified. It reads the link target and attributes
2053    /// from the lower layer and creates an identical symlink in the upper layer, crucially
2054    /// preserving the original host UID and GID.
2055    async fn copy_symlink_up(
2056        &self,
2057        ctx: Request,
2058        node: Arc<OverlayInode>,
2059    ) -> Result<Arc<OverlayInode>> {
2060        if node.in_upper_layer().await {
2061            return Ok(node);
2062        }
2063
2064        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2065            Arc::clone(n)
2066        } else {
2067            return Err(Error::other("no parent?"));
2068        };
2069
2070        // To preserve original ownership, we must get the raw, unmapped host attributes.
2071        // We achieve this by calling `do_getattr_helper`, which is specifically designed
2072        // to bypass the ID mapping logic. This is safe and does not affect other
2073        // functionalities because `do_getattr_helper` and the standard `stat64()` call
2074        // both rely on the same underlying `stat` system call; they only differ in
2075        // whether the resulting `uid` and `gid` are mapped.
2076        let (self_layer, _, self_inode) = node.first_layer_inode().await;
2077        let re = self_layer.do_getattr_helper(self_inode, None).await?;
2078        let st = ReplyAttr {
2079            ttl: re.1,
2080            attr: convert_stat64_to_file_attr(re.0),
2081        };
2082
2083        if !parent_node.in_upper_layer().await {
2084            parent_node.clone().create_upper_dir(ctx, None).await?;
2085        }
2086
2087        // Read the linkname from lower layer.
2088        let reply_data = self_layer.readlink(ctx, self_inode).await?;
2089        // Convert path to &str.
2090        let path = std::str::from_utf8(&reply_data.data)
2091            .map_err(|_| Error::from_raw_os_error(libc::EINVAL))?;
2092
2093        let new_upper_real: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
2094        parent_node
2095            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2096                // We already create upper dir for parent_node above.
2097                let parent_real_inode =
2098                    parent_upper_inode.ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2099                // We manually unfold the `symlink` logic here instead of calling the `symlink` method directly.
2100                // This is necessary to preserve the original file's UID and GID during the copy-up process.
2101                if !parent_real_inode.in_upper_layer {
2102                    return Err(Error::from_raw_os_error(libc::EROFS));
2103                }
2104                let link_name = OsStr::new(path);
2105                let filename = node.name.read().await;
2106                let filename = OsStr::new(filename.as_str());
2107                let op_ctx = crate::context::OperationContext::with_credentials(
2108                    ctx,
2109                    st.attr.uid,
2110                    st.attr.gid,
2111                );
2112                let entry = parent_real_inode
2113                    .layer
2114                    .symlink_with_context(op_ctx, parent_real_inode.inode, filename, link_name)
2115                    .await?;
2116                let ri = RealInode {
2117                    layer: parent_real_inode.layer.clone(),
2118                    in_upper_layer: true,
2119                    inode: entry.attr.ino,
2120                    whiteout: false,
2121                    opaque: false,
2122                    stat: Some(ReplyAttr {
2123                        ttl: entry.ttl,
2124                        attr: entry.attr,
2125                    }),
2126                };
2127                new_upper_real.lock().await.replace(ri);
2128                Ok(false)
2129            })
2130            .await?;
2131
2132        if let Some(real_inode) = new_upper_real.lock().await.take() {
2133            // update upper_inode and first_inode()
2134            node.add_upper_inode(real_inode, true).await;
2135        }
2136
2137        Ok(node)
2138    }
2139
2140    /// Copies a regular file and its contents from a lower layer to the upper layer.
2141    ///
2142    /// This function is a core part of the copy-up process, triggered when a regular file
2143    /// that only exists in a lower layer is written to. It creates an empty file in the
2144    /// upper layer with the original file's attributes (mode, UID, GID), and then copies
2145    /// the entire content from the lower layer file to the new upper layer file.
2146    async fn copy_regfile_up(
2147        &self,
2148        ctx: Request,
2149        node: Arc<OverlayInode>,
2150    ) -> Result<Arc<OverlayInode>> {
2151        if node.in_upper_layer().await {
2152            return Ok(node);
2153        }
2154
2155        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2156            Arc::clone(n)
2157        } else {
2158            return Err(Error::other("no parent?"));
2159        };
2160
2161        // To preserve original ownership, we must get the raw, unmapped host attributes.
2162        // We achieve this by calling `do_getattr_helper`, which is specifically designed
2163        // to bypass the ID mapping logic. This is safe and does not affect other
2164        // functionalities because `do_getattr_helper` and the standard `stat64()` call
2165        // both rely on the same underlying `stat` system call; they only differ in
2166        // whether the resulting `uid` and `gid` are mapped.
2167        let (lower_layer, _, lower_inode) = node.first_layer_inode().await;
2168        let re = lower_layer.do_getattr_helper(lower_inode, None).await?;
2169        let st = ReplyAttr {
2170            ttl: re.1,
2171            attr: convert_stat64_to_file_attr(re.0),
2172        };
2173        trace!(
2174            "copy_regfile_up: node {} in lower layer's inode {}",
2175            node.inode, lower_inode
2176        );
2177
2178        if !parent_node.in_upper_layer().await {
2179            parent_node.clone().create_upper_dir(ctx, None).await?;
2180        }
2181
2182        // create the file in upper layer using information from lower layer
2183
2184        let flags = libc::O_WRONLY;
2185        let mode = mode_from_kind_and_perm(st.attr.kind, st.attr.perm);
2186
2187        let upper_handle = Arc::new(Mutex::new(0));
2188        let upper_real_inode = Arc::new(Mutex::new(None));
2189        parent_node
2190            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2191                // We already create upper dir for parent_node.
2192                let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2193                    error!("parent {} has no upper inode", parent_node.inode);
2194                    Error::from_raw_os_error(libc::EINVAL)
2195                })?;
2196                // We manually unfold the `create` logic here instead of calling the `create` method directly.
2197                // This is necessary to preserve the original file's UID and GID during the copy-up process.
2198                if !parent_real_inode.in_upper_layer {
2199                    return Err(Error::from_raw_os_error(libc::EROFS));
2200                }
2201                let name = node.name.read().await;
2202                let name = OsStr::new(name.as_str());
2203                let op_ctx = crate::context::OperationContext::with_credentials(
2204                    ctx,
2205                    st.attr.uid,
2206                    st.attr.gid,
2207                );
2208                let create_rep = parent_real_inode
2209                    .layer
2210                    .create_with_context(
2211                        op_ctx,
2212                        parent_real_inode.inode,
2213                        name,
2214                        mode,
2215                        flags.try_into().unwrap(),
2216                    )
2217                    .await?;
2218
2219                let (inode, h) = (
2220                    RealInode {
2221                        layer: parent_real_inode.layer.clone(),
2222                        in_upper_layer: true,
2223                        inode: create_rep.attr.ino,
2224                        whiteout: false,
2225                        opaque: false,
2226                        stat: Some(ReplyAttr {
2227                            ttl: create_rep.ttl,
2228                            attr: create_rep.attr,
2229                        }),
2230                    },
2231                    Some(create_rep.fh),
2232                );
2233                trace!(
2234                    "copy_regfile_up: created upper file {name:?} with inode {}",
2235                    inode.inode
2236                );
2237                *upper_handle.lock().await = h.unwrap_or(0);
2238                upper_real_inode.lock().await.replace(inode);
2239                Ok(false)
2240            })
2241            .await?;
2242
2243        let rep = lower_layer
2244            .open(ctx, lower_inode, libc::O_RDONLY as u32)
2245            .await?;
2246
2247        let lower_handle = rep.fh;
2248
2249        // need to use work directory and then rename file to
2250        // final destination for atomic reasons.. not deal with it for now,
2251        // use stupid copy at present.
2252        // FIXME: this need a lot of work here, ntimes, xattr, etc.
2253
2254        // Copy from lower real inode to upper real inode.
2255        // TODO: use sendfile here.
2256
2257        let u_handle = *upper_handle.lock().await;
2258        let ri = upper_real_inode.lock().await.take();
2259        if let Some(ri) = ri {
2260            let mut offset: usize = 0;
2261            let size = 4 * 1024 * 1024;
2262
2263            loop {
2264                let ret = lower_layer
2265                    .read(ctx, lower_inode, lower_handle, offset as u64, size)
2266                    .await?;
2267
2268                let len = ret.data.len();
2269                if len == 0 {
2270                    break;
2271                }
2272
2273                let ret = ri
2274                    .layer
2275                    .write(ctx, ri.inode, u_handle, offset as u64, &ret.data, 0, 0)
2276                    .await?;
2277
2278                assert_eq!(ret.written as usize, len);
2279                offset += ret.written as usize;
2280            }
2281
2282            if let Err(e) = ri.layer.release(ctx, ri.inode, u_handle, 0, 0, true).await {
2283                let e: std::io::Error = e.into();
2284                // Ignore ENOSYS.
2285                if e.raw_os_error() != Some(libc::ENOSYS) {
2286                    return Err(e);
2287                }
2288            }
2289            node.add_upper_inode(ri, true).await;
2290        } else {
2291            error!("BUG: upper real inode is None after copy up");
2292        }
2293
2294        lower_layer
2295            .release(ctx, lower_inode, lower_handle, 0, 0, true)
2296            .await?;
2297
2298        Ok(Arc::clone(&node))
2299    }
2300
2301    /// Copies the specified node to the upper layer of the filesystem
2302    ///
2303    /// Performs different operations based on the node type:
2304    /// - **Directory**: Creates a corresponding directory in the upper layer
2305    /// - **Symbolic link**: Recursively copies to the upper layer
2306    /// - **Regular file**: Copies file content to the upper layer
2307    ///
2308    /// # Parameters
2309    /// * `ctx`: FUSE request context
2310    /// * `node`: Reference to the node to be copied
2311    ///
2312    /// # Returns
2313    /// Returns a reference to the upper-layer node on success, or an error on failure
2314    async fn copy_node_up(
2315        &self,
2316        ctx: Request,
2317        node: Arc<OverlayInode>,
2318    ) -> Result<Arc<OverlayInode>> {
2319        if node.in_upper_layer().await {
2320            return Ok(node);
2321        }
2322
2323        let st = node.stat64(ctx).await?;
2324        match st.attr.kind {
2325            FileType::Directory => {
2326                node.clone().create_upper_dir(ctx, None).await?;
2327                Ok(node)
2328            }
2329            FileType::Symlink => {
2330                // For symlink.
2331                self.copy_symlink_up(ctx, node).await
2332            }
2333            FileType::RegularFile => {
2334                // For regular file.
2335                self.copy_regfile_up(ctx, node).await
2336            }
2337            _ => {
2338                // For other file types. return error.
2339                Err(Error::from_raw_os_error(libc::EINVAL))
2340            }
2341        }
2342    }
2343
2344    /// recursively copy directory and all its contents to upper layer
2345    async fn copy_directory_up(
2346        &self,
2347        ctx: Request,
2348        node: Arc<OverlayInode>,
2349    ) -> Result<Arc<OverlayInode>> {
2350        // Ensure the directory itself is copied up first
2351        self.copy_node_up(ctx, node.clone()).await?;
2352
2353        // load directory to cache
2354        self.load_directory(ctx, &node).await?;
2355
2356        // go through all children
2357        let children = node.childrens.lock().await.clone();
2358        for (_name, child) in children.iter() {
2359            if _name == "." || _name == ".." {
2360                continue;
2361            }
2362            // jump over whiteout
2363            if child.whiteout.load(Ordering::Relaxed) {
2364                continue;
2365            }
2366            let st = child.stat64(ctx).await?;
2367            if !child.in_upper_layer().await {
2368                match st.attr.kind {
2369                    FileType::Directory => {
2370                        // recursively copy subdirectory
2371                        Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2372                    }
2373                    FileType::Symlink | FileType::RegularFile => {
2374                        // copy node up symlink or regular file
2375                        Box::pin(self.copy_node_up(ctx, child.clone())).await?;
2376                    }
2377                    _ => {
2378                        // other file types are ignored
2379                    }
2380                }
2381            } else if utils::is_dir(&st.attr.kind) {
2382                // If it is already in the upper layer, but the directory is not loaded,
2383                // ensure that its contents are also copied up recursively.
2384                Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2385            }
2386        }
2387
2388        Ok(node)
2389    }
2390
2391    async fn do_rm(&self, ctx: Request, parent: u64, name: &OsStr, dir: bool) -> Result<()> {
2392        // 1. Read-only mount guard
2393        if self.upper_layer.is_none() {
2394            return Err(Error::from_raw_os_error(libc::EROFS));
2395        }
2396
2397        // 2. Locate the parent Overlay Inode.
2398        // Find parent Overlay Inode.
2399        let pnode = self.lookup_node(ctx, parent, "").await?;
2400        if pnode.whiteout.load(Ordering::Relaxed) {
2401            return Err(Error::from_raw_os_error(libc::ENOENT));
2402        }
2403        let to_name = name.to_str().unwrap();
2404
2405        // 3. Locate the child Overlay Inode for the given name
2406        // Find the Overlay Inode for child with <name>.
2407        let node = self.lookup_node(ctx, parent, to_name).await?;
2408        if node.whiteout.load(Ordering::Relaxed) {
2409            // already deleted.
2410            return Err(Error::from_raw_os_error(libc::ENOENT));
2411        }
2412
2413        // 4. If removing a directory, ensure it is empty of real entries
2414        if dir {
2415            self.load_directory(ctx, &node).await?;
2416            let (count, whiteouts) = node.count_entries_and_whiteout(ctx).await?;
2417            trace!("entries: {count}, whiteouts: {whiteouts}\n");
2418            if count > 0 {
2419                return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
2420            }
2421
2422            // Delete all whiteouts.
2423            if whiteouts > 0 && node.in_upper_layer().await {
2424                self.empty_node_directory(ctx, Arc::clone(&node)).await?;
2425            }
2426
2427            trace!("whiteouts deleted!\n");
2428        }
2429
2430        // 5. Decide whether we need to create a whiteout entry
2431        // We'll filp this off if upper-layer unlink suffices or parent is opaque
2432        let need_whiteout = AtomicBool::new(true);
2433        let pnode = self.copy_node_up(ctx, Arc::clone(&pnode)).await?;
2434
2435        if node.upper_layer_only().await {
2436            need_whiteout.store(false, Ordering::Relaxed);
2437        }
2438
2439        let mut df = |parent_upper_inode: Option<Arc<RealInode>>| async {
2440            let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2441                error!(
2442                    "BUG: parent {} has no upper inode after copy up",
2443                    pnode.inode
2444                );
2445                Error::from_raw_os_error(libc::EINVAL)
2446            })?;
2447
2448            // Parent is opaque, it shadows everything in lower layers so no need to create extra whiteouts.
2449            if parent_real_inode.opaque {
2450                need_whiteout.store(false, Ordering::Relaxed);
2451            }
2452            if dir {
2453                parent_real_inode
2454                    .layer
2455                    .rmdir(ctx, parent_real_inode.inode, name)
2456                    .await?;
2457            } else {
2458                parent_real_inode
2459                    .layer
2460                    .unlink(ctx, parent_real_inode.inode, name)
2461                    .await?;
2462            }
2463
2464            Ok(false)
2465        };
2466
2467        // 6. Perform the unlink/rmdir operation and memory cleanup
2468        if node.in_upper_layer().await {
2469            pnode.handle_upper_inode_locked(&mut df).await?;
2470        }
2471        pnode.remove_child(name.to_str().unwrap()).await;
2472        let path = node.path.read().await.clone();
2473        self.remove_inode(node.inode, Some(path)).await;
2474
2475        // 7. If needed, create a entry in the upper layer to mask lower-layer files
2476        if need_whiteout.load(Ordering::Relaxed) {
2477            trace!("do_rm: creating whiteout\n");
2478            // pnode is copied up, so it has upper layer.
2479            pnode
2480                .handle_upper_inode_locked(
2481                    &mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2482                        let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2483                            error!(
2484                                "BUG: parent {} has no upper inode after copy up",
2485                                pnode.inode
2486                            );
2487                            Error::from_raw_os_error(libc::EINVAL)
2488                        })?;
2489
2490                        let child_ri = parent_real_inode.create_whiteout(ctx, to_name).await?; //FIXME..............
2491                        let path = format!("{}/{}", pnode.path.read().await, to_name);
2492                        let ino: u64 = self.alloc_inode(&path).await?;
2493                        let ovi = Arc::new(
2494                            OverlayInode::new_from_real_inode(to_name, ino, path.clone(), child_ri)
2495                                .await,
2496                        );
2497
2498                        self.insert_inode(ino, ovi.clone()).await;
2499                        pnode.insert_child(to_name, ovi.clone()).await;
2500                        Ok(false)
2501                    },
2502                )
2503                .await?;
2504        }
2505
2506        Ok(())
2507    }
2508
2509    async fn do_fsync(
2510        &self,
2511        ctx: Request,
2512        inode: Inode,
2513        datasync: bool,
2514        handle: Handle,
2515        syncdir: bool,
2516    ) -> Result<()> {
2517        // Use O_RDONLY flags which indicates no copy up.
2518        let data = self
2519            .get_data(ctx, Some(handle), inode, libc::O_RDONLY as u32)
2520            .await?;
2521
2522        trace!("do_fsync: got data for handle: {handle}, inode:{inode}");
2523
2524        match data.real_handle {
2525            // FIXME: need to test if inode matches corresponding handle?
2526            None => {
2527                trace!("do_fsync: no real handle found for handle: {handle}, inode:{inode}");
2528                Err(Error::from_raw_os_error(libc::ENOENT))
2529            }
2530            Some(ref rh) => {
2531                let real_handle = rh.handle.load(Ordering::Relaxed);
2532                // TODO: check if it's in upper layer? @weizhang555
2533                if syncdir {
2534                    trace!(
2535                        "do_fsync: layer.fsyncdir called for handle: {}, inode:{}; rh.inode: {}, real_handle: {}",
2536                        handle, inode, rh.inode, real_handle
2537                    );
2538                    rh.layer
2539                        .fsyncdir(ctx, rh.inode, real_handle, datasync)
2540                        .await
2541                        .map_err(|e| e.into())
2542                } else {
2543                    rh.layer
2544                        .fsync(ctx, rh.inode, real_handle, datasync)
2545                        .await
2546                        .map_err(|e| e.into())
2547                }
2548            }
2549        }
2550    }
2551
2552    // Delete everything in the directory only on upper layer, ignore lower layers.
2553    async fn empty_node_directory(&self, ctx: Request, node: Arc<OverlayInode>) -> Result<()> {
2554        let st = node.stat64(ctx).await?;
2555        if !utils::is_dir(&st.attr.kind) {
2556            // This function can only be called on directories.
2557            return Err(Error::from_raw_os_error(libc::ENOTDIR));
2558        }
2559
2560        let (layer, in_upper, inode) = node.first_layer_inode().await;
2561        if !in_upper {
2562            return Ok(());
2563        }
2564
2565        // Copy node.childrens Hashmap to Vector, the Vector is also used as temp storage,
2566        // Without this, Rust won't allow us to remove them from node.childrens.
2567        let iter = node
2568            .childrens
2569            .lock()
2570            .await
2571            .values()
2572            .cloned()
2573            .collect::<Vec<_>>();
2574
2575        for child in iter {
2576            // We only care about upper layer, ignore lower layers.
2577            if child.in_upper_layer().await {
2578                let child_name = child.name.read().await.clone();
2579                let child_name_os = OsStr::new(&child_name);
2580                if child.whiteout.load(Ordering::Relaxed) {
2581                    layer.delete_whiteout(ctx, inode, child_name_os).await?
2582                } else {
2583                    let s = child.stat64(ctx).await?;
2584                    let cname: &OsStr = OsStr::new(&child_name_os);
2585                    if utils::is_dir(&s.attr.kind) {
2586                        let (count, whiteouts) = child.count_entries_and_whiteout(ctx).await?;
2587                        if count + whiteouts > 0 {
2588                            let cb = child.clone();
2589                            Box::pin(async move { self.empty_node_directory(ctx, cb).await })
2590                                .await?;
2591                        }
2592                        layer.rmdir(ctx, inode, cname).await?
2593                    } else {
2594                        layer.unlink(ctx, inode, cname).await?;
2595                    }
2596                }
2597
2598                let cpath = child.path.read().await.clone();
2599                // delete the child
2600                self.remove_inode(child.inode, Some(cpath)).await;
2601                node.remove_child(&child_name).await;
2602            }
2603        }
2604
2605        Ok(())
2606    }
2607
2608    async fn find_real_info_from_handle(
2609        &self,
2610        handle: Handle,
2611    ) -> Result<(Arc<BoxedLayer>, Inode, Handle)> {
2612        match self.handles.lock().await.get(&handle) {
2613            Some(h) => match h.real_handle {
2614                Some(ref rhd) => {
2615                    trace!(
2616                        "find_real_info_from_handle: layer in upper: {}",
2617                        rhd.in_upper_layer
2618                    );
2619                    Ok((
2620                        rhd.layer.clone(),
2621                        rhd.inode,
2622                        rhd.handle.load(Ordering::Relaxed),
2623                    ))
2624                }
2625                None => Err(Error::from_raw_os_error(libc::ENOENT)),
2626            },
2627
2628            None => Err(Error::from_raw_os_error(libc::ENOENT)),
2629        }
2630    }
2631
2632    async fn find_real_inode(&self, inode: Inode) -> Result<(Arc<BoxedLayer>, Inode)> {
2633        if let Some(n) = self.get_active_inode(inode).await {
2634            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2635            return Ok((first_layer, first_inode));
2636        } else if let Some(n) = self.get_all_inode(inode).await {
2637            trace!("find_real_inode: found inode by get_all_inode: {}", n.inode);
2638            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2639            return Ok((first_layer, first_inode));
2640        }
2641
2642        Err(Error::from_raw_os_error(libc::ENOENT))
2643    }
2644
2645    async fn get_data(
2646        &self,
2647        ctx: Request,
2648        handle: Option<Handle>,
2649        inode: Inode,
2650        flags: u32,
2651    ) -> Result<Arc<HandleData>> {
2652        let no_open = self.no_open.load(Ordering::Relaxed);
2653        if !no_open {
2654            if let Some(h) = handle
2655                && let Some(v) = self.handles.lock().await.get(&h)
2656                && v.node.inode == inode
2657            {
2658                // trace!("get_data: found handle");
2659                return Ok(Arc::clone(v));
2660            }
2661        } else {
2662            let readonly: bool = flags
2663                & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
2664                    as u32
2665                == 0;
2666
2667            // lookup node
2668            let node = self.lookup_node(ctx, inode, "").await?;
2669
2670            // whiteout node
2671            if node.whiteout.load(Ordering::Relaxed) {
2672                return Err(Error::from_raw_os_error(libc::ENOENT));
2673            }
2674
2675            if !readonly {
2676                // Check if upper layer exists, return EROFS is not exists.
2677                self.upper_layer
2678                    .as_ref()
2679                    .cloned()
2680                    .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2681                // copy up to upper layer
2682                self.copy_node_up(ctx, Arc::clone(&node)).await?;
2683            }
2684
2685            let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
2686            let handle_data = HandleData {
2687                node: Arc::clone(&node),
2688                real_handle: Some(RealHandle {
2689                    layer,
2690                    in_upper_layer,
2691                    inode,
2692                    handle: AtomicU64::new(0),
2693                }),
2694            };
2695            return Ok(Arc::new(handle_data));
2696        }
2697
2698        Err(Error::from_raw_os_error(libc::ENOENT))
2699    }
2700
2701    // extend or init the inodes number to one overlay if the current number is done.
2702    pub async fn extend_inode_alloc(&self, key: u64) {
2703        let next_inode = key * INODE_ALLOC_BATCH;
2704        let limit_inode = next_inode + INODE_ALLOC_BATCH - 1;
2705        self.inodes
2706            .write()
2707            .await
2708            .extend_inode_number(next_inode, limit_inode);
2709    }
2710}
2711
/// Bundles everything `mount_fs` needs to mount an overlay filesystem.
///
/// The type parameters let callers pass borrowed or owned paths and strings:
/// `P`, `Q` and `R` are path-like, `M` is string-like, `N` converts into a `String`
/// and `I` is any iterable of `R`.
#[derive(Debug, Clone)]
pub struct OverlayArgs<
    P: AsRef<Path>,
    Q: AsRef<Path>,
    R: AsRef<Path>,
    M: AsRef<str>,
    N: Into<String>,
    I: IntoIterator<Item = R>,
> {
    /// Path at which the filesystem is mounted.
    pub mountpoint: P,
    /// Root directory of the upper layer.
    pub upperdir: Q,
    /// Root directories of the lower layers.
    pub lowerdir: I,
    /// Selects the privileged mount path when `true`, the unprivileged one otherwise.
    pub privileged: bool,
    /// Optional mapping string handed to every layer.
    pub mapping: Option<M>,
    /// Optional filesystem name.
    pub name: Option<N>,
    /// Value for the `allow_other` mount option.
    pub allow_other: bool,
}
2731
2732/// Mounts the filesystem using the given parameters and returns the mount handle.
2733///
2734/// # Parameters
2735/// - `mountpoint`: Path to the mount point.
2736/// - `upperdir`: Path to the upper directory.
2737/// - `lowerdir`: Paths to the lower directories.
2738/// - `privileged`: If true, use privileged mount; otherwise, unprivileged mount.
2739/// - `mapping`: Optional user/group ID mapping for unprivileged mounts.
2740/// - `name`: Optional name for the filesystem.
2741/// - `allow_other`: If true, allows other users to access the filesystem.
2742///
2743/// # Returns
2744/// A mount handle on success.
2745pub async fn mount_fs<P, Q, R, M, N, I>(
2746    args: OverlayArgs<P, Q, R, M, N, I>,
2747) -> rfuse3::raw::MountHandle
2748where
2749    P: AsRef<Path>,
2750    Q: AsRef<Path>,
2751    R: AsRef<Path>,
2752    M: AsRef<str>,
2753    N: Into<String>,
2754    I: IntoIterator<Item = R>,
2755{
2756    // Create lower layers
2757    let mut lower_layers: Vec<Arc<BoxedLayer>> = Vec::new();
2758    for lower in args.lowerdir {
2759        let layer = new_passthroughfs_layer(PassthroughArgs {
2760            root_dir: lower,
2761            mapping: args.mapping.as_ref().map(|m| m.as_ref()),
2762        })
2763        .await
2764        .expect("Failed to create lower filesystem layer");
2765        lower_layers.push(Arc::new(layer) as Arc<BoxedLayer>);
2766    }
2767    // Create upper layer
2768    let upper_layer: Arc<BoxedLayer> = Arc::new(
2769        new_passthroughfs_layer(PassthroughArgs {
2770            root_dir: args.upperdir,
2771            mapping: args.mapping.as_ref().map(|m| m.as_ref()),
2772        })
2773        .await
2774        .expect("Failed to create upper filesystem layer"),
2775    );
2776
2777    // Configure overlay filesystem
2778    let config = Config {
2779        mountpoint: args.mountpoint.as_ref().to_path_buf(),
2780        do_import: true,
2781        ..Default::default()
2782    };
2783    let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1)
2784        .expect("Failed to initialize OverlayFs");
2785    let logfs = LoggingFileSystem::new(overlayfs);
2786
2787    let mount_path: OsString = OsString::from(args.mountpoint.as_ref().as_os_str());
2788
2789    // Obtain the current user's uid and gid
2790    let uid = unsafe { libc::getuid() };
2791    let gid = unsafe { libc::getgid() };
2792
2793    let mut mount_options = MountOptions::default();
2794    mount_options
2795        .force_readdir_plus(true)
2796        .uid(uid)
2797        .gid(gid)
2798        .allow_other(args.allow_other);
2799    if let Some(name) = args.name {
2800        mount_options.fs_name(name);
2801    }
2802
2803    // Mount filesystem based on privilege flag and return the mount handle
2804    if !args.privileged {
2805        debug!("Mounting with unprivileged mode");
2806        Session::new(mount_options)
2807            .mount_with_unprivileged(logfs, mount_path)
2808            .await
2809            .expect("Unprivileged mount failed")
2810    } else {
2811        debug!("Mounting with privileged mode");
2812        Session::new(mount_options)
2813            .mount(logfs, mount_path)
2814            .await
2815            .expect("Privileged mount failed")
2816    }
2817}