libfuse_fs/overlayfs/
mod.rs

1// Copyright (C) 2023 Ant Group. All rights reserved.
2//  2024 From [fuse_backend_rs](https://github.com/cloud-hypervisor/fuse-backend-rs)
3// SPDX-License-Identifier: Apache-2.0
4
5#![allow(missing_docs)]
6mod async_io;
7pub mod config;
8mod inode_store;
9mod layer;
10mod utils;
11
12//mod tempfile;
13use core::panic;
14use std::collections::HashMap;
15use std::ffi::{OsStr, OsString};
16use std::future::Future;
17use std::io::{Error, Result};
18
19use config::Config;
20use futures::StreamExt as _;
21use rfuse3::raw::reply::{
22    DirectoryEntry, DirectoryEntryPlus, ReplyAttr, ReplyEntry, ReplyOpen, ReplyStatFs,
23};
24use rfuse3::raw::{Filesystem, Request, Session};
25use std::sync::{Arc, Weak};
26
27use rfuse3::{Errno, FileType, MountOptions, mode_from_kind_and_perm};
28const SLASH_ASCII: char = '/';
29use futures::future::join_all;
30use futures::stream::iter;
31
32use crate::passthrough::newlogfs::LoggingFileSystem;
33use crate::passthrough::{PassthroughFs, new_passthroughfs_layer};
34use inode_store::InodeStore;
35use layer::Layer;
36use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
37
38use tokio::sync::{Mutex, RwLock};
39
// Inode number type used throughout this module.
pub type Inode = u64;
// Open-file handle identifier type used throughout this module.
pub type Handle = u64;

// Concrete layer type. Despite the name this is a plain alias for
// `PassthroughFs`, not a boxed trait object.
type BoxedLayer = PassthroughFs;
//type BoxedFileSystem = Box<dyn FileSystem<Inode = Inode, Handle = Handle> + Send + Sync>;
// NOTE(review): presumably the size of an inode-number allocation batch; it is
// not referenced in the visible part of this file — confirm against its users.
const INODE_ALLOC_BATCH: u64 = 0x1_0000_0000;
// RealInode represents one inode object in specific layer.
// Also, each RealInode maps to one Entry, which should be 'forgotten' after drop
// (the `Drop` impl issues a `forget` with a count of 1 against `layer`).
// Important note: do not impl Clone trait for it or refcount will be messed up.
pub(crate) struct RealInode {
    // Backing layer this inode lives in.
    pub layer: Arc<PassthroughFs>,
    // True when `layer` is the writable upper layer; mutating helpers
    // (mkdir, create, mknod, link, symlink, create_whiteout) return EROFS otherwise.
    pub in_upper_layer: bool,
    // Inode number inside `layer`. `stat64` treats 0 as "no such entry" (ENOENT).
    pub inode: u64,
    // File is whiteouted, we need to hide it.
    pub whiteout: bool,
    // Directory is opaque, we need to hide all entries inside it.
    pub opaque: bool,
    // Attributes captured when this RealInode was built, if available.
    // Consumers fall back to a fresh `stat64` call when this is `None`.
    pub stat: Option<ReplyAttr>,
}
59
// OverlayInode is the merged view of one path across all layers.
// OverlayInode must be protected by lock, it can be operated by multiple threads.
// #[derive(Default)]
pub(crate) struct OverlayInode {
    // Inode hash table, map from 'name' to 'OverlayInode'.
    pub childrens: Mutex<HashMap<String, Arc<OverlayInode>>>,
    // Weak back-reference to the parent node; weak so that a child does not
    // keep its parent alive.
    pub parent: Mutex<Weak<OverlayInode>>,
    // Backend inodes from all layers. The first element is the top-most one;
    // helpers such as `in_upper_layer` and `first_layer_inode` only look at it.
    pub real_inodes: Mutex<Vec<Arc<RealInode>>>,
    // Inode number.
    pub inode: u64,
    // Path of this node; children are built as "<parent path>/<name>".
    pub path: RwLock<String>,
    // Entry name of this node inside its parent directory.
    pub name: RwLock<String>,
    // Lookup reference count: 0 from `new()`, 1 from `new_from_real_inode()`.
    pub lookups: AtomicU64,
    // Node is whiteout-ed.
    pub whiteout: AtomicBool,
    // Directory is loaded.
    pub loaded: AtomicBool,
    // hard link counter(nlink), init as 1
    pub nlink: AtomicU64,
}
80
// Caching policy selector; `Auto` is the default variant.
// NOTE(review): the effect of each variant is not visible in this part of the
// file — presumably it mirrors the FUSE cache modes; confirm against its users.
#[derive(Default)]
pub enum CachePolicy {
    Never,
    #[default]
    Auto,
    Always,
}
// Overlay filesystem state: an optional writable upper layer stacked on top of
// a list of lower layers, plus the inode and handle bookkeeping.
pub struct OverlayFs {
    // Parameters supplied to `OverlayFs::new`.
    config: Config,
    // Lower layers, as passed to `OverlayFs::new`.
    lower_layers: Vec<Arc<PassthroughFs>>,
    // Upper layer, if any.
    upper_layer: Option<Arc<PassthroughFs>>,
    // All inodes in FS.
    inodes: RwLock<InodeStore>,
    // Open file handles.
    handles: Mutex<HashMap<u64, Arc<HandleData>>>,
    // Handle counter; initialised to 1 by `OverlayFs::new`.
    next_handle: AtomicU64,
    // Feature flags below all start as `false` in `OverlayFs::new`.
    // NOTE(review): presumably toggled during FUSE init negotiation — confirm.
    writeback: AtomicBool,
    no_open: AtomicBool,
    no_opendir: AtomicBool,
    killpriv_v2: AtomicBool,
    perfile_dax: AtomicBool,
    // Inode number of the filesystem root, returned by `root_inode()`.
    root_inodes: u64,
}
104
// This is a wrapper of one open handle on an inode in a specific layer.
// It can't impl Clone trait.
struct RealHandle {
    // Layer the handle was opened in.
    layer: Arc<PassthroughFs>,
    // True when `layer` is the upper layer.
    in_upper_layer: bool,
    // Inode number inside `layer`.
    inode: u64,
    // Handle value; stored atomically.
    // NOTE(review): presumably the handle returned by the layer's open call — confirm.
    handle: AtomicU64,
}
112
// Per-handle record stored in `OverlayFs::handles`.
struct HandleData {
    // Overlay node the handle refers to.
    node: Arc<OverlayInode>,
    //offset: libc::off_t,
    // Backing handle in a concrete layer, when one exists.
    real_handle: Option<RealHandle>,
}
118
119// RealInode is a wrapper of one inode in specific layer.
120// All layer operations returning Entry should be wrapped in RealInode implementation
121// so that we can increase the refcount(lookup count) of each inode and decrease it after Drop.
122// Important: do not impl 'Copy' trait for it or refcount will be messed up.
123impl RealInode {
124    async fn new(
125        layer: Arc<PassthroughFs>,
126        in_upper_layer: bool,
127        inode: u64,
128        whiteout: bool,
129        opaque: bool,
130    ) -> Self {
131        let mut ri = RealInode {
132            layer,
133            in_upper_layer,
134            inode,
135            whiteout,
136            opaque,
137            stat: None,
138        };
139        match ri.stat64_ignore_enoent(&Request::default()).await {
140            Ok(v) => {
141                ri.stat = v;
142            }
143            Err(e) => {
144                error!("stat64 failed during RealInode creation: {e}");
145            }
146        }
147        ri
148    }
149
150    async fn stat64(&self, req: &Request) -> Result<ReplyAttr> {
151        let layer = self.layer.as_ref();
152        if self.inode == 0 {
153            return Err(Error::from_raw_os_error(libc::ENOENT));
154        }
155        // trace!("stat64: trying to getattr req: {:?}", req);
156        layer
157            .getattr(*req, self.inode, None, 0)
158            .await
159            .map_err(|e| e.into())
160    }
161
162    async fn stat64_ignore_enoent(&self, req: &Request) -> Result<Option<ReplyAttr>> {
163        match self.stat64(req).await {
164            Ok(v1) => Ok(Some(v1)),
165            Err(e) => match e.raw_os_error() {
166                Some(raw_error) => {
167                    if raw_error != libc::ENOENT || raw_error != libc::ENAMETOOLONG {
168                        return Ok(None);
169                    }
170                    Err(e)
171                }
172                None => Err(e),
173            },
174        }
175    }
176
177    // Do real lookup action in specific layer, this call will increase Entry refcount which must be released later.
178    async fn lookup_child_ignore_enoent(
179        &self,
180        ctx: Request,
181        name: &str,
182    ) -> Result<Option<ReplyEntry>> {
183        let cname = OsStr::new(name);
184        // Real inode must have a layer.
185        let layer = self.layer.as_ref();
186        match layer.lookup(ctx, self.inode, cname).await {
187            Ok(v) => {
188                // Negative entry also indicates missing entry.
189                if v.attr.ino == 0 {
190                    return Ok(None);
191                }
192                Ok(Some(v))
193            }
194            Err(e) => {
195                let ioerror: std::io::Error = e.into();
196                if let Some(raw_error) = ioerror.raw_os_error() {
197                    if raw_error == libc::ENOENT || raw_error == libc::ENAMETOOLONG {
198                        return Ok(None);
199                    }
200                }
201
202                Err(e.into())
203            }
204        }
205    }
206
207    // Find child inode in same layer under this directory(Self).
208    // Return None if not found.
209    async fn lookup_child(&self, ctx: Request, name: &str) -> Result<Option<RealInode>> {
210        if self.whiteout {
211            return Ok(None);
212        }
213
214        let layer = self.layer.as_ref();
215
216        // Find child Entry with <name> under directory with inode <self.inode>.
217        match self.lookup_child_ignore_enoent(ctx, name).await? {
218            Some(v) => {
219                // The Entry must be forgotten in each layer, which will be done automatically by Drop operation.
220                let (whiteout, opaque) = if v.attr.kind == FileType::Directory {
221                    (false, layer.is_opaque(ctx, v.attr.ino).await?)
222                } else {
223                    (layer.is_whiteout(ctx, v.attr.ino).await?, false)
224                };
225
226                Ok(Some(RealInode {
227                    layer: self.layer.clone(),
228                    in_upper_layer: self.in_upper_layer,
229                    inode: v.attr.ino,
230                    whiteout,
231                    opaque,
232                    stat: Some(ReplyAttr {
233                        ttl: v.ttl,
234                        attr: v.attr,
235                    }),
236                }))
237            }
238            None => Ok(None),
239        }
240    }
241
242    // Read directory entries from specific RealInode, error out if it's not directory.
243    async fn readdir(&self, ctx: Request) -> Result<HashMap<String, RealInode>> {
244        // Deleted inode should not be read.
245        if self.whiteout {
246            return Err(Error::from_raw_os_error(libc::ENOENT));
247        }
248        // trace!("readdir: before stat");
249        let stat = match self.stat.clone() {
250            Some(v) => v,
251            None => self.stat64(&ctx).await?,
252        };
253
254        // Must be directory.
255        if stat.attr.kind != FileType::Directory {
256            return Err(Error::from_raw_os_error(libc::ENOTDIR));
257        }
258
259        // Open the directory and load each entry.
260        let opendir_res = self
261            .layer
262            .opendir(ctx, self.inode, libc::O_RDONLY as u32)
263            .await;
264        // trace!("readdir: after opendir");
265        let handle = match opendir_res {
266            Ok(handle) => handle,
267
268            // opendir may not be supported if no_opendir is set, so we can ignore this error.
269            Err(e) => {
270                let ioerror: std::io::Error = e.into();
271                match ioerror.raw_os_error() {
272                    Some(raw_error) => {
273                        if raw_error == libc::ENOSYS {
274                            // We can still call readdir with inode if opendir is not supported in this layer.
275                            ReplyOpen { fh: 0, flags: 0 }
276                        } else {
277                            return Err(e.into());
278                        }
279                    }
280                    None => {
281                        return Err(e.into());
282                    }
283                }
284            }
285        };
286
287        let child_names = self.layer.readdir(ctx, self.inode, handle.fh, 0).await?;
288        // Non-zero handle indicates successful 'open', we should 'release' it.
289        if handle.fh > 0 {
290            self.layer
291                .releasedir(ctx, self.inode, handle.fh, handle.flags)
292                .await?
293            //DIFF
294        }
295
296        // Lookup all child and construct "RealInode"s.
297        let child_real_inodes = Arc::new(Mutex::new(HashMap::new()));
298        // trace!("readdir: before iter childrens");
299        let a_map = child_names.entries.map(|entery| async {
300            match entery {
301                Ok(dire) => {
302                    let dname = dire.name.into_string().unwrap();
303                    if dname == "." || dname == ".." {
304                        // Skip . and .. entries.
305                        return Ok(());
306                    }
307                    // trace!("readdir: before lookup child: dname={}", dname);
308                    if let Some(child) = self.lookup_child(ctx, &dname).await? {
309                        child_real_inodes.lock().await.insert(dname, child);
310                    }
311                    Ok(())
312                }
313                Err(err) => Err(err),
314            }
315        });
316        let k = join_all(a_map.collect::<Vec<_>>().await).await;
317        drop(k);
318        // Now into_inner func is safety.
319        let re = Arc::try_unwrap(child_real_inodes)
320            .map_err(|_| Errno::new_not_exist())?
321            .into_inner();
322        // trace!("readdir: return");
323        Ok(re)
324    }
325
326    async fn create_whiteout(&self, ctx: Request, name: &str) -> Result<RealInode> {
327        if !self.in_upper_layer {
328            return Err(Error::from_raw_os_error(libc::EROFS));
329        }
330
331        // from &str to &OsStr
332        let name_osstr = OsStr::new(name);
333        let entry = self
334            .layer
335            .create_whiteout(ctx, self.inode, name_osstr)
336            .await?;
337
338        // Wrap whiteout to RealInode.
339        Ok(RealInode {
340            layer: self.layer.clone(),
341            in_upper_layer: true,
342            inode: entry.attr.ino,
343            whiteout: true,
344            opaque: false,
345            stat: Some(ReplyAttr {
346                ttl: entry.ttl,
347                attr: entry.attr,
348            }),
349        })
350    }
351
352    async fn mkdir(&self, ctx: Request, name: &str, mode: u32, umask: u32) -> Result<RealInode> {
353        if !self.in_upper_layer {
354            return Err(Error::from_raw_os_error(libc::EROFS));
355        }
356
357        let name_osstr = OsStr::new(name);
358        let entry = self
359            .layer
360            .mkdir(ctx, self.inode, name_osstr, mode, umask)
361            .await?;
362
363        // update node's first_layer
364        Ok(RealInode {
365            layer: self.layer.clone(),
366            in_upper_layer: true,
367            inode: entry.attr.ino,
368            whiteout: false,
369            opaque: false,
370            stat: Some(ReplyAttr {
371                ttl: entry.ttl,
372                attr: entry.attr,
373            }),
374        })
375    }
376
377    async fn create(
378        &self,
379        ctx: Request,
380        name: &str,
381        mode: u32,
382        flags: u32,
383    ) -> Result<(RealInode, Option<u64>)> {
384        if !self.in_upper_layer {
385            return Err(Error::from_raw_os_error(libc::EROFS));
386        }
387        let name = OsStr::new(name);
388        let create_rep = self
389            .layer
390            .create(ctx, self.inode, name, mode, flags)
391            .await?;
392
393        Ok((
394            RealInode {
395                layer: self.layer.clone(),
396                in_upper_layer: true,
397                inode: create_rep.attr.ino,
398                whiteout: false,
399                opaque: false,
400                stat: Some(ReplyAttr {
401                    ttl: create_rep.ttl,
402                    attr: create_rep.attr,
403                }),
404            },
405            Some(create_rep.fh),
406        ))
407    }
408
409    async fn mknod(
410        &self,
411        ctx: Request,
412        name: &str,
413        mode: u32,
414        rdev: u32,
415        _umask: u32,
416    ) -> Result<RealInode> {
417        if !self.in_upper_layer {
418            return Err(Error::from_raw_os_error(libc::EROFS));
419        }
420        let name = OsStr::new(name);
421        let rep = self.layer.mknod(ctx, self.inode, name, mode, rdev).await?;
422        Ok(RealInode {
423            layer: self.layer.clone(),
424            in_upper_layer: true,
425            inode: rep.attr.ino,
426            whiteout: false,
427            opaque: false,
428            stat: Some(ReplyAttr {
429                ttl: rep.ttl,
430                attr: rep.attr,
431            }),
432        })
433    }
434
435    async fn link(&self, ctx: Request, ino: u64, name: &str) -> Result<RealInode> {
436        if !self.in_upper_layer {
437            return Err(Error::from_raw_os_error(libc::EROFS));
438        }
439        let name = OsStr::new(name);
440        let entry = self.layer.link(ctx, ino, self.inode, name).await?;
441
442        let opaque = if utils::is_dir(&entry.attr.kind) {
443            self.layer.is_opaque(ctx, entry.attr.ino).await?
444        } else {
445            false
446        };
447        Ok(RealInode {
448            layer: self.layer.clone(),
449            in_upper_layer: true,
450            inode: entry.attr.ino,
451            whiteout: false,
452            opaque,
453            stat: Some(ReplyAttr {
454                ttl: entry.ttl,
455                attr: entry.attr,
456            }),
457        })
458    }
459
460    // Create a symlink in self directory.
461    async fn symlink(&self, ctx: Request, link_name: &str, filename: &str) -> Result<RealInode> {
462        if !self.in_upper_layer {
463            return Err(Error::from_raw_os_error(libc::EROFS));
464        }
465        let link_name = OsStr::new(link_name);
466        let filename = OsStr::new(filename);
467        let entry = self
468            .layer
469            .symlink(ctx, self.inode, filename, link_name)
470            .await?;
471
472        Ok(RealInode {
473            layer: self.layer.clone(),
474            in_upper_layer: true,
475            inode: entry.attr.ino,
476            whiteout: false,
477            opaque: false,
478            stat: Some(ReplyAttr {
479                ttl: entry.ttl,
480                attr: entry.attr,
481            }),
482        })
483    }
484}
485
486impl Drop for RealInode {
487    fn drop(&mut self) {
488        let layer = Arc::clone(&self.layer);
489        let inode = self.inode;
490        tokio::spawn(async move {
491            let ctx = Request::default();
492            layer.forget(ctx, inode, 1).await;
493        });
494    }
495}
496
497impl OverlayInode {
498    pub fn new() -> Self {
499        Self {
500            childrens: Mutex::new(HashMap::new()),
501            parent: Mutex::new(Weak::new()),
502            real_inodes: Mutex::new(vec![]),
503            inode: 0,
504            path: RwLock::new(String::new()),
505            name: RwLock::new(String::new()),
506            lookups: AtomicU64::new(0),
507            whiteout: AtomicBool::new(false),
508            loaded: AtomicBool::new(false),
509            nlink: AtomicU64::new(1),
510        }
511    }
512    // Allocate new OverlayInode based on one RealInode,
513    // inode number is always 0 since only OverlayFs has global unique inode allocator.
514    pub async fn new_from_real_inode(
515        name: &str,
516        ino: u64,
517        path: String,
518        real_inode: RealInode,
519    ) -> Self {
520        let mut new = OverlayInode::new();
521        new.inode = ino;
522        new.path = path.into();
523        new.name = name.to_string().into();
524        new.whiteout.store(real_inode.whiteout, Ordering::Relaxed);
525        new.lookups = AtomicU64::new(1);
526        new.real_inodes = Mutex::new(vec![real_inode.into()]);
527        new
528    }
529
530    pub async fn new_from_real_inodes(
531        name: &str,
532        ino: u64,
533        path: String,
534        real_inodes: Vec<RealInode>,
535    ) -> Result<Self> {
536        if real_inodes.is_empty() {
537            error!("BUG: new_from_real_inodes() called with empty real_inodes");
538            return Err(Error::from_raw_os_error(libc::EINVAL));
539        }
540
541        let mut first = true;
542        let mut new = Self::new();
543        for ri in real_inodes {
544            let whiteout = ri.whiteout;
545            let opaque = ri.opaque;
546            let stat = match &ri.stat {
547                Some(v) => v.clone(),
548                None => ri.stat64(&Request::default()).await?,
549            };
550
551            if first {
552                first = false;
553                new = Self::new_from_real_inode(name, ino, path.clone(), ri).await;
554
555                // This is whiteout, no need to check lower layers.
556                if whiteout {
557                    break;
558                }
559
560                // A non-directory file shadows all lower layers as default.
561                if !utils::is_dir(&stat.attr.kind) {
562                    break;
563                }
564
565                // Opaque directory shadows all lower layers.
566                if opaque {
567                    break;
568                }
569            } else {
570                // This is whiteout, no need to record this, break directly.
571                if ri.whiteout {
572                    break;
573                }
574
575                // Only directory have multiple real inodes, so if this is non-first real-inode
576                // and it's not directory, it should indicates some invalid layout. @weizhang555
577                if !utils::is_dir(&stat.attr.kind) {
578                    error!("invalid layout: non-directory has multiple real inodes");
579                    break;
580                }
581
582                // Valid directory.
583                new.real_inodes.lock().await.push(ri.into());
584                // Opaque directory shadows all lower layers.
585                if opaque {
586                    break;
587                }
588            }
589        }
590        Ok(new)
591    }
592
593    pub async fn stat64(&self, ctx: Request) -> Result<ReplyAttr> {
594        // try layers in order or just take stat from first layer?
595        for l in self.real_inodes.lock().await.iter() {
596            if let Some(v) = l.stat64_ignore_enoent(&ctx).await? {
597                return Ok(v);
598            }
599        }
600
601        // not in any layer
602        Err(Error::from_raw_os_error(libc::ENOENT))
603    }
604
605    pub async fn is_dir(&self, ctx: Request) -> Result<bool> {
606        let st = self.stat64(ctx).await?;
607        Ok(utils::is_dir(&st.attr.kind))
608    }
609
610    pub async fn count_entries_and_whiteout(&self, ctx: Request) -> Result<(u64, u64)> {
611        let mut count = 0;
612        let mut whiteouts = 0;
613
614        let st = self.stat64(ctx).await?;
615
616        // must be directory
617        if !utils::is_dir(&st.attr.kind) {
618            return Err(Error::from_raw_os_error(libc::ENOTDIR));
619        }
620
621        for (_, child) in self.childrens.lock().await.iter() {
622            if child.whiteout.load(Ordering::Relaxed) {
623                whiteouts += 1;
624            } else {
625                count += 1;
626            }
627        }
628        Ok((count, whiteouts))
629    }
630
631    pub async fn open(
632        &self,
633        ctx: Request,
634        flags: u32,
635        _fuse_flags: u32,
636    ) -> Result<(Arc<BoxedLayer>, ReplyOpen)> {
637        let (layer, _, inode) = self.first_layer_inode().await;
638        let ro = layer.as_ref().open(ctx, inode, flags).await?;
639        Ok((layer, ro))
640    }
641
642    // Self is directory, fill all childrens.
643    pub async fn scan_childrens(self: &Arc<Self>, ctx: Request) -> Result<Vec<OverlayInode>> {
644        let st = self.stat64(ctx).await?;
645        if !utils::is_dir(&st.attr.kind) {
646            return Err(Error::from_raw_os_error(libc::ENOTDIR));
647        }
648
649        let mut all_layer_inodes: HashMap<String, Vec<RealInode>> = HashMap::new();
650        // read out directories from each layer
651        // Scan from upper layer to lower layer.
652        for ri in self.real_inodes.lock().await.iter() {
653            if ri.whiteout {
654                // Node is deleted from some upper layer, skip it.
655                debug!("directory is whiteout");
656                break;
657            }
658
659            let stat = match &ri.stat {
660                Some(v) => v.clone(),
661                None => ri.stat64(&ctx).await?,
662            };
663
664            if !utils::is_dir(&stat.attr.kind) {
665                debug!("{} is not a directory", self.path.read().await);
666                // not directory
667                break;
668            }
669
670            // Read all entries from one layer.
671            let entries: HashMap<String, RealInode> = ri.readdir(ctx).await?;
672
673            // Merge entries from one layer to all_layer_inodes.
674            for (name, inode) in entries {
675                match all_layer_inodes.get_mut(&name) {
676                    Some(v) => {
677                        // Append additional RealInode to the end of vector.
678                        v.push(inode)
679                    }
680                    None => {
681                        all_layer_inodes.insert(name, vec![inode]);
682                    }
683                }
684            }
685
686            // if opaque, stop here
687            if ri.opaque {
688                debug!("directory {} is opaque", self.path.read().await);
689                break;
690            }
691        }
692
693        // Construct OverlayInode for each entry.
694        let mut childrens = vec![];
695        for (name, real_inodes) in all_layer_inodes {
696            // Inode numbers are not allocated yet.
697            let path = format!("{}/{}", self.path.read().await, name);
698            let new = Self::new_from_real_inodes(name.as_str(), 0, path, real_inodes).await?;
699            childrens.push(new);
700        }
701
702        Ok(childrens)
703    }
704
    // Create a new directory in upper layer for node, node must be directory.
    //
    // Ancestors that are missing from the upper layer are created first by
    // recursing into the parent. `mode_umask` supplies the (mode, umask) for
    // the new directory; when it is `None` the mode is derived from this
    // node's current kind and permissions and the umask is 0.
    //
    // Errors: ENOTDIR if the node is not a directory, an "other" error if the
    // parent is gone, EINVAL if the parent still has no upper inode, or any
    // error from stat64/mkdir.
    pub async fn create_upper_dir(
        self: Arc<Self>,
        ctx: Request,
        mode_umask: Option<(u32, u32)>,
    ) -> Result<()> {
        let st = self.stat64(ctx).await?;
        if !utils::is_dir(&st.attr.kind) {
            return Err(Error::from_raw_os_error(libc::ENOTDIR));
        }

        // If node already has upper layer, we can just return here.
        if self.in_upper_layer().await {
            return Ok(());
        }

        // not in upper layer, check parent.
        let pnode = if let Some(n) = self.parent.lock().await.upgrade() {
            Arc::clone(&n)
        } else {
            return Err(Error::other("no parent?"));
        };

        if !pnode.in_upper_layer().await {
            // Boxed because an async fn cannot recurse without indirection.
            Box::pin(pnode.clone().create_upper_dir(ctx, None)).await?; // recursive call
        }
        // Slot used to carry the created RealInode out of the closure below.
        let child: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
        let c_name = self.name.read().await.clone();
        // The closure runs while the parent's real_inodes lock is held.
        let _ = pnode
            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
                match parent_upper_inode {
                    Some(parent_ri) => {
                        let ri = match mode_umask {
                            Some((mode, umask)) => {
                                parent_ri.mkdir(ctx, &c_name, mode, umask).await?
                            }
                            None => {
                                // No explicit mode: reuse the mode of the existing directory.
                                parent_ri
                                    .mkdir(
                                        ctx,
                                        &c_name,
                                        mode_from_kind_and_perm(st.attr.kind, st.attr.perm),
                                        0,
                                    )
                                    .await?
                            }
                        };
                        // Stash the new upper inode for the caller to attach.
                        child.lock().await.replace(ri);
                    }
                    None => {
                        error!(
                            "BUG: parent {} has no upper inode after create_upper_dir",
                            pnode.inode
                        );
                        return Err(Error::from_raw_os_error(libc::EINVAL));
                    }
                }
                Ok(false)
            })
            .await?;

        if let Some(ri) = child.lock().await.take() {
            // Push the new real inode to the front of vector, keeping the lowers.
            self.add_upper_inode(ri, false).await;
        }

        Ok(())
    }
774
775    // Add new upper RealInode to OverlayInode, clear all lower RealInodes if 'clear_lowers' is true.
776    async fn add_upper_inode(self: &Arc<Self>, ri: RealInode, clear_lowers: bool) {
777        let mut inodes = self.real_inodes.lock().await;
778        // Update self according to upper attribute.
779        self.whiteout.store(ri.whiteout, Ordering::Relaxed);
780
781        // Push the new real inode to the front of vector.
782        let mut new = vec![Arc::new(ri)];
783        // Drain lower RealInodes.
784        let lowers = inodes.drain(..).collect::<Vec<Arc<RealInode>>>();
785        if !clear_lowers {
786            // If not clear lowers, append them to the end of vector.
787            new.extend(lowers);
788        }
789        inodes.extend(new);
790    }
791
792    // return the uppder layer fs.
793    pub async fn in_upper_layer(&self) -> bool {
794        let all_inodes = self.real_inodes.lock().await;
795        let first = all_inodes.first();
796        match first {
797            Some(v) => v.in_upper_layer,
798            None => false,
799        }
800    }
801
802    pub async fn upper_layer_only(&self) -> bool {
803        let real_inodes = self.real_inodes.lock().await;
804        let first = real_inodes.first();
805        match first {
806            Some(v) => {
807                if !v.in_upper_layer {
808                    false
809                } else {
810                    real_inodes.len() == 1
811                }
812            }
813            None => false,
814        }
815    }
816
817    pub async fn first_layer_inode(&self) -> (Arc<BoxedLayer>, bool, u64) {
818        let all_inodes = self.real_inodes.lock().await;
819        let first = all_inodes.first();
820        match first {
821            Some(v) => (v.layer.clone(), v.in_upper_layer, v.inode),
822            None => panic!("BUG: dangling OverlayInode"),
823        }
824    }
825
826    pub async fn child(&self, name: &str) -> Option<Arc<OverlayInode>> {
827        self.childrens.lock().await.get(name).cloned()
828    }
829
830    pub async fn remove_child(&self, name: &str) -> Option<Arc<OverlayInode>> {
831        self.childrens.lock().await.remove(name)
832    }
833
834    pub async fn insert_child(&self, name: &str, node: Arc<OverlayInode>) {
835        self.childrens.lock().await.insert(name.to_string(), node);
836    }
837
838    /// Handles operations on the upper layer inode of an `OverlayInode` in a thread-safe manner.
839    ///
840    /// This function locks the `real_inodes` field of the `OverlayInode` and retrieves the first
841    /// real inode (if any). If the first inode exists and belongs to the upper layer (`in_upper_layer` is true),
842    /// the provided callback `f` is invoked with the inode wrapped in `Some`. Otherwise, `f` is invoked with `None`.
843    ///
844    /// # Arguments
845    /// * `f`: A closure that takes an `Option<RealInode>` and returns a future. The future resolves to a `Result<bool>`.
846    ///
847    /// # Returns
848    /// * `Ok(bool)`: The result of invoking the callback `f`.
849    /// * `Err(Erron)`: An error is returned if:
850    ///   - There are no backend inodes (`real_inodes` is empty), indicating a dangling `OverlayInode`.
851    ///   - The callback `f` itself returns an error.
852    ///
853    /// # Behavior
854    /// 1. Locks the `real_inodes` field to ensure thread safety.
855    /// 2. Checks if the first inode exists:
856    ///    - If it exists and is in the upper layer, invokes `f(Some(inode))`.
857    ///    - If it exists but is not in the upper layer, invokes `f(None)`.
858    /// 3. If no inodes exist, returns an error indicating a dangling `OverlayInode`.
859    ///
860    /// # Example Use Case
861    /// This function is typically used to perform operations on the upper layer inode of an `OverlayInode`,
862    /// such as creating, modifying, or deleting files/directories in the overlay filesystem's upper layer.
863    pub async fn handle_upper_inode_locked<F, Fut>(&self, f: F) -> Result<bool>
864    where
865        // Can pass a &RealInode (or None) to f for any lifetime 'a
866        F: FnOnce(Option<Arc<RealInode>>) -> Fut,
867        // f returns a Future that must live at least as long as 'a
868        Fut: Future<Output = Result<bool>>,
869    {
870        let all_inodes = self.real_inodes.lock().await;
871        let first = all_inodes.first();
872        match first {
873            Some(v) => {
874                if v.in_upper_layer {
875                    f(Some(v.clone())).await
876                } else {
877                    f(None).await
878                }
879            }
880            None => Err(Error::other(format!(
881                "BUG: dangling OverlayInode {} without any backend inode",
882                self.inode
883            ))),
884        }
885    }
886}
887#[allow(unused)]
888fn entry_type_from_mode(mode: libc::mode_t) -> u8 {
889    match mode & libc::S_IFMT {
890        libc::S_IFBLK => libc::DT_BLK,
891        libc::S_IFCHR => libc::DT_CHR,
892        libc::S_IFDIR => libc::DT_DIR,
893        libc::S_IFIFO => libc::DT_FIFO,
894        libc::S_IFLNK => libc::DT_LNK,
895        libc::S_IFREG => libc::DT_REG,
896        libc::S_IFSOCK => libc::DT_SOCK,
897        _ => libc::DT_UNKNOWN,
898    }
899}
900impl OverlayFs {
901    pub fn new(
902        upper: Option<Arc<BoxedLayer>>,
903        lowers: Vec<Arc<BoxedLayer>>,
904        params: Config,
905        root_inode: u64,
906    ) -> Result<Self> {
907        // load root inode
908        Ok(OverlayFs {
909            config: params,
910            lower_layers: lowers,
911            upper_layer: upper,
912            inodes: RwLock::new(InodeStore::new()),
913            handles: Mutex::new(HashMap::new()),
914            next_handle: AtomicU64::new(1),
915            writeback: AtomicBool::new(false),
916            no_open: AtomicBool::new(false),
917            no_opendir: AtomicBool::new(false),
918            killpriv_v2: AtomicBool::new(false),
919            perfile_dax: AtomicBool::new(false),
920            root_inodes: root_inode,
921        })
922    }
923
    /// Returns the inode number this filesystem uses for its root (the `root_inodes` field).
    pub fn root_inode(&self) -> Inode {
        self.root_inodes
    }
927
928    async fn alloc_inode(&self, path: &str) -> Result<u64> {
929        self.inodes.write().await.alloc_inode(path)
930    }
931
932    /// Add a file layer and stack and merge the previous file layers.
933    pub async fn push_layer(&mut self, layer: Arc<BoxedLayer>) -> Result<()> {
934        let upper = self.upper_layer.take();
935        if let Some(upper) = upper {
936            self.lower_layers.push(upper);
937        }
938        self.upper_layer = Some(layer);
939        // TODO: merge previous file layers. need optimization
940        self.import().await?;
941        Ok(())
942    }
943
944    pub async fn import(&self) -> Result<()> {
945        let mut root = OverlayInode::new();
946        root.inode = self.root_inode();
947        root.path = String::from("").into();
948        root.name = String::from("").into();
949        root.lookups = AtomicU64::new(2);
950        root.real_inodes = Mutex::new(vec![]);
951        let ctx = Request::default();
952
953        // Update upper inode
954        if let Some(layer) = self.upper_layer.as_ref() {
955            let ino = layer.root_inode();
956            let real = RealInode::new(
957                layer.clone(),
958                true,
959                ino,
960                false,
961                layer.is_opaque(ctx, ino).await?,
962            )
963            .await;
964            root.real_inodes.lock().await.push(real.into());
965        }
966
967        // Update lower inodes.
968        for layer in self.lower_layers.iter() {
969            let ino = layer.root_inode();
970            let real: RealInode = RealInode::new(
971                layer.clone(),
972                false,
973                ino,
974                false,
975                layer.is_opaque(ctx, ino).await?,
976            )
977            .await;
978            root.real_inodes.lock().await.push(real.into());
979        }
980        let root_node = Arc::new(root);
981
982        // insert root inode into hash
983        self.insert_inode(self.root_inode(), Arc::clone(&root_node))
984            .await;
985
986        info!("loading root directory");
987        self.load_directory(ctx, &root_node).await?;
988        info!("loaded root directory");
989
990        Ok(())
991    }
992
993    async fn root_node(&self) -> Arc<OverlayInode> {
994        // Root node must exist.
995        self.get_active_inode(self.root_inode()).await.unwrap()
996    }
997
998    async fn insert_inode(&self, inode: u64, node: Arc<OverlayInode>) {
999        self.inodes.write().await.insert_inode(inode, node).await;
1000    }
1001
1002    async fn get_active_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1003        self.inodes.read().await.get_inode(inode)
1004    }
1005
1006    // Get inode which is active or deleted.
1007    async fn get_all_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
1008        let inode_store = self.inodes.read().await;
1009        match inode_store.get_inode(inode) {
1010            Some(n) => Some(n),
1011            None => inode_store.get_deleted_inode(inode),
1012        }
1013    }
1014
1015    // Return the inode only if it's permanently deleted from both self.inodes and self.deleted_inodes.
1016    async fn remove_inode(
1017        &self,
1018        inode: u64,
1019        path_removed: Option<String>,
1020    ) -> Option<Arc<OverlayInode>> {
1021        self.inodes
1022            .write()
1023            .await
1024            .remove_inode(inode, path_removed)
1025            .await
1026    }
1027
1028    // Lookup child OverlayInode with <name> under <parent> directory.
1029    // If name is empty, return parent itself.
1030    // Parent dir will be loaded, but returned OverlayInode won't.
1031    async fn lookup_node(
1032        &self,
1033        ctx: Request,
1034        parent: Inode,
1035        name: &str,
1036    ) -> Result<Arc<OverlayInode>> {
1037        if name.contains(SLASH_ASCII) {
1038            return Err(Error::from_raw_os_error(libc::EINVAL));
1039        }
1040
1041        // Parent inode is expected to be loaded before this function is called.
1042        // TODO: Is this correct?
1043        let pnode = match self.get_active_inode(parent).await {
1044            Some(v) => v,
1045            None => {
1046                match self.get_all_inode(parent).await {
1047                    Some(v) => {
1048                        trace!(
1049                            "overlayfs:mod.rs:1031:lookup_node: parent inode {parent} is deleted"
1050                        );
1051                        v
1052                    }
1053                    None => {
1054                        trace!(
1055                            "overlayfs:mod.rs:1034:lookup_node: parent inode {parent} not found"
1056                        );
1057                        // Parent inode is not found, return ENOENT.
1058                        return Err(Error::from_raw_os_error(libc::ENOENT));
1059                    }
1060                }
1061            }
1062        };
1063
1064        // Parent is whiteout-ed, return ENOENT.
1065        if pnode.whiteout.load(Ordering::Relaxed) {
1066            return Err(Error::from_raw_os_error(libc::ENOENT));
1067        }
1068
1069        let st = pnode.stat64(ctx).await?;
1070        if utils::is_dir(&st.attr.kind) && !pnode.loaded.load(Ordering::Relaxed) {
1071            // Parent is expected to be directory, load it first.
1072            self.load_directory(ctx, &pnode).await?;
1073        }
1074
1075        // Current file or dir.
1076        if name.eq(".")  
1077            // Root directory has no parent.
1078            || (parent == self.root_inode() && name.eq("..")) 
1079            // Special convention: empty name indicates current dir.
1080            || name.is_empty()
1081        {
1082            return Ok(Arc::clone(&pnode));
1083        }
1084
1085        match pnode.child(name).await {
1086            // Child is found.
1087            Some(v) => Ok(v),
1088            None => {
1089                trace!("lookup_node: child {name} not found");
1090                Err(Error::from_raw_os_error(libc::ENOENT))
1091            }
1092        }
1093    }
1094
1095    async fn lookup_node_ignore_enoent(
1096        &self,
1097        ctx: Request,
1098        parent: u64,
1099        name: &str,
1100    ) -> Result<Option<Arc<OverlayInode>>> {
1101        match self.lookup_node(ctx, parent, name).await {
1102            Ok(n) => Ok(Some(Arc::clone(&n))),
1103            Err(e) => {
1104                if let Some(raw_error) = e.raw_os_error() {
1105                    if raw_error == libc::ENOENT {
1106                        return Ok(None);
1107                    }
1108                }
1109                Err(e)
1110            }
1111        }
1112    }
1113
1114    // Load entries of the directory from all layers, if node is not directory, return directly.
1115    async fn load_directory(&self, ctx: Request, node: &Arc<OverlayInode>) -> Result<()> {
1116        if node.loaded.load(Ordering::Relaxed) {
1117            return Ok(());
1118        }
1119
1120        // We got all childrens without inode.
1121        // info!("before scan childrens, ctx: {:?}, node: {:?}", ctx, node.inode);
1122        let childrens = node.scan_childrens(ctx).await?;
1123        // info!("scanned children");
1124
1125        // =============== Start Lock Area ===================
1126        // Lock OverlayFs inodes.
1127        let mut inode_store = self.inodes.write().await;
1128        // Lock the OverlayInode and its childrens.
1129        let mut node_children = node.childrens.lock().await;
1130
1131        // Check again in case another 'load_directory' function call gets locks and want to do duplicated work.
1132        if node.loaded.load(Ordering::Relaxed) {
1133            return Ok(());
1134        }
1135
1136        // Now we have two locks' protection, Fs inodes lock and OverlayInode's childrens lock.
1137        // info!("before iter childrens");
1138        for mut child in childrens.into_iter() {
1139            // Allocate inode for each child.
1140            let ino = inode_store.alloc_inode(&child.path.read().await)?;
1141
1142            let name = child.name.read().await.clone();
1143            child.inode = ino;
1144            // Create bi-directional link between parent and child.
1145            child.parent = Mutex::new(Arc::downgrade(node));
1146
1147            let arc_child = Arc::new(child);
1148            node_children.insert(name, arc_child.clone());
1149            // Record overlay inode in whole OverlayFs.
1150            inode_store.insert_inode(ino, arc_child).await;
1151        }
1152        // info!("after iter childrens");
1153
1154        node.loaded.store(true, Ordering::Relaxed);
1155
1156        Ok(())
1157    }
1158
1159    async fn forget_one(&self, inode: Inode, count: u64) {
1160        if inode == self.root_inode() || inode == 0 {
1161            return;
1162        }
1163
1164        let v = match self.get_all_inode(inode).await {
1165            Some(n) => n,
1166            None => {
1167                trace!("forget unknown inode: {inode}");
1168                return;
1169            }
1170        };
1171
1172        // Use fetch_update to atomically update lookups in a loop until it succeeds
1173        v.lookups
1174            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
1175                // If count is larger than current lookups, return 0
1176                // Otherwise subtract count from current lookups
1177                if current < count {
1178                    Some(0)
1179                } else {
1180                    Some(current - count)
1181                }
1182            })
1183            .expect("fetch_update failed");
1184
1185        let lookups = v.lookups.load(Ordering::Relaxed);
1186        trace!(
1187            "forget inode: {}, name {}, lookups: {}",
1188            inode,
1189            v.name.read().await,
1190            lookups
1191        );
1192        if lookups == 0 {
1193            debug!(
1194                "inode is forgotten: {}, name {}",
1195                inode,
1196                v.name.read().await
1197            );
1198            let _ = self.remove_inode(inode, None).await;
1199            let parent = v.parent.lock().await;
1200
1201            if let Some(p) = parent.upgrade() {
1202                // remove it from hashmap
1203                p.remove_child(&v.name.read().await).await;
1204            }
1205        }
1206    }
1207
1208    async fn do_lookup(&self, ctx: Request, parent: Inode, name: &str) -> Result<ReplyEntry> {
1209        let node = self.lookup_node(ctx, parent, name).await?;
1210        debug!("do_lookup: {name:?}, found");
1211
1212        if node.whiteout.load(Ordering::Relaxed) {
1213            eprintln!("Error: node.whiteout.load() called.");
1214            return Err(Error::from_raw_os_error(libc::ENOENT));
1215        }
1216
1217        let mut st = node.stat64(ctx).await?;
1218        st.attr.ino = node.inode;
1219        if utils::is_dir(&st.attr.kind) && !node.loaded.load(Ordering::Relaxed) {
1220            self.load_directory(ctx, &node).await?;
1221        }
1222
1223        // FIXME: can forget happen between found and increase reference counter?
1224        let tmp = node.lookups.fetch_add(1, Ordering::Relaxed);
1225        trace!("lookup count: {}", tmp + 1);
1226        Ok(ReplyEntry {
1227            ttl: st.ttl,
1228            attr: st.attr,
1229            generation: 0,
1230        })
1231    }
1232
1233    async fn do_statvfs(&self, ctx: Request, inode: Inode) -> Result<ReplyStatFs> {
1234        match self.get_active_inode(inode).await {
1235            Some(ovi) => {
1236                let all_inodes = ovi.real_inodes.lock().await;
1237                let real_inode = all_inodes
1238                    .first()
1239                    .ok_or(Error::other("backend inode not found"))?;
1240                Ok(real_inode.layer.statfs(ctx, real_inode.inode).await?)
1241            }
1242            None => Err(Error::from_raw_os_error(libc::ENOENT)),
1243        }
1244    }
1245
1246    #[allow(clippy::too_many_arguments)]
1247    async fn do_readdir<'a>(
1248        &self,
1249        ctx: Request,
1250        inode: Inode,
1251        handle: u64,
1252        offset: u64,
1253    ) -> Result<<OverlayFs as rfuse3::raw::Filesystem>::DirEntryStream<'a>> {
1254        // lookup the directory
1255        let ovl_inode = match self.handles.lock().await.get(&handle) {
1256            Some(dir) => dir.node.clone(),
1257            None => {
1258                // Try to get data with inode.
1259                let node = self.lookup_node(ctx, inode, ".").await?;
1260
1261                let st = node.stat64(ctx).await?;
1262                if !utils::is_dir(&st.attr.kind) {
1263                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1264                }
1265
1266                node.clone()
1267            }
1268        };
1269        self.load_directory(ctx, &ovl_inode).await?;
1270        let mut childrens = Vec::new();
1271        //add myself as "."
1272        childrens.push((".".to_string(), ovl_inode.clone()));
1273
1274        //add parent
1275        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1276            Some(p) => p.clone(),
1277            None => self.root_node().await,
1278        };
1279        childrens.push(("..".to_string(), parent_node));
1280
1281        for (_, child) in ovl_inode.childrens.lock().await.iter() {
1282            // skip whiteout node
1283            if child.whiteout.load(Ordering::Relaxed) {
1284                continue;
1285            }
1286            childrens.push((child.name.read().await.clone(), child.clone()));
1287        }
1288
1289        if offset >= childrens.len() as u64 {
1290            return Ok(iter(vec![].into_iter()));
1291        }
1292        let mut d: Vec<std::result::Result<DirectoryEntry, Errno>> = Vec::new();
1293
1294        for (index, (name, child)) in (0_u64..).zip(childrens.into_iter()) {
1295            // make struct DireEntry and Entry
1296            let st = child.stat64(ctx).await?;
1297            let dir_entry = DirectoryEntry {
1298                inode: child.inode,
1299                kind: st.attr.kind,
1300                name: name.into(),
1301                offset: (index + 1) as i64,
1302            };
1303            d.push(Ok(dir_entry));
1304        }
1305
1306        Ok(iter(d.into_iter()))
1307    }
1308
1309    #[allow(clippy::too_many_arguments)]
1310    async fn do_readdirplus<'a>(
1311        &self,
1312        ctx: Request,
1313        inode: Inode,
1314        handle: u64,
1315        offset: u64,
1316    ) -> Result<<OverlayFs as rfuse3::raw::Filesystem>::DirEntryPlusStream<'a>> {
1317        // lookup the directory
1318        let ovl_inode = match self.handles.lock().await.get(&handle) {
1319            Some(dir) => {
1320                trace!(
1321                    "do_readdirplus: handle {} found, inode {}",
1322                    handle, dir.node.inode
1323                );
1324                dir.node.clone()
1325            }
1326            None => {
1327                trace!("do_readdirplus: handle {handle} not found, lookup inode {inode}");
1328                // Try to get data with inode.
1329                let node = self.lookup_node(ctx, inode, ".").await?;
1330
1331                let st = node.stat64(ctx).await?;
1332                if !utils::is_dir(&st.attr.kind) {
1333                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1334                }
1335
1336                node.clone()
1337            }
1338        };
1339        self.load_directory(ctx, &ovl_inode).await?;
1340
1341        let mut childrens = Vec::new();
1342        //add myself as "."
1343        childrens.push((".".to_string(), ovl_inode.clone()));
1344
1345        //add parent
1346        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1347            Some(p) => p.clone(),
1348            None => self.root_node().await,
1349        };
1350        childrens.push(("..".to_string(), parent_node));
1351
1352        for (_, child) in ovl_inode.childrens.lock().await.iter() {
1353            // skip whiteout node
1354            if child.whiteout.load(Ordering::Relaxed) {
1355                continue;
1356            }
1357            childrens.push((child.name.read().await.clone(), child.clone()));
1358        }
1359
1360        if offset >= childrens.len() as u64 {
1361            return Ok(iter(vec![].into_iter()));
1362        }
1363        let mut d: Vec<std::result::Result<DirectoryEntryPlus, Errno>> = Vec::new();
1364
1365        for (index, (name, child)) in (0_u64..).zip(childrens.into_iter()) {
1366            if index >= offset {
1367                // make struct DireEntry and Entry
1368                let mut st = child.stat64(ctx).await?;
1369                child.lookups.fetch_add(1, Ordering::Relaxed);
1370                st.attr.ino = child.inode;
1371                let dir_entry = DirectoryEntryPlus {
1372                    inode: child.inode,
1373                    generation: 0,
1374                    kind: st.attr.kind,
1375                    name: name.into(),
1376                    offset: (index + 1) as i64,
1377                    attr: st.attr,
1378                    entry_ttl: st.ttl,
1379                    attr_ttl: st.ttl,
1380                };
1381                d.push(Ok(dir_entry));
1382            }
1383        }
1384
1385        Ok(iter(d.into_iter()))
1386    }
1387
1388    async fn do_mkdir(
1389        &self,
1390        ctx: Request,
1391        parent_node: Arc<OverlayInode>,
1392        name: &str,
1393        mode: u32,
1394        umask: u32,
1395    ) -> Result<()> {
1396        if self.upper_layer.is_none() {
1397            return Err(Error::from_raw_os_error(libc::EROFS));
1398        }
1399
1400        // Parent node was deleted.
1401        if parent_node.whiteout.load(Ordering::Relaxed) {
1402            return Err(Error::from_raw_os_error(libc::ENOENT));
1403        }
1404
1405        let mut delete_whiteout = false;
1406        let mut set_opaque = false;
1407        if let Some(n) = self
1408            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1409            .await?
1410        {
1411            // Node with same name exists, let's check if it's whiteout.
1412            if !n.whiteout.load(Ordering::Relaxed) {
1413                return Err(Error::from_raw_os_error(libc::EEXIST));
1414            }
1415
1416            if n.in_upper_layer().await {
1417                delete_whiteout = true;
1418            }
1419
1420            // Set opaque if child dir has lower layers.
1421            if !n.upper_layer_only().await {
1422                set_opaque = true;
1423            }
1424        }
1425
1426        // Copy parent node up if necessary.
1427        let pnode = self.copy_node_up(ctx, parent_node).await?;
1428
1429        let path = format!("{}/{}", pnode.path.read().await, name);
1430        let path_ref = &path;
1431        let new_node = Arc::new(Mutex::new(None));
1432        pnode
1433            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1434                let parent_real_inode = match parent_real_inode {
1435                    Some(inode) => inode,
1436                    None => {
1437                        error!("BUG: parent doesn't have upper inode after copied up");
1438                        return Err(Error::from_raw_os_error(libc::EINVAL));
1439                    }
1440                };
1441                let osstr = OsStr::new(name);
1442                if delete_whiteout {
1443                    let _ = parent_real_inode
1444                        .layer
1445                        .delete_whiteout(ctx, parent_real_inode.inode, osstr)
1446                        .await;
1447                }
1448
1449                // Allocate inode number.
1450                let ino = self.alloc_inode(path_ref).await?;
1451                let child_dir = parent_real_inode.mkdir(ctx, name, mode, umask).await?;
1452                // Set opaque if child dir has lower layers.
1453                if set_opaque {
1454                    parent_real_inode
1455                        .layer
1456                        .set_opaque(ctx, child_dir.inode)
1457                        .await?;
1458                }
1459                let ovi =
1460                    OverlayInode::new_from_real_inode(name, ino, path_ref.clone(), child_dir).await;
1461                new_node.lock().await.replace(ovi);
1462                Ok(false)
1463            })
1464            .await?;
1465
1466        // new_node is always 'Some'
1467        let nn = new_node.lock().await.take();
1468        let arc_node = Arc::new(nn.unwrap());
1469        self.insert_inode(arc_node.inode, arc_node.clone()).await;
1470        pnode.insert_child(name, arc_node).await;
1471        Ok(())
1472    }
1473
1474    async fn do_mknod(
1475        &self,
1476        ctx: Request,
1477        parent_node: &Arc<OverlayInode>,
1478        name: &str,
1479        mode: u32,
1480        rdev: u32,
1481        umask: u32,
1482    ) -> Result<()> {
1483        if self.upper_layer.is_none() {
1484            return Err(Error::from_raw_os_error(libc::EROFS));
1485        }
1486
1487        // Parent node was deleted.
1488        if parent_node.whiteout.load(Ordering::Relaxed) {
1489            return Err(Error::from_raw_os_error(libc::ENOENT));
1490        }
1491
1492        match self
1493            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1494            .await?
1495        {
1496            Some(n) => {
1497                // Node with same name exists, let's check if it's whiteout.
1498                if !n.whiteout.load(Ordering::Relaxed) {
1499                    return Err(Error::from_raw_os_error(libc::EEXIST));
1500                }
1501
1502                // Copy parent node up if necessary.
1503                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1504                pnode
1505                    .handle_upper_inode_locked(
1506                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1507                            let parent_real_inode = match parent_real_inode {
1508                                Some(inode) => inode,
1509                                None => {
1510                                    error!("BUG: parent doesn't have upper inode after copied up");
1511                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1512                                }
1513                            };
1514                            let osstr = OsStr::new(name);
1515                            if n.in_upper_layer().await {
1516                                let _ = parent_real_inode
1517                                    .layer
1518                                    .delete_whiteout(ctx, parent_real_inode.inode, osstr)
1519                                    .await;
1520                            }
1521
1522                            let child_ri = parent_real_inode
1523                                .mknod(ctx, name, mode, rdev, umask)
1524                                .await?;
1525
1526                            // Replace existing real inodes with new one.
1527                            n.add_upper_inode(child_ri, true).await;
1528                            Ok(false)
1529                        },
1530                    )
1531                    .await?;
1532            }
1533            None => {
1534                // Copy parent node up if necessary.
1535                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1536                let new_node = Arc::new(Mutex::new(None));
1537                let path = format!("{}/{}", pnode.path.read().await, name);
1538                pnode
1539                    .handle_upper_inode_locked(
1540                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1541                            let parent_real_inode = match parent_real_inode {
1542                                Some(inode) => inode,
1543                                None => {
1544                                    error!("BUG: parent doesn't have upper inode after copied up");
1545                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1546                                }
1547                            };
1548
1549                            // Allocate inode number.
1550                            let ino = self.alloc_inode(&path).await?;
1551                            let child_ri = parent_real_inode
1552                                .mknod(ctx, name, mode, rdev, umask)
1553                                .await?;
1554                            let ovi = OverlayInode::new_from_real_inode(
1555                                name,
1556                                ino,
1557                                path.clone(),
1558                                child_ri,
1559                            )
1560                            .await;
1561
1562                            new_node.lock().await.replace(ovi);
1563                            Ok(false)
1564                        },
1565                    )
1566                    .await?;
1567
1568                let nn = new_node.lock().await.take();
1569                let arc_node = Arc::new(nn.unwrap());
1570                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1571                pnode.insert_child(name, arc_node).await;
1572            }
1573        }
1574
1575        Ok(())
1576    }
1577
1578    async fn do_create(
1579        &self,
1580        ctx: Request,
1581        parent_node: &Arc<OverlayInode>,
1582        name: &OsStr,
1583        mode: u32,
1584        flags: u32,
1585    ) -> Result<Option<u64>> {
1586        let name_str = name.to_str().unwrap();
1587        let upper = self
1588            .upper_layer
1589            .as_ref()
1590            .cloned()
1591            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
1592
1593        // Parent node was deleted.
1594        if parent_node.whiteout.load(Ordering::Relaxed) {
1595            return Err(Error::from_raw_os_error(libc::ENOENT));
1596        }
1597
1598        let handle: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1599        let real_ino: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1600        let new_ovi = match self
1601            .lookup_node_ignore_enoent(ctx, parent_node.inode, name_str)
1602            .await?
1603        {
1604            Some(n) => {
1605                // Node with same name exists, let's check if it's whiteout.
1606                if !n.whiteout.load(Ordering::Relaxed) {
1607                    return Err(Error::from_raw_os_error(libc::EEXIST));
1608                }
1609
1610                // Copy parent node up if necessary.
1611                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1612                pnode
1613                    .handle_upper_inode_locked(
1614                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1615                            let parent_real_inode = match parent_real_inode {
1616                                Some(inode) => inode,
1617                                None => {
1618                                    error!("BUG: parent doesn't have upper inode after copied up");
1619                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1620                                }
1621                            };
1622
1623                            if n.in_upper_layer().await {
1624                                let _ = parent_real_inode
1625                                    .layer
1626                                    .delete_whiteout(ctx, parent_real_inode.inode, name)
1627                                    .await;
1628                            }
1629
1630                            let (child_ri, hd) =
1631                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1632                            real_ino.lock().await.replace(child_ri.inode);
1633                            handle.lock().await.replace(hd.unwrap());
1634
1635                            // Replace existing real inodes with new one.
1636                            n.add_upper_inode(child_ri, true).await;
1637                            Ok(false)
1638                        },
1639                    )
1640                    .await?;
1641                n.clone()
1642            }
1643            None => {
1644                // Copy parent node up if necessary.
1645                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1646                let new_node = Arc::new(Mutex::new(None));
1647                let path = format!("{}/{}", pnode.path.read().await, name_str);
1648                pnode
1649                    .handle_upper_inode_locked(
1650                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1651                            let parent_real_inode = match parent_real_inode {
1652                                Some(inode) => inode,
1653                                None => {
1654                                    error!("BUG: parent doesn't have upper inode after copied up");
1655                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1656                                }
1657                            };
1658
1659                            let (child_ri, hd) =
1660                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1661                            real_ino.lock().await.replace(child_ri.inode);
1662                            handle.lock().await.replace(hd.unwrap());
1663                            // Allocate inode number.
1664                            let ino = self.alloc_inode(&path).await?;
1665                            let ovi = OverlayInode::new_from_real_inode(
1666                                name_str,
1667                                ino,
1668                                path.clone(),
1669                                child_ri,
1670                            )
1671                            .await;
1672
1673                            new_node.lock().await.replace(ovi);
1674                            Ok(false)
1675                        },
1676                    )
1677                    .await?;
1678
1679                // new_node is always 'Some'
1680                let nn = new_node.lock().await.take();
1681                let arc_node = Arc::new(nn.unwrap());
1682                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1683                pnode.insert_child(name_str, arc_node.clone()).await;
1684                arc_node
1685            }
1686        };
1687
1688        let final_handle = match *handle.lock().await {
1689            Some(hd) => {
1690                if self.no_open.load(Ordering::Relaxed) {
1691                    None
1692                } else {
1693                    let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1694                    let handle_data = HandleData {
1695                        node: new_ovi,
1696                        real_handle: Some(RealHandle {
1697                            layer: upper.clone(),
1698                            in_upper_layer: true,
1699                            inode: real_ino.lock().await.unwrap(),
1700                            handle: AtomicU64::new(hd),
1701                        }),
1702                    };
1703                    self.handles
1704                        .lock()
1705                        .await
1706                        .insert(handle, Arc::new(handle_data));
1707                    Some(handle)
1708                }
1709            }
1710            None => None,
1711        };
1712        Ok(final_handle)
1713    }
1714
1715    async fn do_rename(
1716        &self,
1717        req: Request,
1718        parent: Inode,
1719        name: &OsStr,
1720        new_parent: Inode,
1721        new_name: &OsStr,
1722    ) -> Result<()> {
1723        let name_str = name.to_str().unwrap();
1724        let new_name_str = new_name.to_str().unwrap();
1725
1726        // Ensure parent exists
1727        let parent_node = match self.get_all_inode(parent).await {
1728            Some(node) => node,
1729            None => return Err(Error::from_raw_os_error(libc::ENOENT)),
1730        };
1731        // Ensure parent is a directory
1732
1733        if !parent_node.is_dir(req).await? {
1734            return Err(Error::from_raw_os_error(libc::EPERM));
1735        }
1736
1737        // Ensure new_parent is a directory
1738        let new_parent_node = match self.get_all_inode(new_parent).await {
1739            Some(node) => node,
1740            None => return Err(Error::from_raw_os_error(libc::ENOENT)),
1741        };
1742        if !new_parent_node.is_dir(req).await? {
1743            return Err(Error::from_raw_os_error(libc::EPERM));
1744        }
1745
1746        // // Ensure new_name does not exist in new_parent
1747        // // Check if new_name exists in new_parent
1748        // if let Some(existing_node) = self
1749        //     .lookup_node_ignore_enoent(req, new_parent, new_name_str)
1750        //     .await?
1751        // {
1752        //     // If the node exists and is not whiteout, return EEXIST
1753        //     if !existing_node.whiteout.load(Ordering::Acquire) {
1754        //         return Err(Error::from_raw_os_error(libc::EEXIST));
1755        //     }
1756        //     // If it's a whiteout, allow rename to proceed (overwrite whiteout)
1757        // }
1758
1759        let src_node = parent_node
1760            .child(name_str)
1761            .await
1762            .ok_or_else(|| Error::from_raw_os_error(libc::ENOENT))?;
1763        let need_whiteout = src_node.upper_layer_only().await;
1764        self.copy_node_up(req, parent_node.clone()).await?;
1765        if src_node.is_dir(req).await? {
1766            // Directory can't be renamed.
1767            self.copy_directory_up(req, src_node.clone()).await?;
1768        } else {
1769            self.copy_node_up(req, src_node.clone()).await?;
1770        };
1771
1772        // copy new_parent up if necessary.
1773        self.copy_node_up(req, new_parent_node.clone()).await?;
1774        let (src_lay, src_t, src_true_inode) = parent_node.first_layer_inode().await;
1775        let (dst_lay, dst_t, dst_true_inode) = new_parent_node.first_layer_inode().await;
1776
1777        // Assert that both layers are the same.
1778        assert!(Arc::ptr_eq(&src_lay, &dst_lay));
1779        assert!(src_t);
1780        assert!(dst_t);
1781
1782        // rename in PassthroughFS.
1783        match dst_lay
1784            .rename(req, src_true_inode, name, dst_true_inode, new_name)
1785            .await
1786        {
1787            Ok(_) => {
1788                let new_path = format!("{}/{}", new_parent_node.path.read().await, new_name_str);
1789
1790                *src_node.path.write().await = new_path;
1791                *src_node.name.write().await = new_name_str.to_string();
1792
1793                new_parent_node.insert_child(new_name_str, src_node).await;
1794            }
1795            Err(e) => return Err(e.into()),
1796        }
1797
1798        if !need_whiteout {
1799            let _ = src_lay.create_whiteout(req, src_true_inode, name).await?;
1800        }
1801
1802        // Insert into new parent, update node name and path
1803        let _ = parent_node
1804            .remove_child(name_str)
1805            .await
1806            .ok_or_else(|| Error::from_raw_os_error(libc::ENOENT))?;
1807
1808        Ok(())
1809    }
1810
1811    async fn do_link(
1812        &self,
1813        ctx: Request,
1814        src_node: &Arc<OverlayInode>,
1815        new_parent: &Arc<OverlayInode>,
1816        name: &str,
1817    ) -> Result<()> {
1818        let name_os = OsStr::new(name);
1819        if self.upper_layer.is_none() {
1820            return Err(Error::from_raw_os_error(libc::EROFS));
1821        }
1822
1823        // Node is whiteout.
1824        if src_node.whiteout.load(Ordering::Relaxed) || new_parent.whiteout.load(Ordering::Relaxed)
1825        {
1826            return Err(Error::from_raw_os_error(libc::ENOENT));
1827        }
1828
1829        let st = src_node.stat64(ctx).await?;
1830        if utils::is_dir(&st.attr.kind) {
1831            // Directory can't be hardlinked.
1832            return Err(Error::from_raw_os_error(libc::EPERM));
1833        }
1834
1835        let src_node = self.copy_node_up(ctx, Arc::clone(src_node)).await?;
1836        let new_parent = self.copy_node_up(ctx, Arc::clone(new_parent)).await?;
1837        let src_ino = src_node.first_layer_inode().await.2;
1838
1839        match self
1840            .lookup_node_ignore_enoent(ctx, new_parent.inode, name)
1841            .await?
1842        {
1843            Some(n) => {
1844                // trace!("do_link: found existing node with name '{}'", name);
1845                // Node with same name exists, let's check if it's whiteout.
1846                if !n.whiteout.load(Ordering::Relaxed) {
1847                    return Err(Error::from_raw_os_error(libc::EEXIST));
1848                }
1849
1850                // Node is definitely a whiteout now.
1851                new_parent
1852                    .handle_upper_inode_locked(
1853                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1854                            let parent_real_inode = match parent_real_inode {
1855                                Some(inode) => inode,
1856                                None => {
1857                                    error!("BUG: parent doesn't have upper inode after copied up");
1858                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1859                                }
1860                            };
1861
1862                            // Whiteout file exists in upper level, let's delete it.
1863                            if n.in_upper_layer().await {
1864                                let _ = parent_real_inode
1865                                    .layer
1866                                    .delete_whiteout(ctx, parent_real_inode.inode, name_os)
1867                                    .await;
1868                            }
1869
1870                            let child_ri = parent_real_inode.link(ctx, src_ino, name).await?;
1871
1872                            // Replace existing real inodes with new one.
1873                            n.add_upper_inode(child_ri, true).await;
1874                            Ok(false)
1875                        },
1876                    )
1877                    .await?;
1878            }
1879            None => {
1880                // Copy parent node up if necessary.
1881                // trace!("do_link: no existing node found, creating new link");
1882                new_parent
1883                    .handle_upper_inode_locked(
1884                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1885                            let parent_real_inode = match parent_real_inode {
1886                                Some(inode) => inode,
1887                                None => {
1888                                    error!("BUG: parent doesn't have upper inode after copied up");
1889                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1890                                }
1891                            };
1892
1893                            parent_real_inode.link(ctx, src_ino, name).await?;
1894
1895                            Ok(false)
1896                        },
1897                    )
1898                    .await?;
1899
1900                // Points to the same node as src_node.
1901                new_parent.insert_child(name, Arc::clone(&src_node)).await;
1902            }
1903        }
1904
1905        src_node.nlink.fetch_add(1, Ordering::Relaxed);
1906        Ok(())
1907    }
1908
1909    async fn do_symlink(
1910        &self,
1911        ctx: Request,
1912        linkname: &str,
1913        parent_node: &Arc<OverlayInode>,
1914        name: &str,
1915    ) -> Result<()> {
1916        let name_os = OsStr::new(name);
1917        if self.upper_layer.is_none() {
1918            return Err(Error::from_raw_os_error(libc::EROFS));
1919        }
1920
1921        // parent was deleted.
1922        if parent_node.whiteout.load(Ordering::Relaxed) {
1923            return Err(Error::from_raw_os_error(libc::ENOENT));
1924        }
1925
1926        match self
1927            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1928            .await?
1929        {
1930            Some(n) => {
1931                // Node with same name exists, let's check if it's whiteout.
1932                if !n.whiteout.load(Ordering::Relaxed) {
1933                    return Err(Error::from_raw_os_error(libc::EEXIST));
1934                }
1935
1936                // Copy parent node up if necessary.
1937                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1938                pnode
1939                    .handle_upper_inode_locked(
1940                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1941                            let parent_real_inode = match parent_real_inode {
1942                                Some(inode) => inode,
1943                                None => {
1944                                    error!("BUG: parent doesn't have upper inode after copied up");
1945                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1946                                }
1947                            };
1948
1949                            if n.in_upper_layer().await {
1950                                let _ = parent_real_inode
1951                                    .layer
1952                                    .delete_whiteout(ctx, parent_real_inode.inode, name_os)
1953                                    .await;
1954                            }
1955
1956                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
1957
1958                            // Replace existing real inodes with new one.
1959                            n.add_upper_inode(child_ri, true).await;
1960                            Ok(false)
1961                        },
1962                    )
1963                    .await?;
1964            }
1965            None => {
1966                // Copy parent node up if necessary.
1967                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1968                let new_node: Arc<Mutex<Option<OverlayInode>>> = Arc::new(Mutex::new(None));
1969                let path = format!("{}/{}", pnode.path.read().await, name);
1970                pnode
1971                    .handle_upper_inode_locked(
1972                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1973                            let parent_real_inode = match parent_real_inode {
1974                                Some(inode) => inode,
1975                                None => {
1976                                    error!("BUG: parent doesn't have upper inode after copied up");
1977                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1978                                }
1979                            };
1980
1981                            // Allocate inode number.
1982                            let ino = self.alloc_inode(&path).await?;
1983                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
1984                            let ovi = OverlayInode::new_from_real_inode(
1985                                name,
1986                                ino,
1987                                path.clone(),
1988                                child_ri,
1989                            )
1990                            .await;
1991
1992                            new_node.lock().await.replace(ovi);
1993                            Ok(false)
1994                        },
1995                    )
1996                    .await?;
1997
1998                // new_node is always 'Some'
1999                let arc_node = Arc::new(new_node.lock().await.take().unwrap());
2000                self.insert_inode(arc_node.inode, arc_node.clone()).await;
2001                pnode.insert_child(name, arc_node).await;
2002            }
2003        }
2004
2005        Ok(())
2006    }
2007
2008    async fn copy_symlink_up(
2009        &self,
2010        ctx: Request,
2011        node: Arc<OverlayInode>,
2012    ) -> Result<Arc<OverlayInode>> {
2013        if node.in_upper_layer().await {
2014            return Ok(node);
2015        }
2016
2017        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2018            Arc::clone(n)
2019        } else {
2020            return Err(Error::other("no parent?"));
2021        };
2022
2023        let (self_layer, _, self_inode) = node.first_layer_inode().await;
2024
2025        if !parent_node.in_upper_layer().await {
2026            parent_node.clone().create_upper_dir(ctx, None).await?;
2027        }
2028
2029        // Read the linkname from lower layer.
2030        let reply_data = self_layer.readlink(ctx, self_inode).await?;
2031        // Convert path to &str.
2032        let path = std::str::from_utf8(&reply_data.data)
2033            .map_err(|_| Error::from_raw_os_error(libc::EINVAL))?;
2034
2035        let new_upper_real: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
2036        parent_node
2037            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2038                // We already create upper dir for parent_node above.
2039                let parent_real_inode =
2040                    parent_upper_inode.ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2041                new_upper_real.lock().await.replace(
2042                    parent_real_inode
2043                        .symlink(ctx, path, &node.name.read().await)
2044                        .await?,
2045                );
2046                Ok(false)
2047            })
2048            .await?;
2049
2050        if let Some(real_inode) = new_upper_real.lock().await.take() {
2051            // update upper_inode and first_inode()
2052            node.add_upper_inode(real_inode, true).await;
2053        }
2054
2055        Ok(node)
2056    }
2057
2058    // Copy regular file from lower layer to upper layer.
2059    // Caller must ensure node doesn't have upper layer.
2060    async fn copy_regfile_up(
2061        &self,
2062        ctx: Request,
2063        node: Arc<OverlayInode>,
2064    ) -> Result<Arc<OverlayInode>> {
2065        if node.in_upper_layer().await {
2066            return Ok(node);
2067        }
2068        //error...
2069        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2070            Arc::clone(n)
2071        } else {
2072            return Err(Error::other("no parent?"));
2073        };
2074
2075        let st = node.stat64(ctx).await?;
2076        let (lower_layer, _, lower_inode) = node.first_layer_inode().await;
2077        trace!(
2078            "copy_regfile_up: node {} in lower layer's inode {}",
2079            node.inode, lower_inode
2080        );
2081
2082        if !parent_node.in_upper_layer().await {
2083            parent_node.clone().create_upper_dir(ctx, None).await?;
2084        }
2085
2086        // create the file in upper layer using information from lower layer
2087
2088        let flags = libc::O_WRONLY;
2089        let mode = mode_from_kind_and_perm(st.attr.kind, st.attr.perm);
2090
2091        let upper_handle = Arc::new(Mutex::new(0));
2092        let upper_real_inode = Arc::new(Mutex::new(None));
2093        parent_node
2094            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2095                // We already create upper dir for parent_node.
2096                let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2097                    error!("parent {} has no upper inode", parent_node.inode);
2098                    Error::from_raw_os_error(libc::EINVAL)
2099                })?;
2100                let (inode, h) = parent_real_inode
2101                    .create(
2102                        ctx,
2103                        &node.name.read().await,
2104                        mode,
2105                        flags.try_into().unwrap(),
2106                    )
2107                    .await?;
2108                trace!(
2109                    "copy_regfile_up: created upper file {} with inode {}",
2110                    node.name.read().await,
2111                    inode.inode
2112                );
2113                *upper_handle.lock().await = h.unwrap_or(0);
2114                upper_real_inode.lock().await.replace(inode);
2115                Ok(false)
2116            })
2117            .await?;
2118
2119        let rep = lower_layer
2120            .open(ctx, lower_inode, libc::O_RDONLY as u32)
2121            .await?;
2122
2123        let lower_handle = rep.fh;
2124
2125        // need to use work directory and then rename file to
2126        // final destination for atomic reasons.. not deal with it for now,
2127        // use stupid copy at present.
2128        // FIXME: this need a lot of work here, ntimes, xattr, etc.
2129
2130        // Copy from lower real inode to upper real inode.
2131        // TODO: use sendfile here.
2132
2133        let mut _offset: usize = 0;
2134        let size = 4 * 1024 * 1024;
2135
2136        let ret = lower_layer
2137            .read(ctx, lower_inode, lower_handle, _offset as u64, size)
2138            .await?;
2139
2140        _offset += ret.data.len();
2141
2142        // close handles
2143        lower_layer
2144            .release(ctx, lower_inode, lower_handle, 0, 0, true)
2145            .await?;
2146
2147        _offset = 0;
2148        let u_handle = *upper_handle.lock().await;
2149        let ri = upper_real_inode.lock().await.take();
2150        if let Some(ri) = ri {
2151            // loop {
2152            let res = ri
2153                .layer
2154                .write(ctx, ri.inode, u_handle, _offset as u64, &ret.data, 0, 0)
2155                .await?;
2156            //     if ret.written == 0 {
2157            //         break;
2158            //     }
2159            //     trace!("write {} bytes to upper layer", ret.written);
2160
2161            //     _offset += ret.written as usize;
2162            // }
2163            assert!(res.written as usize == ret.data.len());
2164            if let Err(e) = ri.layer.release(ctx, ri.inode, u_handle, 0, 0, true).await {
2165                let e: std::io::Error = e.into();
2166                // Ignore ENOSYS.
2167                if e.raw_os_error() != Some(libc::ENOSYS) {
2168                    return Err(e);
2169                }
2170            }
2171            node.add_upper_inode(ri, true).await;
2172        } else {
2173            error!("BUG: upper real inode is None after copy up");
2174        }
2175
2176        Ok(Arc::clone(&node))
2177    }
2178
2179    /// Copies the specified node to the upper layer of the filesystem
2180    ///
2181    /// Performs different operations based on the node type:
2182    /// - **Directory**: Creates a corresponding directory in the upper layer
2183    /// - **Symbolic link**: Recursively copies to the upper layer
2184    /// - **Regular file**: Copies file content to the upper layer
2185    ///
2186    /// # Parameters
2187    /// * `ctx`: FUSE request context
2188    /// * `node`: Reference to the node to be copied
2189    ///
2190    /// # Returns
2191    /// Returns a reference to the upper-layer node on success, or an error on failure
2192    async fn copy_node_up(
2193        &self,
2194        ctx: Request,
2195        node: Arc<OverlayInode>,
2196    ) -> Result<Arc<OverlayInode>> {
2197        if node.in_upper_layer().await {
2198            return Ok(node);
2199        }
2200
2201        let st = node.stat64(ctx).await?;
2202        match st.attr.kind {
2203            FileType::Directory => {
2204                node.clone().create_upper_dir(ctx, None).await?;
2205                Ok(node)
2206            }
2207            FileType::Symlink => {
2208                // For symlink.
2209                self.copy_symlink_up(ctx, node).await
2210            }
2211            FileType::RegularFile => {
2212                // For regular file.
2213                self.copy_regfile_up(ctx, node).await
2214            }
2215            _ => {
2216                // For other file types. return error.
2217                Err(Error::from_raw_os_error(libc::EINVAL))
2218            }
2219        }
2220    }
2221
2222    /// recursively copy directory and all its contents to upper layer
2223    async fn copy_directory_up(
2224        &self,
2225        ctx: Request,
2226        node: Arc<OverlayInode>,
2227    ) -> Result<Arc<OverlayInode>> {
2228        // Ensure the directory itself is copied up first
2229        self.copy_node_up(ctx, node.clone()).await?;
2230
2231        // load directory to cache
2232        self.load_directory(ctx, &node).await?;
2233
2234        // go through all children
2235        let children = node.childrens.lock().await.clone();
2236        for (_name, child) in children.iter() {
2237            if _name == "." || _name == ".." {
2238                continue;
2239            }
2240            // jump over whiteout
2241            if child.whiteout.load(Ordering::Relaxed) {
2242                continue;
2243            }
2244            let st = child.stat64(ctx).await?;
2245            if !child.in_upper_layer().await {
2246                match st.attr.kind {
2247                    FileType::Directory => {
2248                        // recursively copy subdirectory
2249                        Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2250                    }
2251                    FileType::Symlink | FileType::RegularFile => {
2252                        // copy node up symlink or regular file
2253                        Box::pin(self.copy_node_up(ctx, child.clone())).await?;
2254                    }
2255                    _ => {
2256                        // other file types are ignored
2257                    }
2258                }
2259            } else if utils::is_dir(&st.attr.kind) {
2260                // If it is already in the upper layer, but the directory is not loaded,
2261                // ensure that its contents are also copied up recursively.
2262                Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2263            }
2264        }
2265
2266        Ok(node)
2267    }
2268
2269    async fn do_rm(&self, ctx: Request, parent: u64, name: &OsStr, dir: bool) -> Result<()> {
2270        // 1. Read-only mount guard
2271        if self.upper_layer.is_none() {
2272            return Err(Error::from_raw_os_error(libc::EROFS));
2273        }
2274
2275        // 2. Locate the parent Overlay Inode.
2276        // Find parent Overlay Inode.
2277        let pnode = self.lookup_node(ctx, parent, "").await?;
2278        if pnode.whiteout.load(Ordering::Relaxed) {
2279            return Err(Error::from_raw_os_error(libc::ENOENT));
2280        }
2281        let to_name = name.to_str().unwrap();
2282
2283        // 3. Locate the child Overlay Inode for the given name
2284        // Find the Overlay Inode for child with <name>.
2285        let node = self.lookup_node(ctx, parent, to_name).await?;
2286        if node.whiteout.load(Ordering::Relaxed) {
2287            // already deleted.
2288            return Err(Error::from_raw_os_error(libc::ENOENT));
2289        }
2290
2291        // 4. If removing a directory, ensure it is empty of real entries
2292        if dir {
2293            self.load_directory(ctx, &node).await?;
2294            let (count, whiteouts) = node.count_entries_and_whiteout(ctx).await?;
2295            trace!("entries: {count}, whiteouts: {whiteouts}\n");
2296            if count > 0 {
2297                return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
2298            }
2299
2300            // Delete all whiteouts.
2301            if whiteouts > 0 && node.in_upper_layer().await {
2302                self.empty_node_directory(ctx, Arc::clone(&node)).await?;
2303            }
2304
2305            trace!("whiteouts deleted!\n");
2306        }
2307
2308        // 5. Decide whether we need to create a whiteout entry
2309        // We'll filp this off if upper-layer unlink suffices or parent is opaque
2310        let need_whiteout = AtomicBool::new(true);
2311        let pnode = self.copy_node_up(ctx, Arc::clone(&pnode)).await?;
2312
2313        if node.upper_layer_only().await {
2314            need_whiteout.store(false, Ordering::Relaxed);
2315        }
2316
2317        // 6. Decrement the FUSE lookup (open handle) counter on this node
2318        // lookups decrease by 1.
2319        let origin = node.lookups.fetch_sub(1, Ordering::Relaxed);
2320        trace!(
2321            "overlay:do_rm: node {} sub lookups to {}",
2322            node.inode,
2323            origin - 1
2324        );
2325        let mut df = |parent_upper_inode: Option<Arc<RealInode>>| async {
2326            let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2327                error!(
2328                    "BUG: parent {} has no upper inode after copy up",
2329                    pnode.inode
2330                );
2331                Error::from_raw_os_error(libc::EINVAL)
2332            })?;
2333
2334            // Parent is opaque, it shadows everything in lower layers so no need to create extra whiteouts.
2335            if parent_real_inode.opaque {
2336                need_whiteout.store(false, Ordering::Relaxed);
2337            }
2338            if dir {
2339                parent_real_inode
2340                    .layer
2341                    .rmdir(ctx, parent_real_inode.inode, name)
2342                    .await?;
2343            } else {
2344                parent_real_inode
2345                    .layer
2346                    .unlink(ctx, parent_real_inode.inode, name)
2347                    .await?;
2348            }
2349
2350            Ok(false)
2351        };
2352
2353        // 7. Perform the unlink/rmdir operation if the node exists in upper layer
2354        // decrement hardlink count
2355        let prev_nlink = node.nlink.fetch_sub(1, Ordering::Relaxed);
2356        let remaining_links = prev_nlink.saturating_sub(1);
2357        let open_handles = node.lookups.load(Ordering::Relaxed);
2358        if node.in_upper_layer().await {
2359            // run upper-layer remove + maybe remove overlay inode + remove child
2360            let hh = pnode.handle_upper_inode_locked(&mut df);
2361            let (res, _, _) = tokio::join!(
2362                hh,
2363                async {
2364                    if remaining_links == 0 && open_handles == 0 {
2365                        self.remove_inode(node.inode, Some(node.path.read().await.clone()))
2366                            .await
2367                            .unwrap();
2368                    } else {
2369                        trace!(
2370                            "in upper layer, defer remove_inode {}, links={}, open_handles={}",
2371                            node.inode, remaining_links, open_handles
2372                        );
2373                    }
2374                    Ok::<(), Error>(())
2375                },
2376                pnode.remove_child(name.to_str().unwrap()),
2377            );
2378            res?;
2379        } else {
2380            // no upper layer entry: only lower layer, so no real fn unlink here
2381            let _ = tokio::join!(
2382                async {
2383                    if remaining_links == 0 && open_handles == 0 {
2384                        self.remove_inode(node.inode, None).await.unwrap();
2385                    } else {
2386                        trace!(
2387                            "not in upper layer, defer remove_inode {}, links={}, open_handles={}",
2388                            node.inode, remaining_links, open_handles
2389                        );
2390                    }
2391                    Ok::<(), Error>(())
2392                },
2393                pnode.remove_child(name.to_str().unwrap()),
2394            );
2395        }
2396
2397        // 8. If needed, create a entry in the upper layer to mask lower-layer files
2398        if need_whiteout.load(Ordering::Relaxed) {
2399            trace!("do_rm: creating whiteout\n");
2400            // pnode is copied up, so it has upper layer.
2401            pnode
2402                .handle_upper_inode_locked(
2403                    &mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2404                        let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2405                            error!(
2406                                "BUG: parent {} has no upper inode after copy up",
2407                                pnode.inode
2408                            );
2409                            Error::from_raw_os_error(libc::EINVAL)
2410                        })?;
2411
2412                        let child_ri = parent_real_inode.create_whiteout(ctx, to_name).await?; //FIXME..............
2413                        let path = format!("{}/{}", pnode.path.read().await, to_name);
2414                        let ino: u64 = self.alloc_inode(&path).await?;
2415                        let ovi = Arc::new(
2416                            OverlayInode::new_from_real_inode(to_name, ino, path.clone(), child_ri)
2417                                .await,
2418                        );
2419
2420                        self.insert_inode(ino, ovi.clone()).await;
2421                        pnode.insert_child(to_name, ovi.clone()).await;
2422                        Ok(false)
2423                    },
2424                )
2425                .await?;
2426        }
2427
2428        Ok(())
2429    }
2430
2431    async fn do_fsync(
2432        &self,
2433        ctx: Request,
2434        inode: Inode,
2435        datasync: bool,
2436        handle: Handle,
2437        syncdir: bool,
2438    ) -> Result<()> {
2439        // Use O_RDONLY flags which indicates no copy up.
2440        let data = self
2441            .get_data(ctx, Some(handle), inode, libc::O_RDONLY as u32)
2442            .await?;
2443
2444        trace!("do_fsync: got data for handle: {handle}, inode:{inode}");
2445
2446        match data.real_handle {
2447            // FIXME: need to test if inode matches corresponding handle?
2448            None => {
2449                trace!("do_fsync: no real handle found for handle: {handle}, inode:{inode}");
2450                Err(Error::from_raw_os_error(libc::ENOENT))
2451            }
2452            Some(ref rh) => {
2453                let real_handle = rh.handle.load(Ordering::Relaxed);
2454                // TODO: check if it's in upper layer? @weizhang555
2455                if syncdir {
2456                    trace!(
2457                        "do_fsync: layer.fsyncdir called for handle: {}, inode:{}; rh.inode: {}, real_handle: {}",
2458                        handle, inode, rh.inode, real_handle
2459                    );
2460                    rh.layer
2461                        .fsyncdir(ctx, rh.inode, real_handle, datasync)
2462                        .await
2463                        .map_err(|e| e.into())
2464                } else {
2465                    rh.layer
2466                        .fsync(ctx, rh.inode, real_handle, datasync)
2467                        .await
2468                        .map_err(|e| e.into())
2469                }
2470            }
2471        }
2472    }
2473
2474    // Delete everything in the directory only on upper layer, ignore lower layers.
2475    async fn empty_node_directory(&self, ctx: Request, node: Arc<OverlayInode>) -> Result<()> {
2476        let st = node.stat64(ctx).await?;
2477        if !utils::is_dir(&st.attr.kind) {
2478            // This function can only be called on directories.
2479            return Err(Error::from_raw_os_error(libc::ENOTDIR));
2480        }
2481
2482        let (layer, in_upper, inode) = node.first_layer_inode().await;
2483        if !in_upper {
2484            return Ok(());
2485        }
2486
2487        // Copy node.childrens Hashmap to Vector, the Vector is also used as temp storage,
2488        // Without this, Rust won't allow us to remove them from node.childrens.
2489        let iter = node
2490            .childrens
2491            .lock()
2492            .await
2493            .values()
2494            .cloned()
2495            .collect::<Vec<_>>();
2496
2497        for child in iter {
2498            // We only care about upper layer, ignore lower layers.
2499            if child.in_upper_layer().await {
2500                let child_name = child.name.read().await.clone();
2501                let child_name_os = OsStr::new(&child_name);
2502                if child.whiteout.load(Ordering::Relaxed) {
2503                    layer.delete_whiteout(ctx, inode, child_name_os).await?
2504                } else {
2505                    let s = child.stat64(ctx).await?;
2506                    let cname: &OsStr = OsStr::new(&child_name_os);
2507                    if utils::is_dir(&s.attr.kind) {
2508                        let (count, whiteouts) = child.count_entries_and_whiteout(ctx).await?;
2509                        if count + whiteouts > 0 {
2510                            let cb = child.clone();
2511                            Box::pin(async move { self.empty_node_directory(ctx, cb).await })
2512                                .await?;
2513                        }
2514                        layer.rmdir(ctx, inode, cname).await?
2515                    } else {
2516                        layer.unlink(ctx, inode, cname).await?;
2517                    }
2518                }
2519
2520                let cpath = child.path.read().await.clone();
2521                // delete the child
2522                self.remove_inode(child.inode, Some(cpath)).await;
2523                node.remove_child(&child_name).await;
2524            }
2525        }
2526
2527        Ok(())
2528    }
2529
2530    async fn find_real_info_from_handle(
2531        &self,
2532        handle: Handle,
2533    ) -> Result<(Arc<BoxedLayer>, Inode, Handle)> {
2534        match self.handles.lock().await.get(&handle) {
2535            Some(h) => match h.real_handle {
2536                Some(ref rhd) => {
2537                    trace!(
2538                        "find_real_info_from_handle: layer in upper: {}",
2539                        rhd.in_upper_layer
2540                    );
2541                    Ok((
2542                        rhd.layer.clone(),
2543                        rhd.inode,
2544                        rhd.handle.load(Ordering::Relaxed),
2545                    ))
2546                }
2547                None => Err(Error::from_raw_os_error(libc::ENOENT)),
2548            },
2549
2550            None => Err(Error::from_raw_os_error(libc::ENOENT)),
2551        }
2552    }
2553
2554    async fn find_real_inode(&self, inode: Inode) -> Result<(Arc<BoxedLayer>, Inode)> {
2555        if let Some(n) = self.get_active_inode(inode).await {
2556            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2557            return Ok((first_layer, first_inode));
2558        } else if let Some(n) = self.get_all_inode(inode).await {
2559            trace!("find_real_inode: found inode by get_all_inode: {}", n.inode);
2560            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2561            return Ok((first_layer, first_inode));
2562        }
2563
2564        Err(Error::from_raw_os_error(libc::ENOENT))
2565    }
2566
2567    async fn get_data(
2568        &self,
2569        ctx: Request,
2570        handle: Option<Handle>,
2571        inode: Inode,
2572        flags: u32,
2573    ) -> Result<Arc<HandleData>> {
2574        let no_open = self.no_open.load(Ordering::Relaxed);
2575        if !no_open {
2576            if let Some(h) = handle {
2577                if let Some(v) = self.handles.lock().await.get(&h) {
2578                    if v.node.inode == inode {
2579                        // trace!("get_data: found handle");
2580                        return Ok(Arc::clone(v));
2581                    }
2582                }
2583            }
2584        } else {
2585            let readonly: bool = flags
2586                & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
2587                    as u32
2588                == 0;
2589
2590            // lookup node
2591            let node = self.lookup_node(ctx, inode, "").await?;
2592
2593            // whiteout node
2594            if node.whiteout.load(Ordering::Relaxed) {
2595                return Err(Error::from_raw_os_error(libc::ENOENT));
2596            }
2597
2598            if !readonly {
2599                // Check if upper layer exists, return EROFS is not exists.
2600                self.upper_layer
2601                    .as_ref()
2602                    .cloned()
2603                    .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2604                // copy up to upper layer
2605                self.copy_node_up(ctx, Arc::clone(&node)).await?;
2606            }
2607
2608            let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
2609            let handle_data = HandleData {
2610                node: Arc::clone(&node),
2611                real_handle: Some(RealHandle {
2612                    layer,
2613                    in_upper_layer,
2614                    inode,
2615                    handle: AtomicU64::new(0),
2616                }),
2617            };
2618            return Ok(Arc::new(handle_data));
2619        }
2620
2621        Err(Error::from_raw_os_error(libc::ENOENT))
2622    }
2623
2624    // extend or init the inodes number to one overlay if the current number is done.
2625    pub async fn extend_inode_alloc(&self, key: u64) {
2626        let next_inode = key * INODE_ALLOC_BATCH;
2627        let limit_inode = next_inode + INODE_ALLOC_BATCH - 1;
2628        self.inodes
2629            .write()
2630            .await
2631            .extend_inode_number(next_inode, limit_inode);
2632    }
2633}
2634
2635/// Mounts the filesystem using the given parameters and returns the mount handle.
2636///
2637/// # Parameters
2638/// - `mountpoint`: Path to the mount point.
2639/// - `upperdir`: Path to the upper directory.
2640/// - `lowerdir`: Paths to the lower directories.
2641/// - `not_unprivileged`: If true, use privileged mount; otherwise, unprivileged mount.
2642///
2643/// # Returns
2644/// A mount handle on success.
2645pub async fn mount_fs(
2646    mountpoint: String,
2647    upperdir: String,
2648    lowerdir: Vec<String>,
2649    not_unprivileged: bool,
2650) -> rfuse3::raw::MountHandle {
2651    // Create lower layers
2652    let mut lower_layers = Vec::new();
2653    for lower in &lowerdir {
2654        let layer = new_passthroughfs_layer(lower)
2655            .await
2656            .expect("Failed to create lower filesystem layer");
2657        lower_layers.push(Arc::new(layer));
2658    }
2659    // Create upper layer
2660    let upper_layer = Arc::new(
2661        new_passthroughfs_layer(&upperdir)
2662            .await
2663            .expect("Failed to create upper filesystem layer"),
2664    );
2665
2666    // Configure overlay filesystem
2667    let config = Config {
2668        mountpoint: mountpoint.clone(),
2669        do_import: true,
2670        ..Default::default()
2671    };
2672    let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1)
2673        .expect("Failed to initialize OverlayFs");
2674    let logfs = LoggingFileSystem::new(overlayfs);
2675
2676    let mount_path: OsString = OsString::from(mountpoint);
2677
2678    // Obtain the current user's uid and gid
2679    let uid = unsafe { libc::getuid() };
2680    let gid = unsafe { libc::getgid() };
2681
2682    let mut mount_options = MountOptions::default();
2683    mount_options.force_readdir_plus(true).uid(uid).gid(gid);
2684
2685    // Mount filesystem based on privilege flag and return the mount handle
2686    if !not_unprivileged {
2687        println!("Mounting with unprivileged mode");
2688        Session::new(mount_options)
2689            .mount_with_unprivileged(logfs, mount_path)
2690            .await
2691            .expect("Unprivileged mount failed")
2692    } else {
2693        println!("Mounting with privileged mode");
2694        Session::new(mount_options)
2695            .mount(logfs, mount_path)
2696            .await
2697            .expect("Privileged mount failed")
2698    }
2699}