// libfuse_fs/overlayfs/mod.rs

1// Copyright (C) 2023 Ant Group. All rights reserved.
2//  2024 From [fuse_backend_rs](https://github.com/cloud-hypervisor/fuse-backend-rs)
3// SPDX-License-Identifier: Apache-2.0
4
5#![allow(missing_docs)]
6mod async_io;
7pub mod config;
8mod inode_store;
9mod layer;
10mod utils;
11
12//mod tempfile;
13use core::panic;
14use std::collections::HashMap;
15use std::ffi::{OsStr, OsString};
16use std::future::Future;
17use std::io::{Error, Result};
18
19use config::Config;
20use futures::StreamExt as _;
21use rfuse3::raw::reply::{
22    DirectoryEntry, DirectoryEntryPlus, ReplyAttr, ReplyEntry, ReplyOpen, ReplyStatFs,
23};
24use rfuse3::raw::{Filesystem, Request, Session};
25use std::sync::{Arc, Weak};
26
27use rfuse3::{Errno, FileType, MountOptions, mode_from_kind_and_perm};
28const SLASH_ASCII: char = '/';
29use futures::future::join_all;
30use futures::stream::iter;
31
32use crate::passthrough::newlogfs::LoggingFileSystem;
33use crate::passthrough::{PassthroughFs, new_passthroughfs_layer};
34use inode_store::InodeStore;
35use layer::Layer;
36use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
37
38use tokio::sync::{Mutex, RwLock};
39
/// Numeric type used for inode numbers in this module.
pub type Inode = u64;
/// Numeric type used for file handles in this module.
pub type Handle = u64;

// Concrete filesystem type used for every layer of the overlay.
type BoxedLayer = PassthroughFs;
//type BoxedFileSystem = Box<dyn FileSystem<Inode = Inode, Handle = Handle> + Send + Sync>;
// NOTE(review): presumably the size of an inode-number allocation batch — confirm against `InodeStore`.
const INODE_ALLOC_BATCH: u64 = 0x1_0000_0000;
/// One inode object living in one specific layer.
///
/// Each `RealInode` maps to one layer entry, which is 'forgotten' when the
/// value is dropped (see the `Drop` impl, which calls `forget` on the layer).
///
/// Important: do not implement `Clone` for this type, otherwise the refcount
/// bookkeeping will be messed up.
pub(crate) struct RealInode {
    /// Layer that owns this inode.
    pub layer: Arc<PassthroughFs>,
    /// `true` when `layer` is the upper layer.
    pub in_upper_layer: bool,
    /// Inode number inside `layer`.
    pub inode: u64,
    /// File is whiteouted, we need to hide it.
    pub whiteout: bool,
    /// Directory is opaque, we need to hide all entries inside it.
    pub opaque: bool,
    /// Cached attributes, if they were available when this value was built.
    pub stat: Option<ReplyAttr>,
}
59
/// Merged view of one filesystem object across the layers.
///
/// An `OverlayInode` can be operated on by multiple tasks, so every mutable
/// part is wrapped in a lock or an atomic.
#[derive(Default)]
pub(crate) struct OverlayInode {
    /// Inode hash table, map from 'name' to 'OverlayInode'.
    pub childrens: Mutex<HashMap<String, Arc<OverlayInode>>>,
    /// Weak back-reference to the parent directory node.
    pub parent: Mutex<Weak<OverlayInode>>,
    /// Backend inodes from all layers; an upper-layer inode, when present, is stored first.
    pub real_inodes: Mutex<Vec<Arc<RealInode>>>,
    /// Inode number.
    pub inode: u64,
    /// Path of this node; children are built as `"{parent path}/{name}"`.
    pub path: RwLock<String>,
    /// Name of this node inside its parent directory.
    pub name: RwLock<String>,
    /// Lookup reference counter.
    pub lookups: AtomicU64,
    /// Node is whiteout-ed.
    pub whiteout: AtomicBool,
    /// Directory is loaded.
    pub loaded: AtomicBool,
}
78
/// Cache policy selector; `Auto` is the default variant.
// NOTE(review): the exact caching semantics of each variant are not visible in this file — confirm against `Config`.
#[derive(Default)]
pub enum CachePolicy {
    Never,
    #[default]
    Auto,
    Always,
}
/// Overlay filesystem state: one optional upper layer stacked over a list of lower layers.
pub struct OverlayFs {
    /// User supplied configuration.
    config: Config,
    /// Lower layers.
    lower_layers: Vec<Arc<PassthroughFs>>,
    /// Upper layer, if any.
    upper_layer: Option<Arc<PassthroughFs>>,
    /// All inodes in FS.
    inodes: RwLock<InodeStore>,
    /// Open file handles.
    handles: Mutex<HashMap<u64, Arc<HandleData>>>,
    /// Counter for handle numbers; initialised to 1 in `new`.
    next_handle: AtomicU64,
    // The following flags all start as `false` in `new`.
    // NOTE(review): presumably negotiated FUSE options — confirm where they are set.
    writeback: AtomicBool,
    no_open: AtomicBool,
    no_opendir: AtomicBool,
    killpriv_v2: AtomicBool,
    perfile_dax: AtomicBool,
    /// Inode number of the overlay root, returned by `root_inode()`.
    root_inodes: u64,
}
102
/// Wrapper of one opened inode in a specific layer. It must not implement `Clone`.
struct RealHandle {
    /// Layer that owns the handle.
    layer: Arc<PassthroughFs>,
    /// `true` when `layer` is the upper layer.
    in_upper_layer: bool,
    /// Inode number inside `layer`.
    inode: u64,
    /// Handle number stored atomically.
    handle: AtomicU64,
}
110
/// Bookkeeping for one entry of `OverlayFs::handles`.
struct HandleData {
    /// Overlay node the handle refers to.
    node: Arc<OverlayInode>,
    //offset: libc::off_t,
    /// Layer-level handle, when one exists.
    real_handle: Option<RealHandle>,
}
116
117// RealInode is a wrapper of one inode in specific layer.
118// All layer operations returning Entry should be wrapped in RealInode implementation
119// so that we can increase the refcount(lookup count) of each inode and decrease it after Drop.
120// Important: do not impl 'Copy' trait for it or refcount will be messed up.
121impl RealInode {
122    async fn new(
123        layer: Arc<PassthroughFs>,
124        in_upper_layer: bool,
125        inode: u64,
126        whiteout: bool,
127        opaque: bool,
128    ) -> Self {
129        let mut ri = RealInode {
130            layer,
131            in_upper_layer,
132            inode,
133            whiteout,
134            opaque,
135            stat: None,
136        };
137        match ri.stat64_ignore_enoent(&Request::default()).await {
138            Ok(v) => {
139                ri.stat = v;
140            }
141            Err(e) => {
142                error!("stat64 failed during RealInode creation: {}", e);
143            }
144        }
145        ri
146    }
147
148    async fn stat64(&self, req: &Request) -> Result<ReplyAttr> {
149        let layer = self.layer.as_ref();
150        if self.inode == 0 {
151            return Err(Error::from_raw_os_error(libc::ENOENT));
152        }
153        layer
154            .getattr(*req, self.inode, None, 0)
155            .await
156            .map_err(|e| e.into())
157    }
158
159    async fn stat64_ignore_enoent(&self, req: &Request) -> Result<Option<ReplyAttr>> {
160        match self.stat64(req).await {
161            Ok(v1) => Ok(Some(v1)),
162            Err(e) => match e.raw_os_error() {
163                Some(raw_error) => {
164                    if raw_error != libc::ENOENT || raw_error != libc::ENAMETOOLONG {
165                        return Ok(None);
166                    }
167                    Err(e)
168                }
169                None => Err(e),
170            },
171        }
172    }
173
174    // Do real lookup action in specific layer, this call will increase Entry refcount which must be released later.
175    async fn lookup_child_ignore_enoent(
176        &self,
177        ctx: Request,
178        name: &str,
179    ) -> Result<Option<ReplyEntry>> {
180        let cname = OsStr::new(name);
181        // Real inode must have a layer.
182        let layer = self.layer.as_ref();
183        match layer.lookup(ctx, self.inode, cname).await {
184            Ok(v) => {
185                // Negative entry also indicates missing entry.
186                if v.attr.ino == 0 {
187                    return Ok(None);
188                }
189                Ok(Some(v))
190            }
191            Err(e) => {
192                let ioerror: std::io::Error = e.into();
193                if let Some(raw_error) = ioerror.raw_os_error() {
194                    if raw_error == libc::ENOENT || raw_error == libc::ENAMETOOLONG {
195                        return Ok(None);
196                    }
197                }
198
199                Err(e.into())
200            }
201        }
202    }
203
204    // Find child inode in same layer under this directory(Self).
205    // Return None if not found.
206    async fn lookup_child(&self, ctx: Request, name: &str) -> Result<Option<RealInode>> {
207        if self.whiteout {
208            return Ok(None);
209        }
210
211        let layer = self.layer.as_ref();
212
213        // Find child Entry with <name> under directory with inode <self.inode>.
214        match self.lookup_child_ignore_enoent(ctx, name).await? {
215            Some(v) => {
216                // The Entry must be forgotten in each layer, which will be done automatically by Drop operation.
217                let (whiteout, opaque) = if v.attr.kind == FileType::Directory {
218                    (false, layer.is_opaque(ctx, v.attr.ino).await?)
219                } else {
220                    (layer.is_whiteout(ctx, v.attr.ino).await?, false)
221                };
222
223                Ok(Some(RealInode {
224                    layer: self.layer.clone(),
225                    in_upper_layer: self.in_upper_layer,
226                    inode: v.attr.ino,
227                    whiteout,
228                    opaque,
229                    stat: Some(ReplyAttr {
230                        ttl: v.ttl,
231                        attr: v.attr,
232                    }),
233                }))
234            }
235            None => Ok(None),
236        }
237    }
238
239    // Read directory entries from specific RealInode, error out if it's not directory.
240    async fn readdir(&self, ctx: Request) -> Result<HashMap<String, RealInode>> {
241        // Deleted inode should not be read.
242        if self.whiteout {
243            return Err(Error::from_raw_os_error(libc::ENOENT));
244        }
245
246        let stat = match self.stat.clone() {
247            Some(v) => v,
248            None => self.stat64(&ctx).await?,
249        };
250
251        // Must be directory.
252        if stat.attr.kind != FileType::Directory {
253            return Err(Error::from_raw_os_error(libc::ENOTDIR));
254        }
255
256        // Open the directory and load each entry.
257        let opendir_res = self
258            .layer
259            .opendir(ctx, self.inode, libc::O_RDONLY as u32)
260            .await;
261        let handle = match opendir_res {
262            Ok(handle) => handle,
263
264            // opendir may not be supported if no_opendir is set, so we can ignore this error.
265            Err(e) => {
266                let ioerror: std::io::Error = e.into();
267                match ioerror.raw_os_error() {
268                    Some(raw_error) => {
269                        if raw_error == libc::ENOSYS {
270                            // We can still call readdir with inode if opendir is not supported in this layer.
271                            ReplyOpen { fh: 0, flags: 0 }
272                        } else {
273                            return Err(e.into());
274                        }
275                    }
276                    None => {
277                        return Err(e.into());
278                    }
279                }
280            }
281        };
282
283        let child_names = self.layer.readdir(ctx, self.inode, handle.fh, 0).await?;
284        // Non-zero handle indicates successful 'open', we should 'release' it.
285        if handle.fh > 0 {
286            self.layer
287                .releasedir(ctx, self.inode, handle.fh, handle.flags)
288                .await?
289            //DIFF
290        }
291
292        // Lookup all child and construct "RealInode"s.
293        let child_real_inodes = Arc::new(Mutex::new(HashMap::new()));
294
295        let a_map = child_names.entries.map(|entery| async {
296            match entery {
297                Ok(dire) => {
298                    let dname = dire.name.into_string().unwrap();
299                    if dname == "." || dname == ".." {
300                        // Skip . and .. entries.
301                        return Ok(());
302                    }
303                    if let Some(child) = self.lookup_child(ctx, &dname).await? {
304                        child_real_inodes.lock().await.insert(dname, child);
305                    }
306                    Ok(())
307                }
308                Err(err) => Err(err),
309            }
310        });
311        let k = join_all(a_map.collect::<Vec<_>>().await).await;
312        drop(k);
313        // Now into_inner func is safety.
314        let re = Arc::try_unwrap(child_real_inodes)
315            .map_err(|_| Errno::new_not_exist())?
316            .into_inner();
317
318        Ok(re)
319    }
320
321    async fn create_whiteout(&self, ctx: Request, name: &str) -> Result<RealInode> {
322        if !self.in_upper_layer {
323            return Err(Error::from_raw_os_error(libc::EROFS));
324        }
325
326        // from &str to &OsStr
327        let name_osstr = OsStr::new(name);
328        let entry = self
329            .layer
330            .create_whiteout(ctx, self.inode, name_osstr)
331            .await?;
332
333        // Wrap whiteout to RealInode.
334        Ok(RealInode {
335            layer: self.layer.clone(),
336            in_upper_layer: true,
337            inode: entry.attr.ino,
338            whiteout: true,
339            opaque: false,
340            stat: Some(ReplyAttr {
341                ttl: entry.ttl,
342                attr: entry.attr,
343            }),
344        })
345    }
346
347    async fn mkdir(&self, ctx: Request, name: &str, mode: u32, umask: u32) -> Result<RealInode> {
348        if !self.in_upper_layer {
349            return Err(Error::from_raw_os_error(libc::EROFS));
350        }
351
352        let name_osstr = OsStr::new(name);
353        let entry = self
354            .layer
355            .mkdir(ctx, self.inode, name_osstr, mode, umask)
356            .await?;
357
358        // update node's first_layer
359        Ok(RealInode {
360            layer: self.layer.clone(),
361            in_upper_layer: true,
362            inode: entry.attr.ino,
363            whiteout: false,
364            opaque: false,
365            stat: Some(ReplyAttr {
366                ttl: entry.ttl,
367                attr: entry.attr,
368            }),
369        })
370    }
371
372    async fn create(
373        &self,
374        ctx: Request,
375        name: &str,
376        mode: u32,
377        flags: u32,
378    ) -> Result<(RealInode, Option<u64>)> {
379        if !self.in_upper_layer {
380            return Err(Error::from_raw_os_error(libc::EROFS));
381        }
382        let name = OsStr::new(name);
383        let create_rep = self
384            .layer
385            .create(ctx, self.inode, name, mode, flags)
386            .await?;
387
388        Ok((
389            RealInode {
390                layer: self.layer.clone(),
391                in_upper_layer: true,
392                inode: create_rep.attr.ino,
393                whiteout: false,
394                opaque: false,
395                stat: Some(ReplyAttr {
396                    ttl: create_rep.ttl,
397                    attr: create_rep.attr,
398                }),
399            },
400            Some(create_rep.fh),
401        ))
402    }
403
404    async fn mknod(
405        &self,
406        ctx: Request,
407        name: &str,
408        mode: u32,
409        rdev: u32,
410        _umask: u32,
411    ) -> Result<RealInode> {
412        if !self.in_upper_layer {
413            return Err(Error::from_raw_os_error(libc::EROFS));
414        }
415        let name = OsStr::new(name);
416        let rep = self.layer.mknod(ctx, self.inode, name, mode, rdev).await?;
417        Ok(RealInode {
418            layer: self.layer.clone(),
419            in_upper_layer: true,
420            inode: rep.attr.ino,
421            whiteout: false,
422            opaque: false,
423            stat: Some(ReplyAttr {
424                ttl: rep.ttl,
425                attr: rep.attr,
426            }),
427        })
428    }
429
430    async fn link(&self, ctx: Request, ino: u64, name: &str) -> Result<RealInode> {
431        if !self.in_upper_layer {
432            return Err(Error::from_raw_os_error(libc::EROFS));
433        }
434        let name = OsStr::new(name);
435        let entry = self.layer.link(ctx, ino, self.inode, name).await?;
436
437        let opaque = if utils::is_dir(&entry.attr.kind) {
438            self.layer.is_opaque(ctx, entry.attr.ino).await?
439        } else {
440            false
441        };
442        Ok(RealInode {
443            layer: self.layer.clone(),
444            in_upper_layer: true,
445            inode: entry.attr.ino,
446            whiteout: false,
447            opaque,
448            stat: Some(ReplyAttr {
449                ttl: entry.ttl,
450                attr: entry.attr,
451            }),
452        })
453    }
454
455    // Create a symlink in self directory.
456    async fn symlink(&self, ctx: Request, link_name: &str, filename: &str) -> Result<RealInode> {
457        if !self.in_upper_layer {
458            return Err(Error::from_raw_os_error(libc::EROFS));
459        }
460        let link_name = OsStr::new(link_name);
461        let filename = OsStr::new(filename);
462        let entry = self
463            .layer
464            .symlink(ctx, self.inode, filename, link_name)
465            .await?;
466
467        Ok(RealInode {
468            layer: self.layer.clone(),
469            in_upper_layer: true,
470            inode: entry.attr.ino,
471            whiteout: false,
472            opaque: false,
473            stat: Some(ReplyAttr {
474                ttl: entry.ttl,
475                attr: entry.attr,
476            }),
477        })
478    }
479}
480
481impl Drop for RealInode {
482    fn drop(&mut self) {
483        let layer = Arc::clone(&self.layer);
484        let inode = self.inode;
485        tokio::spawn(async move {
486            let ctx = Request::default();
487            layer.forget(ctx, inode, 1).await;
488        });
489    }
490}
491
492impl OverlayInode {
493    pub fn new() -> Self {
494        OverlayInode::default()
495    }
496    // Allocate new OverlayInode based on one RealInode,
497    // inode number is always 0 since only OverlayFs has global unique inode allocator.
498    pub async fn new_from_real_inode(
499        name: &str,
500        ino: u64,
501        path: String,
502        real_inode: RealInode,
503    ) -> Self {
504        let mut new = OverlayInode::new();
505        new.inode = ino;
506        new.path = path.into();
507        new.name = name.to_string().into();
508        new.whiteout.store(real_inode.whiteout, Ordering::Relaxed);
509        new.lookups = AtomicU64::new(1);
510        new.real_inodes = Mutex::new(vec![real_inode.into()]);
511        new
512    }
513
514    pub async fn new_from_real_inodes(
515        name: &str,
516        ino: u64,
517        path: String,
518        real_inodes: Vec<RealInode>,
519    ) -> Result<Self> {
520        if real_inodes.is_empty() {
521            error!("BUG: new_from_real_inodes() called with empty real_inodes");
522            return Err(Error::from_raw_os_error(libc::EINVAL));
523        }
524
525        let mut first = true;
526        let mut new = Self::new();
527        for ri in real_inodes {
528            let whiteout = ri.whiteout;
529            let opaque = ri.opaque;
530            let stat = match &ri.stat {
531                Some(v) => v.clone(),
532                None => ri.stat64(&Request::default()).await?,
533            };
534
535            if first {
536                first = false;
537                new = Self::new_from_real_inode(name, ino, path.clone(), ri).await;
538
539                // This is whiteout, no need to check lower layers.
540                if whiteout {
541                    break;
542                }
543
544                // A non-directory file shadows all lower layers as default.
545                if !utils::is_dir(&stat.attr.kind) {
546                    break;
547                }
548
549                // Opaque directory shadows all lower layers.
550                if opaque {
551                    break;
552                }
553            } else {
554                // This is whiteout, no need to record this, break directly.
555                if ri.whiteout {
556                    break;
557                }
558
559                // Only directory have multiple real inodes, so if this is non-first real-inode
560                // and it's not directory, it should indicates some invalid layout. @weizhang555
561                if !utils::is_dir(&stat.attr.kind) {
562                    error!("invalid layout: non-directory has multiple real inodes");
563                    break;
564                }
565
566                // Valid directory.
567                new.real_inodes.lock().await.push(ri.into());
568                // Opaque directory shadows all lower layers.
569                if opaque {
570                    break;
571                }
572            }
573        }
574        Ok(new)
575    }
576
577    pub async fn stat64(&self, ctx: Request) -> Result<ReplyAttr> {
578        // try layers in order or just take stat from first layer?
579        for l in self.real_inodes.lock().await.iter() {
580            if let Some(v) = l.stat64_ignore_enoent(&ctx).await? {
581                return Ok(v);
582            }
583        }
584
585        // not in any layer
586        Err(Error::from_raw_os_error(libc::ENOENT))
587    }
588
589    pub async fn is_dir(&self, ctx: Request) -> Result<bool> {
590        let st = self.stat64(ctx).await?;
591        Ok(utils::is_dir(&st.attr.kind))
592    }
593
594    pub async fn count_entries_and_whiteout(&self, ctx: Request) -> Result<(u64, u64)> {
595        let mut count = 0;
596        let mut whiteouts = 0;
597
598        let st = self.stat64(ctx).await?;
599
600        // must be directory
601        if !utils::is_dir(&st.attr.kind) {
602            return Err(Error::from_raw_os_error(libc::ENOTDIR));
603        }
604
605        for (_, child) in self.childrens.lock().await.iter() {
606            if child.whiteout.load(Ordering::Relaxed) {
607                whiteouts += 1;
608            } else {
609                count += 1;
610            }
611        }
612        Ok((count, whiteouts))
613    }
614
615    pub async fn open(
616        &self,
617        ctx: Request,
618        flags: u32,
619        _fuse_flags: u32,
620    ) -> Result<(Arc<BoxedLayer>, ReplyOpen)> {
621        let (layer, _, inode) = self.first_layer_inode().await;
622        let ro = layer.as_ref().open(ctx, inode, flags).await?;
623        Ok((layer, ro))
624    }
625
626    // Self is directory, fill all childrens.
627    pub async fn scan_childrens(self: &Arc<Self>, ctx: Request) -> Result<Vec<OverlayInode>> {
628        let st = self.stat64(ctx).await?;
629        if !utils::is_dir(&st.attr.kind) {
630            return Err(Error::from_raw_os_error(libc::ENOTDIR));
631        }
632
633        let mut all_layer_inodes: HashMap<String, Vec<RealInode>> = HashMap::new();
634        // read out directories from each layer
635        // Scan from upper layer to lower layer.
636        for ri in self.real_inodes.lock().await.iter() {
637            if ri.whiteout {
638                // Node is deleted from some upper layer, skip it.
639                debug!("directory is whiteout");
640                break;
641            }
642
643            let stat = match &ri.stat {
644                Some(v) => v.clone(),
645                None => ri.stat64(&ctx).await?,
646            };
647
648            if !utils::is_dir(&stat.attr.kind) {
649                debug!("{} is not a directory", self.path.read().await);
650                // not directory
651                break;
652            }
653
654            // Read all entries from one layer.
655            let entries: HashMap<String, RealInode> = ri.readdir(ctx).await?;
656
657            // Merge entries from one layer to all_layer_inodes.
658            for (name, inode) in entries {
659                match all_layer_inodes.get_mut(&name) {
660                    Some(v) => {
661                        // Append additional RealInode to the end of vector.
662                        v.push(inode)
663                    }
664                    None => {
665                        all_layer_inodes.insert(name, vec![inode]);
666                    }
667                }
668            }
669
670            // if opaque, stop here
671            if ri.opaque {
672                debug!("directory {} is opaque", self.path.read().await);
673                break;
674            }
675        }
676
677        // Construct OverlayInode for each entry.
678        let mut childrens = vec![];
679        for (name, real_inodes) in all_layer_inodes {
680            // Inode numbers are not allocated yet.
681            let path = format!("{}/{}", self.path.read().await, name);
682            let new = Self::new_from_real_inodes(name.as_str(), 0, path, real_inodes).await?;
683            childrens.push(new);
684        }
685
686        Ok(childrens)
687    }
688
    /// Creates a directory for this node in the upper layer; the node must be a directory.
    ///
    /// Missing ancestors are created first by recursing into the parent.
    ///
    /// # Arguments
    /// * `mode_umask`: `Some((mode, umask))` to use for the new directory. With
    ///   `None`, the mode is derived from this node's current kind and
    ///   permissions and the umask is 0.
    ///
    /// # Errors
    /// * `ENOTDIR` if this node is not a directory.
    /// * An error if the parent reference can no longer be upgraded.
    /// * `EINVAL` if the parent still has no upper inode after the recursion.
    /// * Any error from the underlying `mkdir`.
    pub async fn create_upper_dir(
        self: Arc<Self>,
        ctx: Request,
        mode_umask: Option<(u32, u32)>,
    ) -> Result<()> {
        let st = self.stat64(ctx).await?;
        if !utils::is_dir(&st.attr.kind) {
            return Err(Error::from_raw_os_error(libc::ENOTDIR));
        }

        // If node already has upper layer, we can just return here.
        if self.in_upper_layer().await {
            return Ok(());
        }

        // not in upper layer, check parent.
        let pnode = if let Some(n) = self.parent.lock().await.upgrade() {
            Arc::clone(&n)
        } else {
            return Err(Error::other("no parent?"));
        };

        if !pnode.in_upper_layer().await {
            // The recursive future is boxed so that its size is finite.
            Box::pin(pnode.clone().create_upper_dir(ctx, None)).await?; // recursive call
        }
        // Slot through which the callback hands the created RealInode back to this function.
        let child: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
        let c_name = self.name.read().await.clone();
        // The callback runs while the parent's `real_inodes` lock is held.
        let _ = pnode
            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
                match parent_upper_inode {
                    Some(parent_ri) => {
                        let ri = match mode_umask {
                            Some((mode, umask)) => {
                                parent_ri.mkdir(ctx, &c_name, mode, umask).await?
                            }
                            None => {
                                parent_ri
                                    .mkdir(
                                        ctx,
                                        &c_name,
                                        mode_from_kind_and_perm(st.attr.kind, st.attr.perm),
                                        0,
                                    )
                                    .await?
                            }
                        };
                        // create directory here
                        child.lock().await.replace(ri);
                    }
                    None => {
                        error!(
                            "BUG: parent {} has no upper inode after create_upper_dir",
                            pnode.inode
                        );
                        return Err(Error::from_raw_os_error(libc::EINVAL));
                    }
                }
                Ok(false)
            })
            .await?;

        if let Some(ri) = child.lock().await.take() {
            // Push the new real inode to the front of vector.
            self.add_upper_inode(ri, false).await;
        }

        Ok(())
    }
758
759    // Add new upper RealInode to OverlayInode, clear all lower RealInodes if 'clear_lowers' is true.
760    async fn add_upper_inode(self: &Arc<Self>, ri: RealInode, clear_lowers: bool) {
761        let mut inodes = self.real_inodes.lock().await;
762        // Update self according to upper attribute.
763        self.whiteout.store(ri.whiteout, Ordering::Relaxed);
764
765        // Push the new real inode to the front of vector.
766        let mut new = vec![Arc::new(ri)];
767        // Drain lower RealInodes.
768        let lowers = inodes.drain(..).collect::<Vec<Arc<RealInode>>>();
769        if !clear_lowers {
770            // If not clear lowers, append them to the end of vector.
771            new.extend(lowers);
772        }
773        inodes.extend(new);
774    }
775
776    // return the uppder layer fs.
777    pub async fn in_upper_layer(&self) -> bool {
778        let all_inodes = self.real_inodes.lock().await;
779        let first = all_inodes.first();
780        match first {
781            Some(v) => v.in_upper_layer,
782            None => false,
783        }
784    }
785
786    pub async fn upper_layer_only(&self) -> bool {
787        let real_inodes = self.real_inodes.lock().await;
788        let first = real_inodes.first();
789        match first {
790            Some(v) => {
791                if !v.in_upper_layer {
792                    false
793                } else {
794                    real_inodes.len() == 1
795                }
796            }
797            None => false,
798        }
799    }
800
801    pub async fn first_layer_inode(&self) -> (Arc<BoxedLayer>, bool, u64) {
802        let all_inodes = self.real_inodes.lock().await;
803        let first = all_inodes.first();
804        match first {
805            Some(v) => (v.layer.clone(), v.in_upper_layer, v.inode),
806            None => panic!("BUG: dangling OverlayInode"),
807        }
808    }
809
810    pub async fn child(&self, name: &str) -> Option<Arc<OverlayInode>> {
811        self.childrens.lock().await.get(name).cloned()
812    }
813
814    pub async fn remove_child(&self, name: &str) -> Option<Arc<OverlayInode>> {
815        self.childrens.lock().await.remove(name)
816    }
817
818    pub async fn insert_child(&self, name: &str, node: Arc<OverlayInode>) {
819        self.childrens.lock().await.insert(name.to_string(), node);
820    }
821
822    /// Handles operations on the upper layer inode of an `OverlayInode` in a thread-safe manner.
823    ///
824    /// This function locks the `real_inodes` field of the `OverlayInode` and retrieves the first
825    /// real inode (if any). If the first inode exists and belongs to the upper layer (`in_upper_layer` is true),
826    /// the provided callback `f` is invoked with the inode wrapped in `Some`. Otherwise, `f` is invoked with `None`.
827    ///
828    /// # Arguments
829    /// * `f`: A closure that takes an `Option<RealInode>` and returns a future. The future resolves to a `Result<bool>`.
830    ///
831    /// # Returns
832    /// * `Ok(bool)`: The result of invoking the callback `f`.
833    /// * `Err(Erron)`: An error is returned if:
834    ///   - There are no backend inodes (`real_inodes` is empty), indicating a dangling `OverlayInode`.
835    ///   - The callback `f` itself returns an error.
836    ///
837    /// # Behavior
838    /// 1. Locks the `real_inodes` field to ensure thread safety.
839    /// 2. Checks if the first inode exists:
840    ///    - If it exists and is in the upper layer, invokes `f(Some(inode))`.
841    ///    - If it exists but is not in the upper layer, invokes `f(None)`.
842    /// 3. If no inodes exist, returns an error indicating a dangling `OverlayInode`.
843    ///
844    /// # Example Use Case
845    /// This function is typically used to perform operations on the upper layer inode of an `OverlayInode`,
846    /// such as creating, modifying, or deleting files/directories in the overlay filesystem's upper layer.
847    pub async fn handle_upper_inode_locked<F, Fut>(&self, f: F) -> Result<bool>
848    where
849        // Can pass a &RealInode (or None) to f for any lifetime 'a
850        F: FnOnce(Option<Arc<RealInode>>) -> Fut,
851        // f returns a Future that must live at least as long as 'a
852        Fut: Future<Output = Result<bool>>,
853    {
854        let all_inodes = self.real_inodes.lock().await;
855        let first = all_inodes.first();
856        match first {
857            Some(v) => {
858                if v.in_upper_layer {
859                    f(Some(v.clone())).await
860                } else {
861                    f(None).await
862                }
863            }
864            None => Err(Error::other(format!(
865                "BUG: dangling OverlayInode {} without any backend inode",
866                self.inode
867            ))),
868        }
869    }
870}
871#[allow(unused)]
872fn entry_type_from_mode(mode: libc::mode_t) -> u8 {
873    match mode & libc::S_IFMT {
874        libc::S_IFBLK => libc::DT_BLK,
875        libc::S_IFCHR => libc::DT_CHR,
876        libc::S_IFDIR => libc::DT_DIR,
877        libc::S_IFIFO => libc::DT_FIFO,
878        libc::S_IFLNK => libc::DT_LNK,
879        libc::S_IFREG => libc::DT_REG,
880        libc::S_IFSOCK => libc::DT_SOCK,
881        _ => libc::DT_UNKNOWN,
882    }
883}
884impl OverlayFs {
885    pub fn new(
886        upper: Option<Arc<BoxedLayer>>,
887        lowers: Vec<Arc<BoxedLayer>>,
888        params: Config,
889        root_inode: u64,
890    ) -> Result<Self> {
891        // load root inode
892        Ok(OverlayFs {
893            config: params,
894            lower_layers: lowers,
895            upper_layer: upper,
896            inodes: RwLock::new(InodeStore::new()),
897            handles: Mutex::new(HashMap::new()),
898            next_handle: AtomicU64::new(1),
899            writeback: AtomicBool::new(false),
900            no_open: AtomicBool::new(false),
901            no_opendir: AtomicBool::new(false),
902            killpriv_v2: AtomicBool::new(false),
903            perfile_dax: AtomicBool::new(false),
904            root_inodes: root_inode,
905        })
906    }
907
908    pub fn root_inode(&self) -> Inode {
909        self.root_inodes
910    }
911
912    async fn alloc_inode(&self, path: &str) -> Result<u64> {
913        self.inodes.write().await.alloc_inode(path)
914    }
915
916    /// Add a file layer and stack and merge the previous file layers.
917    pub async fn push_layer(&mut self, layer: Arc<BoxedLayer>) -> Result<()> {
918        let upper = self.upper_layer.take();
919        if let Some(upper) = upper {
920            self.lower_layers.push(upper);
921        }
922        self.upper_layer = Some(layer);
923        // TODO: merge previous file layers. need optimization
924        self.import().await?;
925        Ok(())
926    }
927
928    pub async fn import(&self) -> Result<()> {
929        let mut root = OverlayInode::new();
930        root.inode = self.root_inode();
931        root.path = String::from("").into();
932        root.name = String::from("").into();
933        root.lookups = AtomicU64::new(2);
934        root.real_inodes = Mutex::new(vec![]);
935        let ctx = Request::default();
936
937        // Update upper inode
938        if let Some(layer) = self.upper_layer.as_ref() {
939            let ino = layer.root_inode();
940            let real = RealInode::new(
941                layer.clone(),
942                true,
943                ino,
944                false,
945                layer.is_opaque(ctx, ino).await?,
946            )
947            .await;
948            root.real_inodes.lock().await.push(real.into());
949        }
950
951        // Update lower inodes.
952        for layer in self.lower_layers.iter() {
953            let ino = layer.root_inode();
954            let real: RealInode = RealInode::new(
955                layer.clone(),
956                false,
957                ino,
958                false,
959                layer.is_opaque(ctx, ino).await?,
960            )
961            .await;
962            root.real_inodes.lock().await.push(real.into());
963        }
964        let root_node = Arc::new(root);
965
966        // insert root inode into hash
967        self.insert_inode(self.root_inode(), Arc::clone(&root_node))
968            .await;
969
970        info!("loading root directory\n");
971        self.load_directory(ctx, &root_node).await?;
972
973        Ok(())
974    }
975
976    async fn root_node(&self) -> Arc<OverlayInode> {
977        // Root node must exist.
978        self.get_active_inode(self.root_inode()).await.unwrap()
979    }
980
981    async fn insert_inode(&self, inode: u64, node: Arc<OverlayInode>) {
982        self.inodes.write().await.insert_inode(inode, node).await;
983    }
984
985    async fn get_active_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
986        self.inodes.read().await.get_inode(inode)
987    }
988
989    // Get inode which is active or deleted.
990    async fn get_all_inode(&self, inode: u64) -> Option<Arc<OverlayInode>> {
991        let inode_store = self.inodes.read().await;
992        match inode_store.get_inode(inode) {
993            Some(n) => Some(n),
994            None => inode_store.get_deleted_inode(inode),
995        }
996    }
997
998    // Return the inode only if it's permanently deleted from both self.inodes and self.deleted_inodes.
999    async fn remove_inode(
1000        &self,
1001        inode: u64,
1002        path_removed: Option<String>,
1003    ) -> Option<Arc<OverlayInode>> {
1004        self.inodes
1005            .write()
1006            .await
1007            .remove_inode(inode, path_removed)
1008            .await
1009    }
1010
1011    // Lookup child OverlayInode with <name> under <parent> directory.
1012    // If name is empty, return parent itself.
1013    // Parent dir will be loaded, but returned OverlayInode won't.
1014    async fn lookup_node(
1015        &self,
1016        ctx: Request,
1017        parent: Inode,
1018        name: &str,
1019    ) -> Result<Arc<OverlayInode>> {
1020        if name.contains(SLASH_ASCII) {
1021            return Err(Error::from_raw_os_error(libc::EINVAL));
1022        }
1023
1024        // Parent inode is expected to be loaded before this function is called.
1025        let pnode = match self.get_active_inode(parent).await {
1026            Some(v) => v,
1027            None => return Err(Error::from_raw_os_error(libc::ENOENT)),
1028        };
1029
1030        // Parent is whiteout-ed, return ENOENT.
1031        if pnode.whiteout.load(Ordering::Relaxed) {
1032            return Err(Error::from_raw_os_error(libc::ENOENT));
1033        }
1034
1035        let st = pnode.stat64(ctx).await?;
1036        if utils::is_dir(&st.attr.kind) && !pnode.loaded.load(Ordering::Relaxed) {
1037            // Parent is expected to be directory, load it first.
1038            self.load_directory(ctx, &pnode).await?;
1039        }
1040
1041        // Current file or dir.
1042        if name.eq(".")  
1043            // Root directory has no parent.
1044            || (parent == self.root_inode() && name.eq("..")) 
1045            // Special convention: empty name indicates current dir.
1046            || name.is_empty()
1047        {
1048            return Ok(Arc::clone(&pnode));
1049        }
1050
1051        match pnode.child(name).await {
1052            // Child is found.
1053            Some(v) => Ok(v),
1054            None => Err(Error::from_raw_os_error(libc::ENOENT)),
1055        }
1056    }
1057
1058    async fn lookup_node_ignore_enoent(
1059        &self,
1060        ctx: Request,
1061        parent: u64,
1062        name: &str,
1063    ) -> Result<Option<Arc<OverlayInode>>> {
1064        match self.lookup_node(ctx, parent, name).await {
1065            Ok(n) => Ok(Some(Arc::clone(&n))),
1066            Err(e) => {
1067                if let Some(raw_error) = e.raw_os_error() {
1068                    if raw_error == libc::ENOENT {
1069                        return Ok(None);
1070                    }
1071                }
1072                Err(e)
1073            }
1074        }
1075    }
1076
1077    // Load entries of the directory from all layers, if node is not directory, return directly.
1078    async fn load_directory(&self, ctx: Request, node: &Arc<OverlayInode>) -> Result<()> {
1079        if node.loaded.load(Ordering::Acquire) {
1080            return Ok(());
1081        }
1082
1083        // We got all childrens without inode.
1084        let childrens = node.scan_childrens(ctx).await?;
1085
1086        // =============== Start Lock Area ===================
1087        // Lock OverlayFs inodes.
1088        let mut inode_store = self.inodes.write().await;
1089        // Lock the OverlayInode and its childrens.
1090        let mut node_children = node.childrens.lock().await;
1091
1092        // Check again in case another 'load_directory' function call gets locks and want to do duplicated work.
1093        if node.loaded.load(Ordering::Relaxed) {
1094            return Ok(());
1095        }
1096
1097        // Now we have two locks' protection, Fs inodes lock and OverlayInode's childrens lock.
1098        for mut child in childrens.into_iter() {
1099            // Allocate inode for each child.
1100            let ino = inode_store.alloc_inode(&child.path.read().await)?;
1101
1102            let name = child.name.read().await.clone();
1103            child.inode = ino;
1104            // Create bi-directional link between parent and child.
1105            child.parent = Mutex::new(Arc::downgrade(node));
1106
1107            let arc_child = Arc::new(child);
1108            node_children.insert(name, arc_child.clone());
1109            // Record overlay inode in whole OverlayFs.
1110            inode_store.insert_inode(ino, arc_child).await;
1111        }
1112
1113        node.loaded.store(true, Ordering::Relaxed);
1114
1115        Ok(())
1116    }
1117
1118    async fn forget_one(&self, inode: Inode, count: u64) {
1119        if inode == self.root_inode() || inode == 0 {
1120            return;
1121        }
1122
1123        let v = match self.get_all_inode(inode).await {
1124            Some(n) => n,
1125            None => {
1126                trace!("forget unknown inode: {}", inode);
1127                return;
1128            }
1129        };
1130
1131        // Use fetch_update to atomically update lookups in a loop until it succeeds
1132        v.lookups
1133            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
1134                // If count is larger than current lookups, return 0
1135                // Otherwise subtract count from current lookups
1136                if current < count {
1137                    Some(0)
1138                } else {
1139                    Some(current - count)
1140                }
1141            })
1142            .expect("fetch_update failed");
1143
1144        let lookups = v.lookups.load(Ordering::Relaxed);
1145        trace!(
1146            "forget inode: {}, name {}, lookups: {}",
1147            inode,
1148            v.name.read().await,
1149            lookups
1150        );
1151        if lookups == 0 {
1152            debug!(
1153                "inode is forgotten: {}, name {}",
1154                inode,
1155                v.name.read().await
1156            );
1157            let _ = self.remove_inode(inode, None).await;
1158            let parent = v.parent.lock().await;
1159
1160            if let Some(p) = parent.upgrade() {
1161                // remove it from hashmap
1162                p.remove_child(&v.name.read().await).await;
1163            }
1164        }
1165    }
1166
1167    async fn do_lookup(&self, ctx: Request, parent: Inode, name: &str) -> Result<ReplyEntry> {
1168        let node = self.lookup_node(ctx, parent, name).await?;
1169
1170        if node.whiteout.load(Ordering::Relaxed) {
1171            eprintln!("Error: node.whiteout.load() called.");
1172            return Err(Error::from_raw_os_error(libc::ENOENT));
1173        }
1174
1175        let mut st = node.stat64(ctx).await?;
1176        st.attr.ino = node.inode;
1177        if utils::is_dir(&st.attr.kind) && !node.loaded.load(Ordering::Relaxed) {
1178            self.load_directory(ctx, &node).await?;
1179        }
1180
1181        // FIXME: can forget happen between found and increase reference counter?
1182        let tmp = node.lookups.fetch_add(1, Ordering::Relaxed);
1183        trace!("lookup count: {}", tmp + 1);
1184        Ok(ReplyEntry {
1185            ttl: st.ttl,
1186            attr: st.attr,
1187            generation: 0,
1188        })
1189    }
1190
1191    async fn do_statvfs(&self, ctx: Request, inode: Inode) -> Result<ReplyStatFs> {
1192        match self.get_active_inode(inode).await {
1193            Some(ovi) => {
1194                let all_inodes = ovi.real_inodes.lock().await;
1195                let real_inode = all_inodes
1196                    .first()
1197                    .ok_or(Error::other("backend inode not found"))?;
1198                Ok(real_inode.layer.statfs(ctx, real_inode.inode).await?)
1199            }
1200            None => Err(Error::from_raw_os_error(libc::ENOENT)),
1201        }
1202    }
1203
1204    #[allow(clippy::too_many_arguments)]
1205    async fn do_readdir<'a>(
1206        &self,
1207        ctx: Request,
1208        inode: Inode,
1209        handle: u64,
1210        offset: u64,
1211    ) -> Result<<OverlayFs as rfuse3::raw::Filesystem>::DirEntryStream<'a>> {
1212        // lookup the directory
1213        let ovl_inode = match self.handles.lock().await.get(&handle) {
1214            Some(dir) => dir.node.clone(),
1215            None => {
1216                // Try to get data with inode.
1217                let node = self.lookup_node(ctx, inode, ".").await?;
1218
1219                let st = node.stat64(ctx).await?;
1220                if !utils::is_dir(&st.attr.kind) {
1221                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1222                }
1223
1224                node.clone()
1225            }
1226        };
1227        self.load_directory(ctx, &ovl_inode).await?;
1228        let mut childrens = Vec::new();
1229        //add myself as "."
1230        childrens.push((".".to_string(), ovl_inode.clone()));
1231
1232        //add parent
1233        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1234            Some(p) => p.clone(),
1235            None => self.root_node().await,
1236        };
1237        childrens.push(("..".to_string(), parent_node));
1238
1239        for (_, child) in ovl_inode.childrens.lock().await.iter() {
1240            // skip whiteout node
1241            if child.whiteout.load(Ordering::Relaxed) {
1242                continue;
1243            }
1244            childrens.push((child.name.read().await.clone(), child.clone()));
1245        }
1246
1247        if offset >= childrens.len() as u64 {
1248            return Ok(iter(vec![].into_iter()));
1249        }
1250        let mut d: Vec<std::result::Result<DirectoryEntry, Errno>> = Vec::new();
1251
1252        for (index, (name, child)) in (0_u64..).zip(childrens.into_iter()) {
1253            // make struct DireEntry and Entry
1254            let st = child.stat64(ctx).await?;
1255            let dir_entry = DirectoryEntry {
1256                inode: child.inode,
1257                kind: st.attr.kind,
1258                name: name.into(),
1259                offset: (index + 1) as i64,
1260            };
1261            d.push(Ok(dir_entry));
1262        }
1263
1264        Ok(iter(d.into_iter()))
1265    }
1266
1267    #[allow(clippy::too_many_arguments)]
1268    async fn do_readdirplus<'a>(
1269        &self,
1270        ctx: Request,
1271        inode: Inode,
1272        handle: u64,
1273        offset: u64,
1274    ) -> Result<<OverlayFs as rfuse3::raw::Filesystem>::DirEntryPlusStream<'a>> {
1275        // lookup the directory
1276        let ovl_inode = match self.handles.lock().await.get(&handle) {
1277            Some(dir) => dir.node.clone(),
1278            None => {
1279                // Try to get data with inode.
1280                let node = self.lookup_node(ctx, inode, ".").await?;
1281
1282                let st = node.stat64(ctx).await?;
1283                if !utils::is_dir(&st.attr.kind) {
1284                    return Err(Error::from_raw_os_error(libc::ENOTDIR));
1285                }
1286
1287                node.clone()
1288            }
1289        };
1290        self.load_directory(ctx, &ovl_inode).await?;
1291
1292        let mut childrens = Vec::new();
1293        //add myself as "."
1294        childrens.push((".".to_string(), ovl_inode.clone()));
1295
1296        //add parent
1297        let parent_node = match ovl_inode.parent.lock().await.upgrade() {
1298            Some(p) => p.clone(),
1299            None => self.root_node().await,
1300        };
1301        childrens.push(("..".to_string(), parent_node));
1302
1303        for (_, child) in ovl_inode.childrens.lock().await.iter() {
1304            // skip whiteout node
1305            if child.whiteout.load(Ordering::Relaxed) {
1306                continue;
1307            }
1308            childrens.push((child.name.read().await.clone(), child.clone()));
1309        }
1310
1311        if offset >= childrens.len() as u64 {
1312            return Ok(iter(vec![].into_iter()));
1313        }
1314        let mut d: Vec<std::result::Result<DirectoryEntryPlus, Errno>> = Vec::new();
1315
1316        for (index, (name, child)) in (0_u64..).zip(childrens.into_iter()) {
1317            if index >= offset {
1318                // make struct DireEntry and Entry
1319                let mut st = child.stat64(ctx).await?;
1320                child.lookups.fetch_add(1, Ordering::Relaxed);
1321                st.attr.ino = child.inode;
1322                let dir_entry = DirectoryEntryPlus {
1323                    inode: child.inode,
1324                    generation: 0,
1325                    kind: st.attr.kind,
1326                    name: name.into(),
1327                    offset: (index + 1) as i64,
1328                    attr: st.attr,
1329                    entry_ttl: st.ttl,
1330                    attr_ttl: st.ttl,
1331                };
1332                d.push(Ok(dir_entry));
1333            }
1334        }
1335
1336        Ok(iter(d.into_iter()))
1337    }
1338    async fn do_mkdir(
1339        &self,
1340        ctx: Request,
1341        parent_node: Arc<OverlayInode>,
1342        name: &str,
1343        mode: u32,
1344        umask: u32,
1345    ) -> Result<()> {
1346        if self.upper_layer.is_none() {
1347            return Err(Error::from_raw_os_error(libc::EROFS));
1348        }
1349
1350        // Parent node was deleted.
1351        if parent_node.whiteout.load(Ordering::Relaxed) {
1352            return Err(Error::from_raw_os_error(libc::ENOENT));
1353        }
1354
1355        let mut delete_whiteout = false;
1356        let mut set_opaque = false;
1357        if let Some(n) = self
1358            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1359            .await?
1360        {
1361            // Node with same name exists, let's check if it's whiteout.
1362            if !n.whiteout.load(Ordering::Relaxed) {
1363                return Err(Error::from_raw_os_error(libc::EEXIST));
1364            }
1365
1366            if n.in_upper_layer().await {
1367                delete_whiteout = true;
1368            }
1369
1370            // Set opaque if child dir has lower layers.
1371            if !n.upper_layer_only().await {
1372                set_opaque = true;
1373            }
1374        }
1375
1376        // Copy parent node up if necessary.
1377        let pnode = self.copy_node_up(ctx, parent_node).await?;
1378
1379        let path = format!("{}/{}", pnode.path.read().await, name);
1380        let path_ref = &path;
1381        let new_node = Arc::new(Mutex::new(None));
1382        pnode
1383            .handle_upper_inode_locked(&mut |parent_real_inode: Option<Arc<RealInode>>| async {
1384                let parent_real_inode = match parent_real_inode {
1385                    Some(inode) => inode,
1386                    None => {
1387                        error!("BUG: parent doesn't have upper inode after copied up");
1388                        return Err(Error::from_raw_os_error(libc::EINVAL));
1389                    }
1390                };
1391                let osstr = OsStr::new(name);
1392                if delete_whiteout {
1393                    let _ = parent_real_inode
1394                        .layer
1395                        .delete_whiteout(ctx, parent_real_inode.inode, osstr)
1396                        .await;
1397                }
1398
1399                // Allocate inode number.
1400                let ino = self.alloc_inode(path_ref).await?;
1401                let child_dir = parent_real_inode.mkdir(ctx, name, mode, umask).await?;
1402                // Set opaque if child dir has lower layers.
1403                if set_opaque {
1404                    parent_real_inode
1405                        .layer
1406                        .set_opaque(ctx, child_dir.inode)
1407                        .await?;
1408                }
1409                let ovi =
1410                    OverlayInode::new_from_real_inode(name, ino, path_ref.clone(), child_dir).await;
1411                new_node.lock().await.replace(ovi);
1412                Ok(false)
1413            })
1414            .await?;
1415
1416        // new_node is always 'Some'
1417        let nn = new_node.lock().await.take();
1418        let arc_node = Arc::new(nn.unwrap());
1419        self.insert_inode(arc_node.inode, arc_node.clone()).await;
1420        pnode.insert_child(name, arc_node).await;
1421        Ok(())
1422    }
1423
1424    async fn do_mknod(
1425        &self,
1426        ctx: Request,
1427        parent_node: &Arc<OverlayInode>,
1428        name: &str,
1429        mode: u32,
1430        rdev: u32,
1431        umask: u32,
1432    ) -> Result<()> {
1433        if self.upper_layer.is_none() {
1434            return Err(Error::from_raw_os_error(libc::EROFS));
1435        }
1436
1437        // Parent node was deleted.
1438        if parent_node.whiteout.load(Ordering::Relaxed) {
1439            return Err(Error::from_raw_os_error(libc::ENOENT));
1440        }
1441
1442        match self
1443            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1444            .await?
1445        {
1446            Some(n) => {
1447                // Node with same name exists, let's check if it's whiteout.
1448                if !n.whiteout.load(Ordering::Relaxed) {
1449                    return Err(Error::from_raw_os_error(libc::EEXIST));
1450                }
1451
1452                // Copy parent node up if necessary.
1453                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1454                pnode
1455                    .handle_upper_inode_locked(
1456                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1457                            let parent_real_inode = match parent_real_inode {
1458                                Some(inode) => inode,
1459                                None => {
1460                                    error!("BUG: parent doesn't have upper inode after copied up");
1461                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1462                                }
1463                            };
1464                            let osstr = OsStr::new(name);
1465                            if n.in_upper_layer().await {
1466                                let _ = parent_real_inode
1467                                    .layer
1468                                    .delete_whiteout(ctx, parent_real_inode.inode, osstr)
1469                                    .await;
1470                            }
1471
1472                            let child_ri = parent_real_inode
1473                                .mknod(ctx, name, mode, rdev, umask)
1474                                .await?;
1475
1476                            // Replace existing real inodes with new one.
1477                            n.add_upper_inode(child_ri, true).await;
1478                            Ok(false)
1479                        },
1480                    )
1481                    .await?;
1482            }
1483            None => {
1484                // Copy parent node up if necessary.
1485                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1486                let new_node = Arc::new(Mutex::new(None));
1487                let path = format!("{}/{}", pnode.path.read().await, name);
1488                pnode
1489                    .handle_upper_inode_locked(
1490                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1491                            let parent_real_inode = match parent_real_inode {
1492                                Some(inode) => inode,
1493                                None => {
1494                                    error!("BUG: parent doesn't have upper inode after copied up");
1495                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1496                                }
1497                            };
1498
1499                            // Allocate inode number.
1500                            let ino = self.alloc_inode(&path).await?;
1501                            let child_ri = parent_real_inode
1502                                .mknod(ctx, name, mode, rdev, umask)
1503                                .await?;
1504                            let ovi = OverlayInode::new_from_real_inode(
1505                                name,
1506                                ino,
1507                                path.clone(),
1508                                child_ri,
1509                            )
1510                            .await;
1511
1512                            new_node.lock().await.replace(ovi);
1513                            Ok(false)
1514                        },
1515                    )
1516                    .await?;
1517
1518                let nn = new_node.lock().await.take();
1519                let arc_node = Arc::new(nn.unwrap());
1520                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1521                pnode.insert_child(name, arc_node).await;
1522            }
1523        }
1524
1525        Ok(())
1526    }
1527
1528    async fn do_create(
1529        &self,
1530        ctx: Request,
1531        parent_node: &Arc<OverlayInode>,
1532        name: &OsStr,
1533        mode: u32,
1534        flags: u32,
1535    ) -> Result<Option<u64>> {
1536        let name_str = name.to_str().unwrap();
1537        let upper = self
1538            .upper_layer
1539            .as_ref()
1540            .cloned()
1541            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
1542
1543        // Parent node was deleted.
1544        if parent_node.whiteout.load(Ordering::Relaxed) {
1545            return Err(Error::from_raw_os_error(libc::ENOENT));
1546        }
1547
1548        let handle: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1549        let real_ino: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
1550        let new_ovi = match self
1551            .lookup_node_ignore_enoent(ctx, parent_node.inode, name_str)
1552            .await?
1553        {
1554            Some(n) => {
1555                // Node with same name exists, let's check if it's whiteout.
1556                if !n.whiteout.load(Ordering::Relaxed) {
1557                    return Err(Error::from_raw_os_error(libc::EEXIST));
1558                }
1559
1560                // Copy parent node up if necessary.
1561                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1562                pnode
1563                    .handle_upper_inode_locked(
1564                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1565                            let parent_real_inode = match parent_real_inode {
1566                                Some(inode) => inode,
1567                                None => {
1568                                    error!("BUG: parent doesn't have upper inode after copied up");
1569                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1570                                }
1571                            };
1572
1573                            if n.in_upper_layer().await {
1574                                let _ = parent_real_inode
1575                                    .layer
1576                                    .delete_whiteout(ctx, parent_real_inode.inode, name)
1577                                    .await;
1578                            }
1579
1580                            let (child_ri, hd) =
1581                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1582                            real_ino.lock().await.replace(child_ri.inode);
1583                            handle.lock().await.replace(hd.unwrap());
1584
1585                            // Replace existing real inodes with new one.
1586                            n.add_upper_inode(child_ri, true).await;
1587                            Ok(false)
1588                        },
1589                    )
1590                    .await?;
1591                n.clone()
1592            }
1593            None => {
1594                // Copy parent node up if necessary.
1595                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1596                let new_node = Arc::new(Mutex::new(None));
1597                let path = format!("{}/{}", pnode.path.read().await, name_str);
1598                pnode
1599                    .handle_upper_inode_locked(
1600                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1601                            let parent_real_inode = match parent_real_inode {
1602                                Some(inode) => inode,
1603                                None => {
1604                                    error!("BUG: parent doesn't have upper inode after copied up");
1605                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1606                                }
1607                            };
1608
1609                            let (child_ri, hd) =
1610                                parent_real_inode.create(ctx, name_str, mode, flags).await?;
1611                            real_ino.lock().await.replace(child_ri.inode);
1612                            handle.lock().await.replace(hd.unwrap());
1613                            // Allocate inode number.
1614                            let ino = self.alloc_inode(&path).await?;
1615                            let ovi = OverlayInode::new_from_real_inode(
1616                                name_str,
1617                                ino,
1618                                path.clone(),
1619                                child_ri,
1620                            )
1621                            .await;
1622
1623                            new_node.lock().await.replace(ovi);
1624                            Ok(false)
1625                        },
1626                    )
1627                    .await?;
1628
1629                // new_node is always 'Some'
1630                let nn = new_node.lock().await.take();
1631                let arc_node = Arc::new(nn.unwrap());
1632                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1633                pnode.insert_child(name_str, arc_node.clone()).await;
1634                arc_node
1635            }
1636        };
1637
1638        let final_handle = match *handle.lock().await {
1639            Some(hd) => {
1640                if self.no_open.load(Ordering::Relaxed) {
1641                    None
1642                } else {
1643                    let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1644                    let handle_data = HandleData {
1645                        node: new_ovi,
1646                        real_handle: Some(RealHandle {
1647                            layer: upper.clone(),
1648                            in_upper_layer: true,
1649                            inode: real_ino.lock().await.unwrap(),
1650                            handle: AtomicU64::new(hd),
1651                        }),
1652                    };
1653                    self.handles
1654                        .lock()
1655                        .await
1656                        .insert(handle, Arc::new(handle_data));
1657                    Some(handle)
1658                }
1659            }
1660            None => None,
1661        };
1662        Ok(final_handle)
1663    }
1664
1665    async fn do_rename(
1666        &self,
1667        req: Request,
1668        parent: Inode,
1669        name: &OsStr,
1670        new_parent: Inode,
1671        new_name: &OsStr,
1672    ) -> Result<()> {
1673        let name_str = name.to_str().unwrap();
1674        let new_name_str = new_name.to_str().unwrap();
1675
1676        // Ensure parent exists
1677        let parent_node = match self.get_all_inode(parent).await {
1678            Some(node) => node,
1679            None => return Err(Error::from_raw_os_error(libc::ENOENT)),
1680        };
1681        // Ensure parent is a directory
1682
1683        if !parent_node.is_dir(req).await? {
1684            return Err(Error::from_raw_os_error(libc::EPERM));
1685        }
1686
1687        // Ensure new_parent is a directory
1688        let new_parent_node = match self.get_all_inode(new_parent).await {
1689            Some(node) => node,
1690            None => return Err(Error::from_raw_os_error(libc::ENOENT)),
1691        };
1692        if !new_parent_node.is_dir(req).await? {
1693            return Err(Error::from_raw_os_error(libc::EPERM));
1694        }
1695
1696        // Ensure new_name does not exist in new_parent
1697        // Check if new_name exists in new_parent
1698        if let Some(existing_node) = self
1699            .lookup_node_ignore_enoent(req, new_parent, new_name_str)
1700            .await?
1701        {
1702            // If the node exists and is not whiteout, return EEXIST
1703            if !existing_node.whiteout.load(Ordering::Relaxed) {
1704                return Err(Error::from_raw_os_error(libc::EEXIST));
1705            }
1706            // If it's a whiteout, allow rename to proceed (overwrite whiteout)
1707        }
1708
1709        let src_node = parent_node
1710            .child(name_str)
1711            .await
1712            .ok_or_else(|| Error::from_raw_os_error(libc::ENOENT))?;
1713        let need_whiteout = src_node.upper_layer_only().await;
1714        self.copy_node_up(req, parent_node.clone()).await?;
1715        if src_node.is_dir(req).await? {
1716            // Directory can't be renamed.
1717            self.copy_directory_up(req, src_node.clone()).await?;
1718        } else {
1719            self.copy_node_up(req, src_node.clone()).await?;
1720        };
1721
1722        // copy new_parent up if necessary.
1723        self.copy_node_up(req, new_parent_node.clone()).await?;
1724        let (src_lay, src_t, src_true_inode) = parent_node.first_layer_inode().await;
1725        let (dst_lay, dst_t, dst_true_inode) = new_parent_node.first_layer_inode().await;
1726
1727        // Assert that both layers are the same.
1728        assert!(Arc::ptr_eq(&src_lay, &dst_lay));
1729        assert!(src_t);
1730        assert!(dst_t);
1731
1732        // rename in PassthroughFS.
1733        match dst_lay
1734            .rename(req, src_true_inode, name, dst_true_inode, new_name)
1735            .await
1736        {
1737            Ok(_) => {
1738                let new_path = format!("{}/{}", new_parent_node.path.read().await, new_name_str);
1739
1740                *src_node.path.write().await = new_path;
1741                *src_node.name.write().await = new_name_str.to_string();
1742
1743                new_parent_node.insert_child(new_name_str, src_node).await;
1744            }
1745            Err(e) => return Err(e.into()),
1746        }
1747
1748        if !need_whiteout {
1749            let _ = src_lay.create_whiteout(req, src_true_inode, name).await?;
1750        }
1751
1752        // Insert into new parent, update node name and path
1753        let _ = parent_node
1754            .remove_child(name_str)
1755            .await
1756            .ok_or_else(|| Error::from_raw_os_error(libc::ENOENT))?;
1757
1758        Ok(())
1759    }
1760
1761    async fn do_link(
1762        &self,
1763        ctx: Request,
1764        src_node: &Arc<OverlayInode>,
1765        new_parent: &Arc<OverlayInode>,
1766        name: &str,
1767    ) -> Result<()> {
1768        let name_os = OsStr::new(name);
1769        if self.upper_layer.is_none() {
1770            return Err(Error::from_raw_os_error(libc::EROFS));
1771        }
1772
1773        // Node is whiteout.
1774        if src_node.whiteout.load(Ordering::Relaxed) || new_parent.whiteout.load(Ordering::Relaxed)
1775        {
1776            return Err(Error::from_raw_os_error(libc::ENOENT));
1777        }
1778
1779        let st = src_node.stat64(ctx).await?;
1780        if utils::is_dir(&st.attr.kind) {
1781            // Directory can't be hardlinked.
1782            return Err(Error::from_raw_os_error(libc::EPERM));
1783        }
1784
1785        let src_node = self.copy_node_up(ctx, Arc::clone(src_node)).await?;
1786        let new_parent = self.copy_node_up(ctx, Arc::clone(new_parent)).await?;
1787        let src_ino = src_node.first_layer_inode().await.2;
1788
1789        match self
1790            .lookup_node_ignore_enoent(ctx, new_parent.inode, name)
1791            .await?
1792        {
1793            Some(n) => {
1794                // Node with same name exists, let's check if it's whiteout.
1795                if !n.whiteout.load(Ordering::Relaxed) {
1796                    return Err(Error::from_raw_os_error(libc::EEXIST));
1797                }
1798
1799                // Node is definitely a whiteout now.
1800                new_parent
1801                    .handle_upper_inode_locked(
1802                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1803                            let parent_real_inode = match parent_real_inode {
1804                                Some(inode) => inode,
1805                                None => {
1806                                    error!("BUG: parent doesn't have upper inode after copied up");
1807                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1808                                }
1809                            };
1810
1811                            // Whiteout file exists in upper level, let's delete it.
1812                            if n.in_upper_layer().await {
1813                                let _ = parent_real_inode
1814                                    .layer
1815                                    .delete_whiteout(ctx, parent_real_inode.inode, name_os)
1816                                    .await;
1817                            }
1818
1819                            let child_ri = parent_real_inode.link(ctx, src_ino, name).await?;
1820
1821                            // Replace existing real inodes with new one.
1822                            n.add_upper_inode(child_ri, true).await;
1823                            Ok(false)
1824                        },
1825                    )
1826                    .await?;
1827            }
1828            None => {
1829                // Copy parent node up if necessary.
1830                let new_node: Arc<Mutex<Option<OverlayInode>>> = Arc::new(Mutex::new(None));
1831                new_parent
1832                    .handle_upper_inode_locked(
1833                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1834                            let parent_real_inode = match parent_real_inode {
1835                                Some(inode) => inode,
1836                                None => {
1837                                    error!("BUG: parent doesn't have upper inode after copied up");
1838                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1839                                }
1840                            };
1841
1842                            // Allocate inode number.
1843                            let path = format!("{}/{}", new_parent.path.read().await, name);
1844                            let ino = self.alloc_inode(&path).await?;
1845                            let child_ri = parent_real_inode.link(ctx, src_ino, name).await?;
1846                            let ovi =
1847                                OverlayInode::new_from_real_inode(name, ino, path, child_ri).await;
1848
1849                            new_node.lock().await.replace(ovi);
1850                            Ok(false)
1851                        },
1852                    )
1853                    .await?;
1854
1855                // new_node is always 'Some'
1856                let arc_node = Arc::new(new_node.lock().await.take().unwrap());
1857                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1858                new_parent.insert_child(name, arc_node).await;
1859            }
1860        }
1861
1862        Ok(())
1863    }
1864
1865    async fn do_symlink(
1866        &self,
1867        ctx: Request,
1868        linkname: &str,
1869        parent_node: &Arc<OverlayInode>,
1870        name: &str,
1871    ) -> Result<()> {
1872        let name_os = OsStr::new(name);
1873        if self.upper_layer.is_none() {
1874            return Err(Error::from_raw_os_error(libc::EROFS));
1875        }
1876
1877        // parent was deleted.
1878        if parent_node.whiteout.load(Ordering::Relaxed) {
1879            return Err(Error::from_raw_os_error(libc::ENOENT));
1880        }
1881
1882        match self
1883            .lookup_node_ignore_enoent(ctx, parent_node.inode, name)
1884            .await?
1885        {
1886            Some(n) => {
1887                // Node with same name exists, let's check if it's whiteout.
1888                if !n.whiteout.load(Ordering::Relaxed) {
1889                    return Err(Error::from_raw_os_error(libc::EEXIST));
1890                }
1891
1892                // Copy parent node up if necessary.
1893                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1894                pnode
1895                    .handle_upper_inode_locked(
1896                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1897                            let parent_real_inode = match parent_real_inode {
1898                                Some(inode) => inode,
1899                                None => {
1900                                    error!("BUG: parent doesn't have upper inode after copied up");
1901                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1902                                }
1903                            };
1904
1905                            if n.in_upper_layer().await {
1906                                let _ = parent_real_inode
1907                                    .layer
1908                                    .delete_whiteout(ctx, parent_real_inode.inode, name_os)
1909                                    .await;
1910                            }
1911
1912                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
1913
1914                            // Replace existing real inodes with new one.
1915                            n.add_upper_inode(child_ri, true).await;
1916                            Ok(false)
1917                        },
1918                    )
1919                    .await?;
1920            }
1921            None => {
1922                // Copy parent node up if necessary.
1923                let pnode = self.copy_node_up(ctx, Arc::clone(parent_node)).await?;
1924                let new_node: Arc<Mutex<Option<OverlayInode>>> = Arc::new(Mutex::new(None));
1925                let path = format!("{}/{}", pnode.path.read().await, name);
1926                pnode
1927                    .handle_upper_inode_locked(
1928                        &mut |parent_real_inode: Option<Arc<RealInode>>| async {
1929                            let parent_real_inode = match parent_real_inode {
1930                                Some(inode) => inode,
1931                                None => {
1932                                    error!("BUG: parent doesn't have upper inode after copied up");
1933                                    return Err(Error::from_raw_os_error(libc::EINVAL));
1934                                }
1935                            };
1936
1937                            // Allocate inode number.
1938                            let ino = self.alloc_inode(&path).await?;
1939                            let child_ri = parent_real_inode.symlink(ctx, linkname, name).await?;
1940                            let ovi = OverlayInode::new_from_real_inode(
1941                                name,
1942                                ino,
1943                                path.clone(),
1944                                child_ri,
1945                            )
1946                            .await;
1947
1948                            new_node.lock().await.replace(ovi);
1949                            Ok(false)
1950                        },
1951                    )
1952                    .await?;
1953
1954                // new_node is always 'Some'
1955                let arc_node = Arc::new(new_node.lock().await.take().unwrap());
1956                self.insert_inode(arc_node.inode, arc_node.clone()).await;
1957                pnode.insert_child(name, arc_node).await;
1958            }
1959        }
1960
1961        Ok(())
1962    }
1963
1964    async fn copy_symlink_up(
1965        &self,
1966        ctx: Request,
1967        node: Arc<OverlayInode>,
1968    ) -> Result<Arc<OverlayInode>> {
1969        if node.in_upper_layer().await {
1970            return Ok(node);
1971        }
1972
1973        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
1974            Arc::clone(n)
1975        } else {
1976            return Err(Error::other("no parent?"));
1977        };
1978
1979        let (self_layer, _, self_inode) = node.first_layer_inode().await;
1980
1981        if !parent_node.in_upper_layer().await {
1982            parent_node.clone().create_upper_dir(ctx, None).await?;
1983        }
1984
1985        // Read the linkname from lower layer.
1986        let reply_data = self_layer.readlink(ctx, self_inode).await?;
1987        // Convert path to &str.
1988        let path = std::str::from_utf8(&reply_data.data)
1989            .map_err(|_| Error::from_raw_os_error(libc::EINVAL))?;
1990
1991        let new_upper_real: Arc<Mutex<Option<RealInode>>> = Arc::new(Mutex::new(None));
1992        parent_node
1993            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
1994                // We already create upper dir for parent_node above.
1995                let parent_real_inode =
1996                    parent_upper_inode.ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
1997                new_upper_real.lock().await.replace(
1998                    parent_real_inode
1999                        .symlink(ctx, path, &node.name.read().await)
2000                        .await?,
2001                );
2002                Ok(false)
2003            })
2004            .await?;
2005
2006        if let Some(real_inode) = new_upper_real.lock().await.take() {
2007            // update upper_inode and first_inode()
2008            node.add_upper_inode(real_inode, true).await;
2009        }
2010
2011        Ok(node)
2012    }
2013
2014    // Copy regular file from lower layer to upper layer.
2015    // Caller must ensure node doesn't have upper layer.
2016    async fn copy_regfile_up(
2017        &self,
2018        ctx: Request,
2019        node: Arc<OverlayInode>,
2020    ) -> Result<Arc<OverlayInode>> {
2021        if node.in_upper_layer().await {
2022            return Ok(node);
2023        }
2024        //error...
2025        let parent_node = if let Some(ref n) = node.parent.lock().await.upgrade() {
2026            Arc::clone(n)
2027        } else {
2028            return Err(Error::other("no parent?"));
2029        };
2030
2031        let st = node.stat64(ctx).await?;
2032        let (lower_layer, _, lower_inode) = node.first_layer_inode().await;
2033
2034        if !parent_node.in_upper_layer().await {
2035            parent_node.clone().create_upper_dir(ctx, None).await?;
2036        }
2037
2038        // create the file in upper layer using information from lower layer
2039
2040        let flags = libc::O_WRONLY;
2041        let mode = mode_from_kind_and_perm(st.attr.kind, st.attr.perm);
2042
2043        let upper_handle = Arc::new(Mutex::new(0));
2044        let upper_real_inode = Arc::new(Mutex::new(None));
2045        parent_node
2046            .handle_upper_inode_locked(&mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2047                // We already create upper dir for parent_node.
2048                let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2049                    error!("parent {} has no upper inode", parent_node.inode);
2050                    Error::from_raw_os_error(libc::EINVAL)
2051                })?;
2052                let (inode, h) = parent_real_inode
2053                    .create(
2054                        ctx,
2055                        &node.name.read().await,
2056                        mode,
2057                        flags.try_into().unwrap(),
2058                    )
2059                    .await?;
2060                *upper_handle.lock().await = h.unwrap_or(0);
2061                upper_real_inode.lock().await.replace(inode);
2062                Ok(false)
2063            })
2064            .await?;
2065
2066        let rep = lower_layer
2067            .open(ctx, lower_inode, libc::O_RDONLY as u32)
2068            .await?;
2069
2070        let lower_handle = rep.fh;
2071
2072        // need to use work directory and then rename file to
2073        // final destination for atomic reasons.. not deal with it for now,
2074        // use stupid copy at present.
2075        // FIXME: this need a lot of work here, ntimes, xattr, etc.
2076
2077        // Copy from lower real inode to upper real inode.
2078        // TODO: use sendfile here.
2079
2080        let mut _offset: usize = 0;
2081        let size = 4 * 1024 * 1024;
2082
2083        let ret = lower_layer
2084            .read(ctx, lower_inode, lower_handle, _offset as u64, size)
2085            .await?;
2086
2087        _offset += ret.data.len();
2088
2089        // close handles
2090        lower_layer
2091            .release(ctx, lower_inode, lower_handle, 0, 0, true)
2092            .await?;
2093
2094        _offset = 0;
2095        let u_handle = *upper_handle.lock().await;
2096        while let Some(ref ri) = upper_real_inode.lock().await.take() {
2097            let ret = ri
2098                .layer
2099                .write(ctx, ri.inode, u_handle, _offset as u64, &ret.data, 0, 0)
2100                .await?;
2101            if ret.written == 0 {
2102                break;
2103            }
2104
2105            _offset += ret.written as usize;
2106        }
2107
2108        if let Some(ri) = upper_real_inode.lock().await.take() {
2109            if let Err(e) = ri.layer.release(ctx, ri.inode, u_handle, 0, 0, true).await {
2110                let e: std::io::Error = e.into();
2111                // Ignore ENOSYS.
2112                if e.raw_os_error() != Some(libc::ENOSYS) {
2113                    return Err(e);
2114                }
2115            }
2116
2117            // update upper_inode and first_inode()
2118            node.add_upper_inode(ri, true).await;
2119        }
2120
2121        Ok(node)
2122    }
2123
2124    /// Copies the specified node to the upper layer of the filesystem
2125    ///
2126    /// Performs different operations based on the node type:
2127    /// - **Directory**: Creates a corresponding directory in the upper layer
2128    /// - **Symbolic link**: Recursively copies to the upper layer
2129    /// - **Regular file**: Copies file content to the upper layer
2130    ///
2131    /// # Parameters
2132    /// * `ctx`: FUSE request context
2133    /// * `node`: Reference to the node to be copied
2134    ///
2135    /// # Returns
2136    /// Returns a reference to the upper-layer node on success, or an error on failure
2137    async fn copy_node_up(
2138        &self,
2139        ctx: Request,
2140        node: Arc<OverlayInode>,
2141    ) -> Result<Arc<OverlayInode>> {
2142        if node.in_upper_layer().await {
2143            return Ok(node);
2144        }
2145
2146        let st = node.stat64(ctx).await?;
2147        match st.attr.kind {
2148            FileType::Directory => {
2149                node.clone().create_upper_dir(ctx, None).await?;
2150                Ok(node)
2151            }
2152            FileType::Symlink => {
2153                // For symlink.
2154                self.copy_symlink_up(ctx, node).await
2155            }
2156            FileType::RegularFile => {
2157                // For regular file.
2158                self.copy_regfile_up(ctx, node).await
2159            }
2160            _ => {
2161                // For other file types. return error.
2162                Err(Error::from_raw_os_error(libc::EINVAL))
2163            }
2164        }
2165    }
2166
2167    /// recursively copy directory and all its contents to upper layer
2168    async fn copy_directory_up(
2169        &self,
2170        ctx: Request,
2171        node: Arc<OverlayInode>,
2172    ) -> Result<Arc<OverlayInode>> {
2173        // Ensure the directory itself is copied up first
2174        self.copy_node_up(ctx, node.clone()).await?;
2175
2176        // load directory to cache
2177        self.load_directory(ctx, &node).await?;
2178
2179        // go through all children
2180        let children = node.childrens.lock().await.clone();
2181        for (_name, child) in children.iter() {
2182            if _name == "." || _name == ".." {
2183                continue;
2184            }
2185            // jump over whiteout
2186            if child.whiteout.load(Ordering::Relaxed) {
2187                continue;
2188            }
2189            let st = child.stat64(ctx).await?;
2190            if !child.in_upper_layer().await {
2191                match st.attr.kind {
2192                    FileType::Directory => {
2193                        // recursively copy subdirectory
2194                        Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2195                    }
2196                    FileType::Symlink | FileType::RegularFile => {
2197                        // copy node up symlink or regular file
2198                        Box::pin(self.copy_node_up(ctx, child.clone())).await?;
2199                    }
2200                    _ => {
2201                        // other file types are ignored
2202                    }
2203                }
2204            } else if utils::is_dir(&st.attr.kind) {
2205                // If it is already in the upper layer, but the directory is not loaded,
2206                // ensure that its contents are also copied up recursively.
2207                Box::pin(self.copy_directory_up(ctx, child.clone())).await?;
2208            }
2209        }
2210
2211        Ok(node)
2212    }
2213
2214    async fn do_rm(&self, ctx: Request, parent: u64, name: &OsStr, dir: bool) -> Result<()> {
2215        if self.upper_layer.is_none() {
2216            return Err(Error::from_raw_os_error(libc::EROFS));
2217        }
2218
2219        // Find parent Overlay Inode.
2220        let pnode = self.lookup_node(ctx, parent, "").await?;
2221        if pnode.whiteout.load(Ordering::Relaxed) {
2222            return Err(Error::from_raw_os_error(libc::ENOENT));
2223        }
2224        let to_name = name.to_str().unwrap();
2225        // Find the Overlay Inode for child with <name>.
2226        let node = self.lookup_node(ctx, parent, to_name).await?;
2227        if node.whiteout.load(Ordering::Relaxed) {
2228            // already deleted.
2229            return Err(Error::from_raw_os_error(libc::ENOENT));
2230        }
2231
2232        if dir {
2233            self.load_directory(ctx, &node).await?;
2234            let (count, whiteouts) = node.count_entries_and_whiteout(ctx).await?;
2235            trace!("entries: {}, whiteouts: {}\n", count, whiteouts);
2236            if count > 0 {
2237                return Err(Error::from_raw_os_error(libc::ENOTEMPTY));
2238            }
2239
2240            // Delete all whiteouts.
2241            if whiteouts > 0 && node.in_upper_layer().await {
2242                self.empty_node_directory(ctx, Arc::clone(&node)).await?;
2243            }
2244
2245            trace!("whiteouts deleted!\n");
2246        }
2247
2248        let need_whiteout = AtomicBool::new(true);
2249        let pnode = self.copy_node_up(ctx, Arc::clone(&pnode)).await?;
2250
2251        if node.upper_layer_only().await {
2252            need_whiteout.store(false, Ordering::Relaxed);
2253        }
2254
2255        // lookups decrease by 1.
2256        node.lookups.fetch_sub(1, Ordering::Relaxed);
2257        let mut df = |parent_upper_inode: Option<Arc<RealInode>>| async {
2258            let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2259                error!(
2260                    "BUG: parent {} has no upper inode after copy up",
2261                    pnode.inode
2262                );
2263                Error::from_raw_os_error(libc::EINVAL)
2264            })?;
2265
2266            // Parent is opaque, it shadows everything in lower layers so no need to create extra whiteouts.
2267            if parent_real_inode.opaque {
2268                need_whiteout.store(false, Ordering::Relaxed);
2269            }
2270            if dir {
2271                parent_real_inode
2272                    .layer
2273                    .rmdir(ctx, parent_real_inode.inode, name)
2274                    .await?;
2275            } else {
2276                parent_real_inode
2277                    .layer
2278                    .unlink(ctx, parent_real_inode.inode, name)
2279                    .await?;
2280            }
2281
2282            Ok(false)
2283        };
2284
2285        if node.in_upper_layer().await {
2286            let hh = pnode.handle_upper_inode_locked(&mut df);
2287            // remove it from hashmap
2288            let node_name = node.name.read().await.clone();
2289            let (h1, _, _) = tokio::join!(
2290                hh,
2291                self.remove_inode(node.inode, Some(node.path.read().await.clone())),
2292                pnode.remove_child(&node_name),
2293            );
2294            h1?;
2295        } else {
2296            let node_name = node.name.read().await.clone();
2297            tokio::join!(
2298                self.remove_inode(node.inode, None),
2299                pnode.remove_child(&node_name)
2300            );
2301        }
2302
2303        if need_whiteout.load(Ordering::Relaxed) {
2304            println!("do_rm: creating whiteout\n");
2305            // pnode is copied up, so it has upper layer.
2306            pnode
2307                .handle_upper_inode_locked(
2308                    &mut |parent_upper_inode: Option<Arc<RealInode>>| async {
2309                        let parent_real_inode = parent_upper_inode.ok_or_else(|| {
2310                            error!(
2311                                "BUG: parent {} has no upper inode after copy up",
2312                                pnode.inode
2313                            );
2314                            Error::from_raw_os_error(libc::EINVAL)
2315                        })?;
2316
2317                        let child_ri = parent_real_inode.create_whiteout(ctx, to_name).await?; //FIXME..............
2318                        let path = format!("{}/{}", pnode.path.read().await, to_name);
2319                        let ino: u64 = self.alloc_inode(&path).await?;
2320                        let ovi = Arc::new(
2321                            OverlayInode::new_from_real_inode(to_name, ino, path.clone(), child_ri)
2322                                .await,
2323                        );
2324
2325                        self.insert_inode(ino, ovi.clone()).await;
2326                        pnode.insert_child(to_name, ovi.clone()).await;
2327                        Ok(false)
2328                    },
2329                )
2330                .await?;
2331        }
2332
2333        Ok(())
2334    }
2335
2336    async fn do_fsync(
2337        &self,
2338        ctx: Request,
2339        inode: Inode,
2340        datasync: bool,
2341        handle: Handle,
2342        syncdir: bool,
2343    ) -> Result<()> {
2344        // Use O_RDONLY flags which indicates no copy up.
2345        let data = self
2346            .get_data(ctx, Some(handle), inode, libc::O_RDONLY as u32)
2347            .await?;
2348
2349        match data.real_handle {
2350            // FIXME: need to test if inode matches corresponding handle?
2351            None => Err(Error::from_raw_os_error(libc::ENOENT)),
2352            Some(ref rh) => {
2353                let real_handle = rh.handle.load(Ordering::Relaxed);
2354                // TODO: check if it's in upper layer? @weizhang555
2355                if syncdir {
2356                    rh.layer
2357                        .fsyncdir(ctx, rh.inode, real_handle, datasync)
2358                        .await
2359                        .map_err(|e| e.into())
2360                } else {
2361                    rh.layer
2362                        .fsync(ctx, rh.inode, real_handle, datasync)
2363                        .await
2364                        .map_err(|e| e.into())
2365                }
2366            }
2367        }
2368    }
2369
2370    // Delete everything in the directory only on upper layer, ignore lower layers.
2371    async fn empty_node_directory(&self, ctx: Request, node: Arc<OverlayInode>) -> Result<()> {
2372        let st = node.stat64(ctx).await?;
2373        if !utils::is_dir(&st.attr.kind) {
2374            // This function can only be called on directories.
2375            return Err(Error::from_raw_os_error(libc::ENOTDIR));
2376        }
2377
2378        let (layer, in_upper, inode) = node.first_layer_inode().await;
2379        if !in_upper {
2380            return Ok(());
2381        }
2382
2383        // Copy node.childrens Hashmap to Vector, the Vector is also used as temp storage,
2384        // Without this, Rust won't allow us to remove them from node.childrens.
2385        let iter = node
2386            .childrens
2387            .lock()
2388            .await
2389            .values()
2390            .cloned()
2391            .collect::<Vec<_>>();
2392
2393        for child in iter {
2394            // We only care about upper layer, ignore lower layers.
2395            if child.in_upper_layer().await {
2396                let child_name = child.name.read().await.clone();
2397                let child_name_os = OsStr::new(&child_name);
2398                if child.whiteout.load(Ordering::Relaxed) {
2399                    layer.delete_whiteout(ctx, inode, child_name_os).await?
2400                } else {
2401                    let s = child.stat64(ctx).await?;
2402                    let cname: &OsStr = OsStr::new(&child_name_os);
2403                    if utils::is_dir(&s.attr.kind) {
2404                        let (count, whiteouts) = child.count_entries_and_whiteout(ctx).await?;
2405                        if count + whiteouts > 0 {
2406                            let cb = child.clone();
2407                            Box::pin(async move { self.empty_node_directory(ctx, cb).await })
2408                                .await?;
2409                        }
2410                        layer.rmdir(ctx, inode, cname).await?
2411                    } else {
2412                        layer.unlink(ctx, inode, cname).await?;
2413                    }
2414                }
2415
2416                let cpath = child.path.read().await.clone();
2417                // delete the child
2418                self.remove_inode(child.inode, Some(cpath)).await;
2419                node.remove_child(&child_name).await;
2420            }
2421        }
2422
2423        Ok(())
2424    }
2425
2426    async fn find_real_info_from_handle(
2427        &self,
2428        handle: Handle,
2429    ) -> Result<(Arc<BoxedLayer>, Inode, Handle)> {
2430        match self.handles.lock().await.get(&handle) {
2431            Some(h) => match h.real_handle {
2432                Some(ref rhd) => Ok((
2433                    rhd.layer.clone(),
2434                    rhd.inode,
2435                    rhd.handle.load(Ordering::Relaxed),
2436                )),
2437                None => Err(Error::from_raw_os_error(libc::ENOENT)),
2438            },
2439
2440            None => Err(Error::from_raw_os_error(libc::ENOENT)),
2441        }
2442    }
2443
2444    async fn find_real_inode(&self, inode: Inode) -> Result<(Arc<BoxedLayer>, Inode)> {
2445        if let Some(n) = self.get_active_inode(inode).await {
2446            let (first_layer, _, first_inode) = n.first_layer_inode().await;
2447            return Ok((first_layer, first_inode));
2448        }
2449
2450        Err(Error::from_raw_os_error(libc::ENOENT))
2451    }
2452
2453    async fn get_data(
2454        &self,
2455        ctx: Request,
2456        handle: Option<Handle>,
2457        inode: Inode,
2458        flags: u32,
2459    ) -> Result<Arc<HandleData>> {
2460        let no_open = self.no_open.load(Ordering::Relaxed);
2461        if !no_open {
2462            if let Some(h) = handle {
2463                if let Some(v) = self.handles.lock().await.get(&h) {
2464                    if v.node.inode == inode {
2465                        return Ok(Arc::clone(v));
2466                    }
2467                }
2468            }
2469        } else {
2470            let readonly: bool = flags
2471                & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
2472                    as u32
2473                == 0;
2474
2475            // lookup node
2476            let node = self.lookup_node(ctx, inode, "").await?;
2477
2478            // whiteout node
2479            if node.whiteout.load(Ordering::Relaxed) {
2480                return Err(Error::from_raw_os_error(libc::ENOENT));
2481            }
2482
2483            if !readonly {
2484                // Check if upper layer exists, return EROFS is not exists.
2485                self.upper_layer
2486                    .as_ref()
2487                    .cloned()
2488                    .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
2489                // copy up to upper layer
2490                self.copy_node_up(ctx, Arc::clone(&node)).await?;
2491            }
2492
2493            let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
2494            let handle_data = HandleData {
2495                node: Arc::clone(&node),
2496                real_handle: Some(RealHandle {
2497                    layer,
2498                    in_upper_layer,
2499                    inode,
2500                    handle: AtomicU64::new(0),
2501                }),
2502            };
2503            return Ok(Arc::new(handle_data));
2504        }
2505
2506        Err(Error::from_raw_os_error(libc::ENOENT))
2507    }
2508
2509    // extend or init the inodes number to one overlay if the current number is done.
2510    pub async fn extend_inode_alloc(&self, key: u64) {
2511        let next_inode = key * INODE_ALLOC_BATCH;
2512        let limit_inode = next_inode + INODE_ALLOC_BATCH - 1;
2513        self.inodes
2514            .write()
2515            .await
2516            .extend_inode_number(next_inode, limit_inode);
2517    }
2518}
2519
2520/// Mounts the filesystem using the given parameters and returns the mount handle.
2521///
2522/// # Parameters
2523/// - `mountpoint`: Path to the mount point.
2524/// - `upperdir`: Path to the upper directory.
2525/// - `lowerdir`: Paths to the lower directories.
2526/// - `not_unprivileged`: If true, use privileged mount; otherwise, unprivileged mount.
2527///
2528/// # Returns
2529/// A mount handle on success.
2530pub async fn mount_fs(
2531    mountpoint: String,
2532    upperdir: String,
2533    lowerdir: Vec<String>,
2534    not_unprivileged: bool,
2535) -> rfuse3::raw::MountHandle {
2536    // Create lower layers
2537    let mut lower_layers = Vec::new();
2538    for lower in &lowerdir {
2539        let layer = new_passthroughfs_layer(lower)
2540            .await
2541            .expect("Failed to create lower filesystem layer");
2542        lower_layers.push(Arc::new(layer));
2543    }
2544    // Create upper layer
2545    let upper_layer = Arc::new(
2546        new_passthroughfs_layer(&upperdir)
2547            .await
2548            .expect("Failed to create upper filesystem layer"),
2549    );
2550
2551    // Configure overlay filesystem
2552    let config = Config {
2553        mountpoint: mountpoint.clone(),
2554        do_import: true,
2555        ..Default::default()
2556    };
2557    let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1)
2558        .expect("Failed to initialize OverlayFs");
2559    let logfs = LoggingFileSystem::new(overlayfs);
2560
2561    let mount_path: OsString = OsString::from(mountpoint);
2562
2563    // Obtain the current user's uid and gid
2564    let uid = unsafe { libc::getuid() };
2565    let gid = unsafe { libc::getgid() };
2566
2567    let mut mount_options = MountOptions::default();
2568    mount_options.force_readdir_plus(true).uid(uid).gid(gid);
2569
2570    // Mount filesystem based on privilege flag and return the mount handle
2571    if !not_unprivileged {
2572        Session::new(mount_options)
2573            .mount_with_unprivileged(logfs, mount_path)
2574            .await
2575            .expect("Unprivileged mount failed")
2576    } else {
2577        Session::new(mount_options)
2578            .mount(logfs, mount_path)
2579            .await
2580            .expect("Privileged mount failed")
2581    }
2582}