// libfuse_fs/passthrough/mod.rs

1use config::{CachePolicy, Config};
2use file_handle::{FileHandle, OpenableFileHandle};
3
4use inode_store::{InodeId, InodeStore};
5use rfuse3::{Errno, raw::reply::ReplyEntry};
6use uuid::Uuid;
7
8use crate::util::convert_stat64_to_file_attr;
9use mount_fd::MountFds;
10use statx::StatExt;
11use std::io::Result;
12use std::ops::DerefMut;
13use std::sync::atomic::{AtomicBool, AtomicU32};
14use std::{
15    collections::{BTreeMap, btree_map},
16    ffi::{CStr, CString, OsString},
17    fs::File,
18    io::{self, Error},
19    marker::PhantomData,
20    os::{
21        fd::{AsFd, AsRawFd, BorrowedFd, RawFd},
22        unix::ffi::OsStringExt,
23    },
24    path::PathBuf,
25    sync::Arc,
26    sync::atomic::{AtomicU64, Ordering},
27    time::Duration,
28};
29use util::{
30    UniqueInodeGenerator, ebadf, is_dir, openat, reopen_fd_through_proc, stat_fd,
31    validate_path_component,
32};
33use vm_memory::bitmap::BitmapSlice;
34
35mod async_io;
36mod config;
37mod file_handle;
38mod inode_store;
39mod mount_fd;
40pub mod newlogfs;
41mod os_compat;
42mod statx;
43mod util;
44
/// Current directory, as a NUL-terminated C string (`"."`).
pub const CURRENT_DIR_CSTR: &[u8] = b".\0";
/// Parent directory, as a NUL-terminated C string (`".."`).
pub const PARENT_DIR_CSTR: &[u8] = b"..\0";
/// Largest inode number this filesystem will hand out (56 bits); `do_lookup` errors past this.
pub const VFS_MAX_INO: u64 = 0xff_ffff_ffff_ffff;
// Path of the kernel mount table.
// NOTE(review): not referenced in this chunk; presumably consumed by a submodule — confirm.
const MOUNT_INFO_FILE: &str = "/proc/self/mountinfo";
/// An empty, NUL-terminated C string.
pub const EMPTY_CSTR: &[u8] = b"\0";
/// NUL-terminated path of the `/proc/self/fd` directory, kept open to re-open fds by symlink.
pub const PROC_SELF_FD_CSTR: &[u8] = b"/proc/self/fd\0";
/// Inode number reserved for the filesystem root.
pub const ROOT_ID: u64 = 1;
54use tokio::sync::{Mutex, MutexGuard, RwLock};
55
56pub async fn new_passthroughfs_layer(rootdir: &str) -> Result<PassthroughFs> {
57    let config = Config {
58        root_dir: String::from(rootdir),
59        // enable xattr`
60        xattr: true,
61        do_import: true,
62        inode_file_handles: true,
63        ..Default::default()
64    };
65
66    let fs = PassthroughFs::<()>::new(config)?;
67
68    fs.import().await?;
69    Ok(fs)
70}
71
// Inode number handed out to the FUSE client.
type Inode = u64;
// Open-file handle number handed out by open()/opendir().
type Handle = u64;

/// Maximum host inode number supported by passthroughfs
/// (larger host inodes fall back to the unique-inode allocator in `allocate_inode`).
const MAX_HOST_INO: u64 = 0x7fff_ffff_ffff;
77
/// Represents the file associated with an inode (`InodeData`).
///
/// When obtaining such a file, it may either be a new file (the `Owned` variant), in which case the
/// object's lifetime is static, or it may reference `InodeData.file` (the `Ref` variant), in which
/// case the object's lifetime is that of the respective `InodeData` object.
#[derive(Debug)]
enum InodeFile<'a> {
    // A freshly opened file owned by this value (e.g. re-opened from a file handle).
    Owned(File),
    // A borrow of the `File` kept inside an `InodeData`.
    Ref(&'a File),
}
90
91impl AsRawFd for InodeFile<'_> {
92    /// Return a file descriptor for this file
93    /// Note: This fd is only valid as long as the `InodeFile` exists.
94    fn as_raw_fd(&self) -> RawFd {
95        match self {
96            Self::Owned(file) => file.as_raw_fd(),
97            Self::Ref(file_ref) => file_ref.as_raw_fd(),
98        }
99    }
100}
101
102impl AsFd for InodeFile<'_> {
103    fn as_fd(&self) -> BorrowedFd<'_> {
104        match self {
105            Self::Owned(file) => file.as_fd(),
106            Self::Ref(file_ref) => file_ref.as_fd(),
107        }
108    }
109}
110
/// How an inode is kept alive: either an `O_PATH` fd or a kernel file handle.
#[derive(Debug)]
enum InodeHandle {
    // An open `O_PATH` file descriptor pinning the inode.
    File(File),
    // A kernel file handle that can be re-opened on demand via `open_by_handle_at`.
    Handle(Arc<OpenableFileHandle>),
}
116
117impl InodeHandle {
118    fn file_handle(&self) -> Option<&FileHandle> {
119        match self {
120            InodeHandle::File(_) => None,
121            InodeHandle::Handle(h) => Some(h.file_handle()),
122        }
123    }
124
125    fn get_file(&self) -> Result<InodeFile<'_>> {
126        match self {
127            InodeHandle::File(f) => Ok(InodeFile::Ref(f)),
128            InodeHandle::Handle(h) => {
129                let f = h.open(libc::O_PATH)?;
130                Ok(InodeFile::Owned(f))
131            }
132        }
133    }
134
135    fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> Result<File> {
136        match self {
137            InodeHandle::File(f) => reopen_fd_through_proc(f, flags, proc_self_fd),
138            InodeHandle::Handle(h) => h.open(flags),
139        }
140    }
141
142    fn stat(&self) -> Result<libc::stat64> {
143        match self {
144            InodeHandle::File(f) => stat_fd(f, None),
145            InodeHandle::Handle(_h) => {
146                let file = self.get_file()?;
147                stat_fd(&file, None)
148            }
149        }
150    }
151}
152
/// Represents an inode in `PassthroughFs`.
#[derive(Debug)]
pub struct InodeData {
    // The inode number exposed to the FUSE client.
    inode: Inode,
    // Most of these aren't actually files but ¯\_(ツ)_/¯.
    handle: InodeHandle,
    // Host-side identity (device/inode pair) used for de-duplication lookups.
    id: InodeId,
    // FUSE lookup count; the entry is dropped when it reaches zero (see `forget_one`).
    refcount: AtomicU64,
    // File type and mode
    mode: u32,
}
164
165impl InodeData {
166    fn new(inode: Inode, f: InodeHandle, refcount: u64, id: InodeId, mode: u32) -> Self {
167        InodeData {
168            inode,
169            handle: f,
170            id,
171            refcount: AtomicU64::new(refcount),
172            mode,
173        }
174    }
175
176    fn get_file(&self) -> Result<InodeFile<'_>> {
177        self.handle.get_file()
178    }
179
180    fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> Result<File> {
181        self.handle.open_file(flags, proc_self_fd)
182    }
183}
184
/// Data structures to manage accessed inodes.
struct InodeMap {
    // Store of all live inodes, keyed by inode number with file-handle and
    // host-id alternate keys; guarded by an async RwLock.
    pub inodes: RwLock<InodeStore>,
}
189
190impl InodeMap {
191    fn new() -> Self {
192        InodeMap {
193            inodes: RwLock::new(Default::default()),
194        }
195    }
196
197    async fn clear(&self) {
198        // Do not expect poisoned lock here, so safe to unwrap().
199        self.inodes.write().await.clear();
200    }
201
202    async fn get(&self, inode: Inode) -> Result<Arc<InodeData>> {
203        // Do not expect poisoned lock here, so safe to unwrap().
204        self.inodes
205            .read()
206            .await
207            .get(&inode)
208            .cloned()
209            .ok_or_else(ebadf)
210    }
211
212    fn get_inode_locked(
213        inodes: &InodeStore,
214        id: &InodeId,
215        handle: Option<&FileHandle>,
216    ) -> Option<Inode> {
217        match handle {
218            Some(h) => inodes.inode_by_handle(h).copied(),
219            None => inodes.inode_by_id(id).copied(),
220        }
221    }
222
223    async fn get_alt(&self, id: &InodeId, handle: Option<&FileHandle>) -> Option<Arc<InodeData>> {
224        // Do not expect poisoned lock here, so safe to unwrap().
225        let inodes = self.inodes.read().await;
226
227        Self::get_alt_locked(&inodes, id, handle)
228    }
229
230    fn get_alt_locked(
231        inodes: &InodeStore,
232        id: &InodeId,
233        handle: Option<&FileHandle>,
234    ) -> Option<Arc<InodeData>> {
235        handle
236            .and_then(|h| inodes.get_by_handle(h))
237            .or_else(|| {
238                inodes.get_by_id(id).filter(|data| {
239                    // When we have to fall back to looking up an inode by its IDs, ensure that
240                    // we hit an entry that does not have a file handle.  Entries with file
241                    // handles must also have a handle alt key, so if we have not found it by
242                    // that handle alt key, we must have found an entry with a mismatching
243                    // handle; i.e. an entry for a different file, even though it has the same
244                    // inode ID.
245                    // (This can happen when we look up a new file that has reused the inode ID
246                    // of some previously unlinked inode we still have in `.inodes`.)
247                    handle.is_none() || data.handle.file_handle().is_none()
248                })
249            })
250            .cloned()
251    }
252
253    async fn insert(&self, data: Arc<InodeData>) {
254        let mut inodes = self.inodes.write().await;
255
256        Self::insert_locked(&mut inodes, data)
257    }
258
259    fn insert_locked(inodes: &mut InodeStore, data: Arc<InodeData>) {
260        inodes.insert(data);
261    }
262}
263
/// Per-open-file state tracked for a FUSE handle.
struct HandleData {
    // Inode this handle was opened against (checked on release/get).
    inode: Inode,
    // The actually readable/writable open file.
    file: File,
    // Serializes exclusive access handed out by `get_file_mut`.
    lock: Mutex<()>,
    // Flags the file was opened with; mutable via `set_flags`.
    open_flags: AtomicU32,
}
270
271impl HandleData {
272    fn new(inode: Inode, file: File, flags: u32) -> Self {
273        HandleData {
274            inode,
275            file,
276            lock: Mutex::new(()),
277            open_flags: AtomicU32::new(flags),
278        }
279    }
280
281    fn get_file(&self) -> &File {
282        &self.file
283    }
284
285    async fn get_file_mut(&self) -> (MutexGuard<()>, &File) {
286        (self.lock.lock().await, &self.file)
287    }
288
289    fn borrow_fd(&self) -> BorrowedFd {
290        self.file.as_fd()
291    }
292
293    async fn get_flags(&self) -> u32 {
294        self.open_flags.load(Ordering::Relaxed)
295    }
296
297    async fn set_flags(&self, flags: u32) {
298        self.open_flags.store(flags, Ordering::Relaxed);
299    }
300}
301
/// Map of open FUSE handles to their per-handle state.
struct HandleMap {
    // Handle number -> shared handle data, guarded by an async RwLock.
    handles: RwLock<BTreeMap<Handle, Arc<HandleData>>>,
}
305
306impl HandleMap {
307    fn new() -> Self {
308        HandleMap {
309            handles: RwLock::new(BTreeMap::new()),
310        }
311    }
312
313    async fn clear(&self) {
314        // Do not expect poisoned lock here, so safe to unwrap().
315        self.handles.write().await.clear();
316    }
317
318    async fn insert(&self, handle: Handle, data: HandleData) {
319        // Do not expect poisoned lock here, so safe to unwrap().
320        self.handles.write().await.insert(handle, Arc::new(data));
321    }
322
323    async fn release(&self, handle: Handle, inode: Inode) -> Result<()> {
324        // Do not expect poisoned lock here, so safe to unwrap().
325        let mut handles = self.handles.write().await;
326
327        if let btree_map::Entry::Occupied(e) = handles.entry(handle) {
328            if e.get().inode == inode {
329                // We don't need to close the file here because that will happen automatically when
330                // the last `Arc` is dropped.
331                e.remove();
332
333                return Ok(());
334            }
335        }
336
337        Err(ebadf())
338    }
339
340    async fn get(&self, handle: Handle, inode: Inode) -> Result<Arc<HandleData>> {
341        // Do not expect poisoned lock here, so safe to unwrap().
342        self.handles
343            .read()
344            .await
345            .get(&handle)
346            .filter(|hd| hd.inode == inode)
347            .cloned()
348            .ok_or_else(ebadf)
349    }
350}
351
/// A file system that simply "passes through" all requests it receives to the underlying file
/// system.
///
/// To keep the implementation simple it serves the contents of its root directory. Users
/// that wish to serve only a specific directory should set up the environment so that that
/// directory ends up as the root of the file system process. One way to accomplish this is via a
/// combination of mount namespaces and the pivot_root system call.
pub struct PassthroughFs<S: BitmapSlice + Send + Sync = ()> {
    // File descriptors for various points in the file system tree. These fds are always opened with
    // the `O_PATH` option so they cannot be used for reading or writing any data. See the
    // documentation of the `O_PATH` flag in `open(2)` for more details on what one can and cannot
    // do with an fd opened with this flag.
    inode_map: InodeMap,
    // Next inode number to hand out when not reusing an existing mapping.
    next_inode: AtomicU64,

    // File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be
    // used for reading and writing data.
    handle_map: HandleMap,
    // Next handle number to hand out.
    next_handle: AtomicU64,

    // Use to generate unique inode
    ino_allocator: UniqueInodeGenerator,
    // Maps mount IDs to an open FD on the respective ID for the purpose of open_by_handle_at().
    mount_fds: MountFds,

    // File descriptor pointing to the `/proc/self/fd` directory. This is used to convert an fd from
    // `inodes` into one that can go into `handles`. This is accomplished by reading the
    // `/proc/self/fd/{}` symlink. We keep an open fd here in case the file system tree that we are meant
    // to be serving doesn't have access to `/proc/self/fd`.
    proc_self_fd: File,

    // Whether writeback caching is enabled for this directory. This will only be true when
    // `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`.
    writeback: AtomicBool,

    // Whether no_open is enabled.
    no_open: AtomicBool,

    // Whether no_opendir is enabled.
    no_opendir: AtomicBool,

    // Whether kill_priv_v2 is enabled.
    //killpriv_v2: AtomicBool,

    // Whether no_readdir is enabled.
    no_readdir: AtomicBool,

    // Whether seal_size is enabled.
    seal_size: AtomicBool,

    // Whether per-file DAX feature is enabled.
    // Init from guest kernel Init cmd of fuse fs.
    //perfile_dax: AtomicBool,

    // Entry/attr timeouts used for directories (may differ from file timeouts).
    dir_entry_timeout: Duration,
    dir_attr_timeout: Duration,

    // The configuration this filesystem was created with (after normalization in `new`).
    cfg: Config,

    // Unique identifier of this instance (currently only used in commented-out tracing).
    _uuid: Uuid,

    // Carries the bitmap-slice type parameter without storing a value of it.
    phantom: PhantomData<S>,
}
414
impl<S: BitmapSlice + Send + Sync> PassthroughFs<S> {
    /// Create a Passthrough file system instance.
    ///
    /// Conflicting options in `cfg` are reconciled rather than rejected:
    /// `no_open` requires `cache=always`, and `writeback` is incompatible with
    /// `cache=none`; in both cases the offending flag is reset with a warning.
    /// Also opens `/proc/self/fd`, which is kept for later fd re-opening.
    pub fn new(mut cfg: Config) -> Result<PassthroughFs<S>> {
        if cfg.no_open && cfg.cache_policy != CachePolicy::Always {
            warn!("passthroughfs: no_open only work with cache=always, reset to open mode");
            cfg.no_open = false;
        }
        if cfg.writeback && cfg.cache_policy == CachePolicy::Never {
            warn!(
                "passthroughfs: writeback cache conflicts with cache=none, reset to no_writeback"
            );
            cfg.writeback = false;
        }

        // Safe because this is a constant value and a valid C string.
        let proc_self_fd_cstr = unsafe { CStr::from_bytes_with_nul_unchecked(PROC_SELF_FD_CSTR) };
        let proc_self_fd = Self::open_file(
            &libc::AT_FDCWD,
            proc_self_fd_cstr,
            libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC,
            0,
        )?;

        // Directory timeouts fall back to the generic entry/attr timeouts when
        // not configured explicitly.
        let (dir_entry_timeout, dir_attr_timeout) =
            match (cfg.dir_entry_timeout, cfg.dir_attr_timeout) {
                (Some(e), Some(a)) => (e, a),
                (Some(e), None) => (e, cfg.attr_timeout),
                (None, Some(a)) => (cfg.entry_timeout, a),
                (None, None) => (cfg.entry_timeout, cfg.attr_timeout),
            };

        let mount_fds = MountFds::new(None)?;

        Ok(PassthroughFs {
            inode_map: InodeMap::new(),
            // ROOT_ID is reserved for the root inode; dynamic numbering starts after it.
            next_inode: AtomicU64::new(ROOT_ID + 1),
            ino_allocator: UniqueInodeGenerator::new(),

            handle_map: HandleMap::new(),
            next_handle: AtomicU64::new(1),

            mount_fds,
            proc_self_fd,

            writeback: AtomicBool::new(false),
            no_open: AtomicBool::new(false),
            no_opendir: AtomicBool::new(false),
            //killpriv_v2: AtomicBool::new(false),
            no_readdir: AtomicBool::new(cfg.no_readdir),
            seal_size: AtomicBool::new(cfg.seal_size),
            //perfile_dax: AtomicBool::new(false),
            dir_entry_timeout,
            dir_attr_timeout,
            cfg,

            _uuid: Uuid::new_v4(),

            phantom: PhantomData,
        })
    }

    /// Initialize the Passthrough file system.
    ///
    /// Opens the configured root directory, registers it as `ROOT_ID` in the
    /// inode map, and clears the process umask so client-requested modes are
    /// applied verbatim.
    pub async fn import(&self) -> Result<()> {
        let root = CString::new(self.cfg.root_dir.as_str()).expect("CString::new failed");

        let (path_fd, handle_opt, st) = Self::open_file_and_handle(self, &libc::AT_FDCWD, &root)
            .map_err(|e| {
                error!("fuse: import: failed to get file or handle: {e:?}");
                e
            })?;
        let id = InodeId::from_stat(&st);
        // Prefer a file handle when one could be obtained; otherwise pin the
        // root with the `O_PATH` fd itself.
        let handle = if let Some(h) = handle_opt {
            InodeHandle::Handle(self.to_openable_handle(h)?)
        } else {
            InodeHandle::File(path_fd)
        };

        // Safe because this doesn't modify any memory and there is no need to check the return
        // value because this system call always succeeds. We need to clear the umask here because
        // we want the client to be able to set all the bits in the mode.
        unsafe { libc::umask(0o000) };

        // Not sure why the root inode gets a refcount of 2 but that's what libfuse does.
        self.inode_map
            .insert(Arc::new(InodeData::new(
                ROOT_ID,
                handle,
                2,
                id,
                st.st.st_mode,
            )))
            .await;

        Ok(())
    }

    /// Get the list of file descriptors which should be reserved across live upgrade.
    pub fn keep_fds(&self) -> Vec<RawFd> {
        vec![self.proc_self_fd.as_raw_fd()]
    }

    /// Read the target of the symlink `pathname` relative to `dfd`.
    ///
    /// NOTE(review): the buffer is capped at `PATH_MAX`, so a longer link
    /// target would be silently truncated — confirm this is acceptable.
    fn readlinkat(dfd: i32, pathname: &CStr) -> Result<PathBuf> {
        let mut buf = Vec::with_capacity(libc::PATH_MAX as usize);

        // Safe because the kernel will only write data to buf and we check the return value
        let buf_read = unsafe {
            libc::readlinkat(
                dfd,
                pathname.as_ptr(),
                buf.as_mut_ptr() as *mut libc::c_char,
                buf.capacity(),
            )
        };
        if buf_read < 0 {
            error!("fuse: readlinkat error");
            return Err(Error::last_os_error());
        }

        // Safe because we trust the value returned by kernel.
        unsafe { buf.set_len(buf_read as usize) };
        buf.shrink_to_fit();

        // Be careful:
        // - readlink() does not append a terminating null byte to buf
        // - OsString instances are not NUL terminated
        Ok(PathBuf::from(OsString::from_vec(buf)))
    }

    /// Get the file pathname corresponding to the Inode
    /// This function is used by Nydus blobfs
    ///
    /// Resolves the path by reading the `/proc/self/fd/{fd}` symlink of the
    /// inode's pinned descriptor.
    pub async fn readlinkat_proc_file(&self, inode: Inode) -> Result<PathBuf> {
        let data = self.inode_map.get(inode).await?;
        let file = data.get_file()?;
        let pathname = CString::new(format!("{}", file.as_raw_fd()))
            .map_err(|e| Error::new(io::ErrorKind::InvalidData, e))?;

        Self::readlinkat(self.proc_self_fd.as_raw_fd(), &pathname)
    }

    /// Try to create `pathname` under `dir` with `O_CREAT | O_EXCL`.
    ///
    /// Returns `Ok(Some(file))` on creation, `Ok(None)` when the file already
    /// exists and the caller did not ask for `O_EXCL`, and `Err` otherwise.
    fn create_file_excl(
        dir: &impl AsRawFd,
        pathname: &CStr,
        flags: i32,
        mode: u32,
    ) -> io::Result<Option<File>> {
        match openat(dir, pathname, flags | libc::O_CREAT | libc::O_EXCL, mode) {
            Ok(file) => Ok(Some(file)),
            Err(err) => {
                // Ignore the error if the file exists and O_EXCL is not present in `flags`.
                if err.kind() == io::ErrorKind::AlreadyExists {
                    if (flags & libc::O_EXCL) != 0 {
                        return Err(err);
                    }
                    return Ok(None);
                }
                Err(err)
            }
        }
    }

    /// Thin wrapper over `openat(2)`.
    fn open_file(dfd: &impl AsRawFd, pathname: &CStr, flags: i32, mode: u32) -> io::Result<File> {
        openat(dfd, pathname, flags, mode)
    }

    /// Open `pathname` under `dir`, always adding `O_NOFOLLOW | O_CLOEXEC`
    /// so symlinks are never followed and fds don't leak across exec.
    fn open_file_restricted(
        &self,
        dir: &impl AsRawFd,
        pathname: &CStr,
        flags: i32,
        mode: u32,
    ) -> io::Result<File> {
        let flags = libc::O_NOFOLLOW | libc::O_CLOEXEC | flags;

        // TODO
        //if self.os_facts.has_openat2 {
        //    oslib::do_open_relative_to(dir, pathname, flags, mode)
        //} else {
        openat(dir, pathname, flags, mode)
        //}
    }

    /// Create a File or File Handle for `name` under directory `dir_fd` to support `lookup()`.
    ///
    /// Returns the `O_PATH` fd, the optional file handle (only when
    /// `cfg.inode_file_handles` is set), and the file's `statx` result.
    fn open_file_and_handle(
        &self,
        dir: &impl AsRawFd,
        name: &CStr,
    ) -> io::Result<(File, Option<FileHandle>, StatExt)> {
        let path_file = self.open_file_restricted(dir, name, libc::O_PATH, 0)?;
        let st = statx::statx(&path_file, None)?;
        let handle = if self.cfg.inode_file_handles {
            FileHandle::from_fd(&path_file)?
        } else {
            None
        };

        Ok((path_file, handle, st))
    }

    /// Convert a plain `FileHandle` into an openable one bound to this
    /// filesystem's mount-fd table; errors are logged unless marked silent.
    fn to_openable_handle(&self, fh: FileHandle) -> io::Result<Arc<OpenableFileHandle>> {
        fh.into_openable(&self.mount_fds, |fd, flags, _mode| {
            reopen_fd_through_proc(&fd, flags, &self.proc_self_fd)
        })
        .map(Arc::new)
        .map_err(|e| {
            if !e.silent() {
                error!("{e}");
            }
            e.into_inner()
        })
    }

    /// Pick an inode number for `id`, reusing an existing mapping when one is
    /// already present in `inodes`.
    ///
    /// NOTE(review): this fn is `async` but contains no awaits; kept async so
    /// the awaited call sites remain unchanged.
    async fn allocate_inode(
        &self,
        inodes: &InodeStore,
        id: &InodeId,
        handle_opt: Option<&FileHandle>,
    ) -> io::Result<Inode> {
        if !self.cfg.use_host_ino {
            // If the inode has already been assigned before, the new inode is not reassigned,
            // ensuring that the same file is always the same inode
            match InodeMap::get_inode_locked(inodes, id, handle_opt) {
                Some(a) => Ok(a),
                None => Ok(self.next_inode.fetch_add(1, Ordering::Relaxed)),
            }
        } else {
            let inode = if id.ino > MAX_HOST_INO {
                // Prefer looking for previous mappings from memory
                match InodeMap::get_inode_locked(inodes, id, handle_opt) {
                    Some(ino) => ino,
                    None => self.ino_allocator.get_unique_inode(id)?,
                }
            } else {
                self.ino_allocator.get_unique_inode(id)?
            };
            // trace!("fuse: allocate inode: {} for id: {:?}", inode, id);
            Ok(inode)
        }
    }

    /// Look up `name` under `parent`, registering (or re-referencing) its
    /// inode and returning the FUSE entry reply.
    ///
    /// A lookup of `..` from the root is rewritten to `.` so the client can
    /// never escape the exported tree.
    async fn do_lookup(
        &self,
        parent: Inode,
        name: &CStr,
    ) -> std::result::Result<ReplyEntry, Errno> {
        let name = if parent == ROOT_ID && name.to_bytes_with_nul().starts_with(PARENT_DIR_CSTR) {
            // Safe as this is a constant value and a valid C string.
            CStr::from_bytes_with_nul(CURRENT_DIR_CSTR).unwrap()
        } else {
            name
        };

        let dir = self.inode_map.get(parent).await?;
        let dir_file = dir.get_file()?;
        let (path_fd, handle_opt, st) = Self::open_file_and_handle(self, &dir_file, name)?;
        let id = InodeId::from_stat(&st);
        // trace!("FS {} do_lookup: parent: {}, name: {}, path_fd: {:?}, handle_opt: {:?}, id: {:?}",
        // self.uuid, parent, name.to_string_lossy(), path_fd.as_raw_fd(), handle_opt, id);

        // First try to bump the refcount of an existing entry without taking
        // the write lock; retry on races with forget_one().
        let mut found = None;
        'search: loop {
            match self.inode_map.get_alt(&id, handle_opt.as_ref()).await {
                // No existing entry found
                None => break 'search,
                Some(data) => {
                    let curr = data.refcount.load(Ordering::Acquire);
                    // forget_one() has just destroyed the entry, retry...
                    if curr == 0 {
                        continue 'search;
                    }

                    // Saturating add to avoid integer overflow, it's not realistic to saturate u64.
                    let new = curr.saturating_add(1);

                    // Synchronizes with the forget_one()
                    if data
                        .refcount
                        .compare_exchange(curr, new, Ordering::AcqRel, Ordering::Acquire)
                        .is_ok()
                    {
                        found = Some(data.inode);
                        break;
                    }
                }
            }
        }

        let inode = if let Some(v) = found {
            v
        } else {
            let handle = if let Some(h) = handle_opt.clone() {
                InodeHandle::Handle(self.to_openable_handle(h)?)
            } else {
                InodeHandle::File(path_fd)
            };

            // Write guard get_alt_locked() and insert_lock() to avoid race conditions.
            let mut inodes = self.inode_map.inodes.write().await;

            // Lookup inode_map again after acquiring the inode_map lock, as there might be another
            // racing thread already added an inode with the same id while we're not holding
            // the lock. If so just use the newly added inode, otherwise the inode will be replaced
            // and results in EBADF.
            // trace!("FS {} looking up inode for id: {:?} with handle: {:?}", self.uuid, id, handle);
            match InodeMap::get_alt_locked(&inodes, &id, handle_opt.as_ref()) {
                Some(data) => {
                    // An inode was added concurrently while we did not hold a lock on
                    // `self.inodes_map`, so we use that instead. `handle` will be dropped.
                    // trace!("FS {} found existing inode: {}", self.uuid, data.inode);
                    data.refcount.fetch_add(1, Ordering::Relaxed);
                    data.inode
                }
                None => {
                    let inode = self
                        .allocate_inode(&inodes, &id, handle_opt.as_ref())
                        .await?;
                    // trace!("FS {} allocated new inode: {} for id: {:?}", self.uuid, inode, id);

                    if inode > VFS_MAX_INO {
                        error!("fuse: max inode number reached: {VFS_MAX_INO}");
                        return Err(io::Error::other(format!(
                            "max inode number reached: {VFS_MAX_INO}"
                        ))
                        .into());
                    }

                    InodeMap::insert_locked(
                        inodes.deref_mut(),
                        Arc::new(InodeData::new(inode, handle, 1, id, st.st.st_mode)),
                    );

                    inode
                }
            }
        };

        // Directories may use their own (typically longer) timeouts.
        let (entry_timeout, _) = if is_dir(st.st.st_mode) {
            (self.dir_entry_timeout, self.dir_attr_timeout)
        } else {
            (self.cfg.entry_timeout, self.cfg.attr_timeout)
        };

        // // Whether to enable file DAX according to the value of dax_file_size
        // let mut attr_flags: u32 = 0;
        // if let Some(dax_file_size) = self.cfg.dax_file_size {
        //     // st.stat.st_size is i64
        //     if self.perfile_dax.load().await
        //         && st.st.st_size >= 0x0
        //         && st.st.st_size as u64 >= dax_file_size
        //     {
        //         attr_flags |= FUSE_ATTR_DAX;
        //     }
        // }

        // Report the FUSE-visible inode number, not the host one.
        let mut attr_temp = convert_stat64_to_file_attr(st.st);
        attr_temp.ino = inode;
        Ok(ReplyEntry {
            ttl: entry_timeout,
            attr: attr_temp,
            generation: 0,
        })
    }

    /// Decrease `inode`'s lookup count by `count`, dropping the entry when it
    /// reaches zero. Caller must hold the inode-store write lock.
    fn forget_one(&self, inodes: &mut InodeStore, inode: Inode, count: u64) {
        // ROOT_ID should not be forgotten, or we're not able to access to files any more.
        if inode == ROOT_ID {
            return;
        }

        if let Some(data) = inodes.get(&inode) {
            // Acquiring the write lock on the inode map prevents new lookups from incrementing the
            // refcount but there is the possibility that a previous lookup already acquired a
            // reference to the inode data and is in the process of updating the refcount so we need
            // to loop here until we can decrement successfully.
            loop {
                let curr = data.refcount.load(Ordering::Acquire);

                // Saturating sub because it doesn't make sense for a refcount to go below zero and
                // we don't want misbehaving clients to cause integer overflow.
                let new = curr.saturating_sub(count);

                // Synchronizes with the acquire load in `do_lookup`.
                if data
                    .refcount
                    .compare_exchange(curr, new, Ordering::AcqRel, Ordering::Acquire)
                    .is_ok()
                {
                    if new == 0 {
                        // We just removed the last refcount for this inode.
                        // The allocated inode number should be kept in the map when use_host_ino
                        // is false or host inode(don't use the virtual 56bit inode) is bigger than MAX_HOST_INO.
                        let keep_mapping = !self.cfg.use_host_ino || data.id.ino > MAX_HOST_INO;
                        inodes.remove(&inode, keep_mapping);
                    }
                    break;
                }
            }
        }
    }

    /// Release the open handle `handle` belonging to `inode`.
    async fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> {
        self.handle_map.release(handle, inode).await
    }

    // Validate a path component, same as the one in vfs layer, but only do the validation if this
    // passthroughfs is used without vfs layer, to avoid double validation.
    fn validate_path_component(&self, name: &CStr) -> io::Result<()> {
        // !self.cfg.do_import means we're under vfs, and vfs has already done the validation
        if !self.cfg.do_import {
            return Ok(());
        }
        validate_path_component(name)
    }

    //TODO: When seal_size is set, we don't allow operations that could change file size nor allocate
    // space beyond EOF
    // fn seal_size_check(
    //     &self,
    //     opcode: Opcode,
    //     file_size: u64,
    //     offset: u64,
    //     size: u64,
    //     mode: i32,
    // ) -> io::Result<()> {
    //     if offset.checked_add(size).is_none() {
    //         error!(
    //             "fuse: {:?}: invalid `offset` + `size` ({}+{}) overflows u64::MAX",
    //             opcode, offset, size
    //         );
    //         return Err(einval());
    //     }

    //     match opcode {
    //         // write should not exceed the file size.
    //         Opcode::Write => {
    //             if size + offset > file_size {
    //                 return Err(eperm());
    //             }
    //         }

    //         Opcode::Fallocate => {
    //             let op = mode & !(libc::FALLOC_FL_KEEP_SIZE | libc::FALLOC_FL_UNSHARE_RANGE);
    //             match op {
    //                 // Allocate, punch and zero, must not change file size.
    //                 0 | libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_ZERO_RANGE => {
    //                     if size + offset > file_size {
    //                         return Err(eperm());
    //                     }
    //                 }
    //                 // collapse and insert will change file size, forbid.
    //                 libc::FALLOC_FL_COLLAPSE_RANGE | libc::FALLOC_FL_INSERT_RANGE => {
    //                     return Err(eperm());
    //                 }
    //                 // Invalid operation
    //                 _ => return Err(einval()),
    //             }
    //         }

    //         // setattr operation should be handled in setattr handler.
    //         _ => return Err(enosys()),
    //     }

    //     Ok(())
    // }

    /// Adjust caller-supplied open `flags` for writeback caching.
    async fn get_writeback_open_flags(&self, flags: i32) -> i32 {
        let mut new_flags = flags;
        let writeback = self.writeback.load(Ordering::Relaxed);

        // When writeback caching is enabled, the kernel may send read requests even if the
        // userspace program opened the file write-only. So we need to ensure that we have opened
        // the file for reading as well as writing.
        if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY {
            new_flags &= !libc::O_ACCMODE;
            new_flags |= libc::O_RDWR;
        }

        // When writeback caching is enabled the kernel is responsible for handling `O_APPEND`.
        // However, this breaks atomicity as the file may have changed on disk, invalidating the
        // cached copy of the data in the kernel and the offset that the kernel thinks is the end of
        // the file. Just allow this for now as it is the user's responsibility to enable writeback
        // caching only for directories that are not shared. It also means that we need to clear the
        // `O_APPEND` flag.
        if writeback && flags & libc::O_APPEND != 0 {
            new_flags &= !libc::O_APPEND;
        }

        new_flags
    }
}
903
#[cfg(test)]
mod tests {
    use std::ffi::OsString;

    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;

    use crate::passthrough::newlogfs::LoggingFileSystem;

    /// Manual smoke test: mounts a logging passthrough filesystem over a
    /// hardcoded directory and serves it until the mount task finishes or
    /// Ctrl-C is received.
    ///
    /// Marked `#[ignore]` because it depends on machine-specific paths
    /// (`/home/luxian/...`) and otherwise blocks `cargo test` indefinitely
    /// waiting for a signal. Run it explicitly with `cargo test -- --ignored`.
    #[tokio::test]
    #[ignore = "manual test: requires local paths and blocks until Ctrl-C"]
    async fn test_passthrough() {
        let fs = super::new_passthroughfs_layer("/home/luxian/github/buck2-rust-third-party")
            .await
            .unwrap();
        let logfs = LoggingFileSystem::new(fs);

        let mount_path = OsString::from("/home/luxian/pass");

        // Safe: getuid/getgid never fail and touch no memory.
        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };

        // Mount with real privileges (true) or via the unprivileged
        // fusermount path (false).
        let privileged = true;

        let mut mount_options = MountOptions::default();
        // .allow_other(true)
        mount_options.force_readdir_plus(true).uid(uid).gid(gid);

        let mut mount_handle: rfuse3::raw::MountHandle = if privileged {
            Session::new(mount_options)
                .mount(logfs, mount_path)
                .await
                .unwrap()
        } else {
            Session::new(mount_options)
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        };

        let handle = &mut mount_handle;

        // Serve until the session ends on its own, or unmount on Ctrl-C.
        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}