//! Passthrough filesystem implementation (`libfuse_fs/passthrough/mod.rs`):
//! forwards FUSE requests directly to an underlying host directory tree.
1#![allow(clippy::useless_conversion)]
2use config::{CachePolicy, Config};
3use file_handle::{FileHandle, OpenableFileHandle};
4
5#[cfg(target_os = "macos")]
6use self::statx::statx_timestamp;
7use futures::executor::block_on;
8use inode_store::{InodeId, InodeStore};
9#[cfg(target_os = "linux")]
10use libc::{self, statx_timestamp};
11
12use moka::future::Cache;
13use rfuse3::{Errno, raw::reply::ReplyEntry};
14use uuid::Uuid;
15
16use crate::passthrough::mmap::{MmapCachedValue, MmapChunkKey};
17use crate::util::convert_stat64_to_file_attr;
18use mount_fd::MountFds;
19use statx::StatExt;
20use std::cmp;
21use std::io::Result;
22use std::ops::DerefMut;
23use std::os::unix::ffi::OsStrExt;
24use std::path::Path;
25use tracing::error;
26use tracing::{debug, warn};
27
28use std::sync::atomic::{AtomicBool, AtomicU32};
29use std::{
30    collections::{BTreeMap, btree_map},
31    ffi::{CStr, CString, OsString},
32    fs::File,
33    io::{self, Error},
34    marker::PhantomData,
35    os::{
36        fd::{AsFd, AsRawFd, BorrowedFd, RawFd},
37        unix::ffi::OsStringExt,
38    },
39    path::PathBuf,
40    sync::Arc,
41    sync::atomic::{AtomicU64, Ordering},
42    time::Duration,
43};
44use util::{
45    UniqueInodeGenerator, ebadf, is_dir, openat, reopen_fd_through_proc, stat_fd,
46    validate_path_component,
47};
48
49use vm_memory::bitmap::BitmapSlice;
50
51use nix::sys::resource::{Resource, getrlimit};
52
53pub mod async_io;
54mod config;
55mod file_handle;
56mod inode_store;
57mod mmap;
58mod mount_fd;
59mod os_compat;
60mod statx;
61pub mod util;
62
/// Current directory
pub const CURRENT_DIR_CSTR: &[u8] = b".\0";
/// Parent directory
pub const PARENT_DIR_CSTR: &[u8] = b"..\0";
/// Largest inode number representable to the guest (56 bits).
pub const VFS_MAX_INO: u64 = 0xff_ffff_ffff_ffff;
/// Source of mount information used to resolve mount IDs (Linux only).
#[cfg(target_os = "linux")]
const MOUNT_INFO_FILE: &str = "/proc/self/mountinfo";
// macOS has no mountinfo equivalent; point at a harmless placeholder.
#[cfg(target_os = "macos")]
const MOUNT_INFO_FILE: &str = "/dev/null";
/// Empty, NUL-terminated C string.
pub const EMPTY_CSTR: &[u8] = b"\0";
/// Directory of per-process fd symlinks, used to re-open descriptors.
#[cfg(target_os = "linux")]
pub const PROC_SELF_FD_CSTR: &[u8] = b"/proc/self/fd\0";
// macOS equivalent of /proc/self/fd.
#[cfg(target_os = "macos")]
pub const PROC_SELF_FD_CSTR: &[u8] = b"/dev/fd\0";
/// Inode number assigned to the filesystem root directory.
pub const ROOT_ID: u64 = 1;
78use tokio::sync::{Mutex, MutexGuard, RwLock};
79
/// Arguments for constructing a passthrough filesystem layer via
/// [`new_passthroughfs_layer`].
#[derive(Debug, Clone)]
pub struct PassthroughArgs<P, M>
where
    P: AsRef<Path>,
    M: AsRef<str>,
{
    /// Host directory exposed as the root of the filesystem.
    pub root_dir: P,
    /// Optional UID/GID mapping specification, parsed into `Config::mapping`
    /// when present (format defined by the mapping parser in `config`).
    pub mapping: Option<M>,
}
89
90pub async fn new_passthroughfs_layer<P: AsRef<Path>, M: AsRef<str>>(
91    args: PassthroughArgs<P, M>,
92) -> Result<PassthroughFs> {
93    let mut config = Config {
94        root_dir: args.root_dir.as_ref().to_path_buf(),
95        // enable xattr
96        xattr: true,
97        do_import: true,
98        ..Default::default()
99    };
100    if let Some(mapping) = args.mapping {
101        config.mapping = mapping
102            .as_ref()
103            .parse()
104            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))?;
105    }
106
107    let fs = PassthroughFs::<()>::new(config)?;
108
109    #[cfg(target_os = "linux")]
110    if fs.cfg.do_import {
111        fs.import().await?;
112    }
113    #[cfg(target_os = "macos")]
114    {
115        // On macOS, always import for now since we rely on the root node being set up?
116        // Or respect the config.
117        fs.import().await?;
118    }
119
120    Ok(fs)
121}
122
/// Guest-visible inode number.
type Inode = u64;
/// Identifier for an open file handle handed back to the FUSE client.
type Handle = u64;

/// Maximum host inode number supported by passthroughfs
const MAX_HOST_INO: u64 = 0x7fff_ffff_ffff;
128
/**
 * Represents the file associated with an inode (`InodeData`).
 *
 * When obtaining such a file, it may either be a new file (the `Owned` variant), in which case the
 * object's lifetime is static, or it may reference `InodeData.file` (the `Ref` variant), in which
 * case the object's lifetime is that of the respective `InodeData` object.
 */
#[derive(Debug)]
enum InodeFile<'a> {
    /// A freshly opened file owned by this value (e.g. re-opened from a file handle).
    Owned(File),
    /// Borrow of the file stored in an `InodeData`; valid only while that data lives.
    Ref(&'a File),
}
141
142impl AsRawFd for InodeFile<'_> {
143    /// Return a file descriptor for this file
144    /// Note: This fd is only valid as long as the `InodeFile` exists.
145    fn as_raw_fd(&self) -> RawFd {
146        match self {
147            Self::Owned(file) => file.as_raw_fd(),
148            Self::Ref(file_ref) => file_ref.as_raw_fd(),
149        }
150    }
151}
152
153impl AsFd for InodeFile<'_> {
154    fn as_fd(&self) -> BorrowedFd<'_> {
155        match self {
156            Self::Owned(file) => file.as_fd(),
157            Self::Ref(file_ref) => file_ref.as_fd(),
158        }
159    }
160}
161
/// How an inode is kept reachable: either an open descriptor or a
/// re-openable file handle.
#[derive(Debug)]
#[allow(dead_code)]
enum InodeHandle {
    // TODO: Remove this variant once we have a way to handle files that are not
    /// Fallback: an open descriptor kept for the inode's lifetime.
    File(File),
    /// A file handle that can be re-opened on demand via the mount-fd table.
    Handle(Arc<OpenableFileHandle>),
}
169
170impl InodeHandle {
171    fn file_handle(&self) -> Option<&FileHandle> {
172        match self {
173            InodeHandle::File(_) => None,
174            InodeHandle::Handle(h) => Some(h.file_handle()),
175        }
176    }
177
178    fn get_file(&self) -> Result<InodeFile<'_>> {
179        match self {
180            InodeHandle::File(f) => Ok(InodeFile::Ref(f)),
181            InodeHandle::Handle(h) => {
182                #[cfg(target_os = "linux")]
183                let f = h.open(libc::O_PATH)?;
184                #[cfg(target_os = "macos")]
185                let f = h.open(libc::O_RDONLY)?;
186                Ok(InodeFile::Owned(f))
187            }
188        }
189    }
190
191    fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> Result<File> {
192        match self {
193            InodeHandle::File(f) => reopen_fd_through_proc(f, flags, proc_self_fd),
194            InodeHandle::Handle(h) => h.open(flags),
195        }
196    }
197
198    #[cfg(target_os = "linux")]
199    fn stat(&self) -> Result<libc::stat64> {
200        self.do_stat()
201    }
202    #[cfg(target_os = "macos")]
203    fn stat(&self) -> Result<libc::stat> {
204        // On macOS, stat_fd returns libc::stat, which is the correct type.
205        // No explicit cast from stat64 is needed if stat_fd is correctly implemented
206        // to return the platform-specific stat struct.
207        self.do_stat()
208    }
209
210    #[cfg(target_os = "linux")]
211    fn do_stat(&self) -> Result<libc::stat64> {
212        match self {
213            InodeHandle::File(f) => stat_fd(f, None),
214            InodeHandle::Handle(_h) => {
215                let file = self.get_file()?;
216                stat_fd(&file, None)
217            }
218        }
219    }
220
221    #[cfg(target_os = "macos")]
222    fn do_stat(&self) -> Result<libc::stat> {
223        match self {
224            InodeHandle::File(f) => stat_fd(f, None),
225            InodeHandle::Handle(_h) => {
226                let file = self.get_file()?;
227                stat_fd(&file, None)
228            }
229        }
230    }
231}
232
/// Represents an inode in `PassthroughFs`.
#[derive(Debug)]
pub struct InodeData {
    /// Guest-visible inode number.
    inode: Inode,
    // Most of these aren't actually files but ¯\_(ツ)_/¯.
    handle: InodeHandle,
    /// Host identity (from stat) used for alternate-key lookups.
    id: InodeId,
    /// FUSE lookup count; the entry is destroyed when it drops to zero.
    refcount: AtomicU64,
    // File type and mode
    mode: u32,
    /// Birth time; combined with the inode number to uniquely identify a file
    /// (see `FileUniqueKey`).
    btime: statx_timestamp,
}
245
246impl InodeData {
247    fn new(
248        inode: Inode,
249        f: InodeHandle,
250        refcount: u64,
251        id: InodeId,
252        mode: u32,
253        btime: statx_timestamp,
254    ) -> Self {
255        InodeData {
256            inode,
257            handle: f,
258            id,
259            refcount: AtomicU64::new(refcount),
260            mode,
261            btime,
262        }
263    }
264
265    fn get_file(&self) -> Result<InodeFile<'_>> {
266        self.handle.get_file()
267    }
268
269    fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> Result<File> {
270        self.handle.open_file(flags, proc_self_fd)
271    }
272}
273
/// Data structures to manage accessed inodes.
struct InodeMap {
    /// Store of live inodes, indexed by inode number and by alternate keys
    /// (host ID / file handle).
    pub inodes: RwLock<InodeStore>,
}
278
279impl InodeMap {
280    fn new() -> Self {
281        InodeMap {
282            inodes: RwLock::new(Default::default()),
283        }
284    }
285
286    async fn clear(&self) {
287        // Do not expect poisoned lock here, so safe to unwrap().
288        self.inodes.write().await.clear();
289    }
290
291    async fn get(&self, inode: Inode) -> Result<Arc<InodeData>> {
292        // Do not expect poisoned lock here, so safe to unwrap().
293        self.inodes
294            .read()
295            .await
296            .get(&inode)
297            .cloned()
298            .ok_or_else(ebadf)
299    }
300
301    fn get_inode_locked(inodes: &InodeStore, handle: &InodeHandle) -> Option<Inode> {
302        if let Some(h) = handle.file_handle() {
303            inodes.inode_by_handle(h).copied()
304        } else {
305            None
306        }
307    }
308
309    async fn get_alt(&self, id: &InodeId, handle: &InodeHandle) -> Option<Arc<InodeData>> {
310        // Do not expect poisoned lock here, so safe to unwrap().
311        let inodes = self.inodes.read().await;
312
313        Self::get_alt_locked(&inodes, id, handle)
314    }
315
316    fn get_alt_locked(
317        inodes: &InodeStore,
318        id: &InodeId,
319        handle: &InodeHandle,
320    ) -> Option<Arc<InodeData>> {
321        let by_handle = if let Some(h) = handle.file_handle() {
322            inodes.get_by_handle(h)
323        } else {
324            None
325        };
326
327        by_handle
328            .or_else(|| {
329                inodes.get_by_id(id).filter(|data| {
330                    // When we have to fall back to looking up an inode by its IDs, ensure that
331                    // we hit an entry that does not have a file handle.  Entries with file
332                    // handles must also have a handle alt key, so if we have not found it by
333                    // that handle alt key, we must have found an entry with a mismatching
334                    // handle; i.e. an entry for a different file, even though it has the same
335                    // inode ID.
336                    // (This can happen when we look up a new file that has reused the inode ID
337                    // of some previously unlinked inode we still have in `.inodes`.)
338                    data.handle.file_handle().is_none()
339                })
340            })
341            .cloned()
342    }
343
344    async fn insert(&self, data: Arc<InodeData>) {
345        let mut inodes = self.inodes.write().await;
346
347        Self::insert_locked(&mut inodes, data)
348    }
349
350    fn insert_locked(inodes: &mut InodeStore, data: Arc<InodeData>) {
351        inodes.insert(data);
352    }
353}
354
/// State for one open file handle returned to the FUSE client.
struct HandleData {
    /// Inode this handle belongs to (validated on every access).
    inode: Inode,
    /// The open file backing the handle.
    file: File,
    /// Serializes operations that need exclusive use of `file`.
    lock: Mutex<()>,
    /// Flags the file was opened with; updated via `set_flags`.
    open_flags: AtomicU32,
}
361
362impl HandleData {
363    fn new(inode: Inode, file: File, flags: u32) -> Self {
364        HandleData {
365            inode,
366            file,
367            lock: Mutex::new(()),
368            open_flags: AtomicU32::new(flags),
369        }
370    }
371
372    fn get_file(&self) -> &File {
373        &self.file
374    }
375
376    async fn get_file_mut(&self) -> (MutexGuard<'_, ()>, &File) {
377        (self.lock.lock().await, &self.file)
378    }
379
380    fn borrow_fd(&self) -> BorrowedFd<'_> {
381        self.file.as_fd()
382    }
383
384    async fn get_flags(&self) -> u32 {
385        self.open_flags.load(Ordering::Relaxed)
386    }
387
388    async fn set_flags(&self, flags: u32) {
389        self.open_flags.store(flags, Ordering::Relaxed);
390    }
391}
392
/// Map from handle number to handle state for all open files/directories.
struct HandleMap {
    handles: RwLock<BTreeMap<Handle, Arc<HandleData>>>,
}
396
397impl HandleMap {
398    fn new() -> Self {
399        HandleMap {
400            handles: RwLock::new(BTreeMap::new()),
401        }
402    }
403
404    async fn clear(&self) {
405        // Do not expect poisoned lock here, so safe to unwrap().
406        self.handles.write().await.clear();
407    }
408
409    async fn insert(&self, handle: Handle, data: HandleData) {
410        // Do not expect poisoned lock here, so safe to unwrap().
411        self.handles.write().await.insert(handle, Arc::new(data));
412    }
413
414    async fn release(&self, handle: Handle, inode: Inode) -> Result<()> {
415        // Do not expect poisoned lock here, so safe to unwrap().
416        let mut handles = self.handles.write().await;
417
418        if let btree_map::Entry::Occupied(e) = handles.entry(handle)
419            && e.get().inode == inode
420        {
421            // We don't need to close the file here because that will happen automatically when
422            // the last `Arc` is dropped.
423            e.remove();
424
425            return Ok(());
426        }
427
428        Err(ebadf())
429    }
430
431    async fn get(&self, handle: Handle, inode: Inode) -> Result<Arc<HandleData>> {
432        // Do not expect poisoned lock here, so safe to unwrap().
433        self.handles
434            .read()
435            .await
436            .get(&handle)
437            .filter(|hd| hd.inode == inode)
438            .cloned()
439            .ok_or_else(ebadf)
440    }
441}
442
/// Cache key identifying a host file as (st_ino, birth time) — presumably the
/// birth time disambiguates inode-number reuse after unlink; see how
/// `open_file_and_handle` only uses this key when the btime is non-zero.
#[derive(Debug, Hash, Eq, PartialEq)]
struct FileUniqueKey(u64, statx_timestamp);
445
/// A file system that simply "passes through" all requests it receives to the underlying file
/// system.
///
/// To keep the implementation simple it serves the contents of its root directory. Users
/// that wish to serve only a specific directory should set up the environment so that that
/// directory ends up as the root of the file system process. One way to accomplish this is via a
/// combination of mount namespaces and the pivot_root system call.
pub struct PassthroughFs<S: BitmapSlice + Send + Sync = ()> {
    // File descriptors for various points in the file system tree. These fds are always opened with
    // the `O_PATH` option so they cannot be used for reading or writing any data. See the
    // documentation of the `O_PATH` flag in `open(2)` for more details on what one can and cannot
    // do with an fd opened with this flag.
    inode_map: InodeMap,
    // Next inode number to hand out when not deriving from host inode numbers.
    next_inode: AtomicU64,

    // File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be
    // used for reading and writing data.
    handle_map: HandleMap,
    // Next handle number to hand out.
    next_handle: AtomicU64,

    // Use to generate unique inode
    ino_allocator: UniqueInodeGenerator,
    // Maps mount IDs to an open FD on the respective ID for the purpose of open_by_handle_at().
    mount_fds: MountFds,

    // File descriptor pointing to the `/proc/self/fd` directory. This is used to convert an fd from
    // `inodes` into one that can go into `handles`. This is accomplished by reading the
    // `/proc/self/fd/{}` symlink. We keep an open fd here in case the file system tree that we are meant
    // to be serving doesn't have access to `/proc/self/fd`.
    proc_self_fd: File,

    // Whether writeback caching is enabled for this directory. This will only be true when
    // `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`.
    writeback: AtomicBool,

    // Whether no_open is enabled.
    no_open: AtomicBool,

    // Whether no_opendir is enabled.
    no_opendir: AtomicBool,

    // Whether kill_priv_v2 is enabled.
    //killpriv_v2: AtomicBool,

    // Whether no_readdir is enabled.
    no_readdir: AtomicBool,

    // Whether seal_size is enabled.
    seal_size: AtomicBool,

    // Whether per-file DAX feature is enabled.
    // Init from guest kernel Init cmd of fuse fs.
    //perfile_dax: AtomicBool,
    // Entry/attr TTLs reported for directories (may differ from regular files).
    dir_entry_timeout: Duration,
    dir_attr_timeout: Duration,

    // Filesystem configuration as sanitized by `new()`.
    cfg: Config,

    // Instance identifier (currently unused beyond debugging).
    _uuid: Uuid,

    phantom: PhantomData<S>,

    // Cache of kernel file handles keyed by (st_ino, btime); capacity bounded
    // by the process RLIMIT_NOFILE soft limit.
    handle_cache: Cache<FileUniqueKey, Arc<FileHandle>>,

    // Cache of mmap'd file chunks, weighed by mapped length and bounded by
    // `cfg.max_mmap_size` (0 when mmap is disabled).
    mmap_chunks: Cache<MmapChunkKey, Arc<RwLock<mmap::MmapCachedValue>>>,
}
512
513impl<S: BitmapSlice + Send + Sync> PassthroughFs<S> {
    /// Create a Passthrough file system instance.
    ///
    /// Sanitizes conflicting cache options, opens the `/proc/self/fd`
    /// (Linux) or `/dev/fd` (macOS) directory used for fd re-opening, and
    /// builds the handle and mmap caches. Does not touch the root directory;
    /// call `import()` for that.
    pub fn new(mut cfg: Config) -> Result<PassthroughFs<S>> {
        // no_open relies on the kernel caching everything; downgrade otherwise.
        if cfg.no_open && cfg.cache_policy != CachePolicy::Always {
            warn!("passthroughfs: no_open only work with cache=always, reset to open mode");
            cfg.no_open = false;
        }
        // writeback caching is meaningless with cache=none; downgrade.
        if cfg.writeback && cfg.cache_policy == CachePolicy::Never {
            warn!(
                "passthroughfs: writeback cache conflicts with cache=none, reset to no_writeback"
            );
            cfg.writeback = false;
        }

        // Safe because this is a constant value and a valid C string.
        let proc_self_fd_cstr = unsafe { CStr::from_bytes_with_nul_unchecked(PROC_SELF_FD_CSTR) };

        // macOS lacks O_PATH; O_RDONLY is the closest usable mode there.
        #[cfg(target_os = "linux")]
        let flags = libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC;
        #[cfg(target_os = "macos")]
        let flags = libc::O_RDONLY | libc::O_NOFOLLOW | libc::O_CLOEXEC;

        let proc_self_fd = Self::open_file(&libc::AT_FDCWD, proc_self_fd_cstr, flags, 0)?;

        // Directory timeouts fall back to the generic entry/attr timeouts
        // when not configured explicitly.
        let (dir_entry_timeout, dir_attr_timeout) =
            match (cfg.dir_entry_timeout, cfg.dir_attr_timeout) {
                (Some(e), Some(a)) => (e, a),
                (Some(e), None) => (e, cfg.attr_timeout),
                (None, Some(a)) => (cfg.entry_timeout, a),
                (None, None) => (cfg.entry_timeout, cfg.attr_timeout),
            };

        let mount_fds = MountFds::new(None)?;

        // Bound the handle cache by the soft fd limit; fall back to a fixed
        // value if RLIMIT_NOFILE cannot be read.
        let fd_limit = match getrlimit(Resource::RLIMIT_NOFILE) {
            Ok((soft, _)) => soft,
            Err(_) => 65536,
        };

        let max_mmap_size = if cfg.use_mmap { cfg.max_mmap_size } else { 0 };

        // NOTE(review): the weigher calls `block_on` on an async RwLock; if
        // moka ever invokes it from within the async runtime this can stall a
        // worker thread — confirm the eviction path runs off-runtime.
        // NOTE(review): time_to_idle of 60 *milliseconds* looks very short for
        // an mmap cache — was 60 seconds intended?
        let mmap_cache_builder = Cache::builder()
            .max_capacity(max_mmap_size)
            .weigher(
                |_key: &MmapChunkKey, value: &Arc<RwLock<mmap::MmapCachedValue>>| -> u32 {
                    let guard = block_on(value.read());
                    match &*guard {
                        MmapCachedValue::Mmap(mmap) => mmap.len() as u32,
                        MmapCachedValue::MmapMut(mmap_mut) => mmap_mut.len() as u32,
                    }
                },
            )
            .time_to_idle(Duration::from_millis(60));

        Ok(PassthroughFs {
            inode_map: InodeMap::new(),
            next_inode: AtomicU64::new(ROOT_ID + 1),
            ino_allocator: UniqueInodeGenerator::new(),

            handle_map: HandleMap::new(),
            next_handle: AtomicU64::new(1),

            mount_fds,
            proc_self_fd,

            writeback: AtomicBool::new(false),
            no_open: AtomicBool::new(false),
            no_opendir: AtomicBool::new(false),
            //killpriv_v2: AtomicBool::new(false),
            no_readdir: AtomicBool::new(cfg.no_readdir),
            seal_size: AtomicBool::new(cfg.seal_size),
            //perfile_dax: AtomicBool::new(false),
            dir_entry_timeout,
            dir_attr_timeout,
            cfg,

            _uuid: Uuid::new_v4(),

            phantom: PhantomData,

            handle_cache: moka::future::Cache::new(fd_limit),

            mmap_chunks: mmap_cache_builder.build(),
        })
    }
598
599    /// Initialize the Passthrough file system.
600    pub async fn import(&self) -> Result<()> {
601        let root =
602            CString::new(self.cfg.root_dir.as_os_str().as_bytes()).expect("Invalid root_dir");
603
604        let (handle, st) = Self::open_file_and_handle(self, &libc::AT_FDCWD, &root)
605            .await
606            .map_err(|e| {
607                error!("fuse: import: failed to get file or handle: {e:?}");
608
609                e
610            })?;
611
612        let id = InodeId::from_stat(&st);
613
614        // Safe because this doesn't modify any memory and there is no need to check the return
615        // value because this system call always succeeds. We need to clear the umask here because
616        // we want the client to be able to set all the bits in the mode.
617        unsafe { libc::umask(0o000) };
618
619        // Not sure why the root inode gets a refcount of 2 but that's what libfuse does.
620        self.inode_map
621            .insert(Arc::new(InodeData::new(
622                ROOT_ID,
623                handle,
624                2,
625                id,
626                st.st.st_mode.into(),
627                st.btime
628                    .ok_or_else(|| io::Error::other("birth time not available"))?,
629            )))
630            .await;
631
632        Ok(())
633    }
634
635    /// Get the list of file descriptors which should be reserved across live upgrade.
636    pub fn keep_fds(&self) -> Vec<RawFd> {
637        vec![self.proc_self_fd.as_raw_fd()]
638    }
639
640    fn readlinkat(dfd: i32, pathname: &CStr) -> Result<PathBuf> {
641        let mut buf = Vec::with_capacity(libc::PATH_MAX as usize);
642
643        // Safe because the kernel will only write data to buf and we check the return value
644        let buf_read = unsafe {
645            libc::readlinkat(
646                dfd,
647                pathname.as_ptr(),
648                buf.as_mut_ptr() as *mut libc::c_char,
649                buf.capacity(),
650            )
651        };
652        if buf_read < 0 {
653            error!("fuse: readlinkat error");
654            return Err(Error::last_os_error());
655        }
656
657        // Safe because we trust the value returned by kernel.
658        unsafe { buf.set_len(buf_read as usize) };
659        buf.shrink_to_fit();
660
661        // Be careful:
662        // - readlink() does not append a terminating null byte to buf
663        // - OsString instances are not NUL terminated
664        Ok(PathBuf::from(OsString::from_vec(buf)))
665    }
666
667    /// Get the file pathname corresponding to the Inode
668    /// This function is used by Nydus blobfs
669    pub async fn readlinkat_proc_file(&self, inode: Inode) -> Result<PathBuf> {
670        let data = self.inode_map.get(inode).await?;
671        let file = data.get_file()?;
672        let pathname = CString::new(format!("{}", file.as_raw_fd()))
673            .map_err(|e| Error::new(io::ErrorKind::InvalidData, e))?;
674
675        Self::readlinkat(self.proc_self_fd.as_raw_fd(), &pathname)
676    }
677
678    fn create_file_excl(
679        dir: &impl AsRawFd,
680        pathname: &CStr,
681        flags: i32,
682        mode: u32,
683    ) -> io::Result<Option<File>> {
684        match openat(dir, pathname, flags | libc::O_CREAT | libc::O_EXCL, mode) {
685            Ok(file) => Ok(Some(file)),
686            Err(err) => {
687                // Ignore the error if the file exists and O_EXCL is not present in `flags`.
688                if err.kind() == io::ErrorKind::AlreadyExists {
689                    if (flags & libc::O_EXCL) != 0 {
690                        return Err(err);
691                    }
692                    return Ok(None);
693                }
694                Err(err)
695            }
696        }
697    }
698
    /// Thin wrapper over `openat` (see `util::openat`): open `pathname`
    /// relative to `dfd` with the given flags and mode.
    fn open_file(dfd: &impl AsRawFd, pathname: &CStr, flags: i32, mode: u32) -> io::Result<File> {
        openat(dfd, pathname, flags, mode)
    }
702
703    fn open_file_restricted(
704        &self,
705        dir: &impl AsRawFd,
706        pathname: &CStr,
707        flags: i32,
708        mode: u32,
709    ) -> io::Result<File> {
710        let flags = libc::O_NOFOLLOW | libc::O_CLOEXEC | flags;
711
712        // TODO
713        //if self.os_facts.has_openat2 {
714        //    oslib::do_open_relative_to(dir, pathname, flags, mode)
715        //} else {
716        openat(dir, pathname, flags, mode)
717        //}
718    }
719
    /// Create a File or File Handle for `name` under directory `dir_fd` to support `lookup()`.
    ///
    /// Returns the handle plus the statx result. When the file's birth time
    /// is valid (non-zero), file handles are cached by `(st_ino, btime)` so
    /// repeated lookups of the same file reuse one handle; an all-zero btime
    /// is treated as "not reported by the filesystem" and skips the cache.
    async fn open_file_and_handle(
        &self,
        dir: &impl AsRawFd,
        name: &CStr,
    ) -> io::Result<(InodeHandle, StatExt)> {
        // macOS has no O_PATH; fall back to a read-only open.
        #[cfg(target_os = "linux")]
        let path_file = self.open_file_restricted(dir, name, libc::O_PATH, 0)?;
        #[cfg(target_os = "macos")]
        let path_file = self.open_file_restricted(dir, name, libc::O_RDONLY, 0)?;
        let st = statx::statx(&path_file, None)?;

        // A (0, 0) timestamp is assumed to mean "btime unsupported" — TODO
        // confirm no real file legitimately has an epoch birth time.
        let btime_is_valid = match st.btime {
            Some(ts) => ts.tv_sec != 0 || ts.tv_nsec != 0,
            None => false,
        };

        if btime_is_valid {
            let key = FileUniqueKey(st.st.st_ino, st.btime.unwrap());
            let cache = self.handle_cache.clone();
            if let Some(h) = cache.get(&key).await {
                // If found in cache, it's an Arc<FileHandle>. Convert to InodeHandle::Handle
                let openable = self.to_openable_handle(h)?;
                Ok((InodeHandle::Handle(openable), st))
            } else if let Some(handle_from_fd) = FileHandle::from_fd(&path_file)? {
                // Cache the freshly obtained handle for future lookups.
                let handle_arc = Arc::new(handle_from_fd);
                cache.insert(key, Arc::clone(&handle_arc)).await;
                let openable = self.to_openable_handle(handle_arc)?;
                Ok((InodeHandle::Handle(openable), st))
            } else {
                // Fallback for macOS if btime is valid but no handle
                Ok((InodeHandle::File(path_file), st))
            }
        } else {
            // If not valid btime: same flow but without touching the cache,
            // since the key would not uniquely identify the file.
            if let Some(handle_from_fd) = FileHandle::from_fd(&path_file)? {
                let handle_arc = Arc::new(handle_from_fd);
                let openable = self.to_openable_handle(handle_arc)?;
                Ok((InodeHandle::Handle(openable), st))
            } else {
                // Fallback
                Ok((InodeHandle::File(path_file), st))
            }
        }
    }
765
766    fn to_openable_handle(&self, fh: Arc<FileHandle>) -> io::Result<Arc<OpenableFileHandle>> {
767        (*Arc::as_ref(&fh))
768            .clone()
769            .into_openable(&self.mount_fds, |fd, flags, _mode| {
770                reopen_fd_through_proc(&fd, flags, &self.proc_self_fd)
771            })
772            .map(Arc::new)
773            .map_err(|e| {
774                if !e.silent() {
775                    error!("{e}");
776                }
777                e.into_inner()
778            })
779    }
780
781    async fn allocate_inode(
782        &self,
783        inodes: &InodeStore,
784        id: &InodeId,
785        handle: &InodeHandle,
786    ) -> io::Result<Inode> {
787        if !self.cfg.use_host_ino {
788            // If the inode has already been assigned before, the new inode is not reassigned,
789            // ensuring that the same file is always the same inode
790            match InodeMap::get_inode_locked(inodes, handle) {
791                Some(a) => Ok(a),
792                None => Ok(self.next_inode.fetch_add(1, Ordering::Relaxed)),
793            }
794        } else {
795            let inode = if id.ino > MAX_HOST_INO {
796                // Prefer looking for previous mappings from memory
797                match InodeMap::get_inode_locked(inodes, handle) {
798                    Some(ino) => ino,
799                    None => self.ino_allocator.get_unique_inode(id)?,
800                }
801            } else {
802                self.ino_allocator.get_unique_inode(id)?
803            };
804            // trace!("fuse: allocate inode: {} for id: {:?}", inode, id);
805            Ok(inode)
806        }
807    }
808
    /// Look up `name` under directory inode `parent` and build the FUSE
    /// entry reply, creating or reference-bumping the inode-map entry.
    ///
    /// Errors: EBADF when `parent` is unknown, plus any error from opening or
    /// stat'ing the child, or when the inode space is exhausted.
    async fn do_lookup(
        &self,
        parent: Inode,
        name: &CStr,
    ) -> std::result::Result<ReplyEntry, Errno> {
        // ".." at the root resolves to the root itself.
        let name = if parent == ROOT_ID && name.to_bytes_with_nul().starts_with(PARENT_DIR_CSTR) {
            // Safe as this is a constant value and a valid C string.
            CStr::from_bytes_with_nul(CURRENT_DIR_CSTR).unwrap()
        } else {
            name
        };

        let dir = self.inode_map.get(parent).await?;
        let dir_file = dir.get_file()?;
        let (inode_handle, st) = self.open_file_and_handle(&dir_file, name).await?;
        let id = InodeId::from_stat(&st);
        debug!(
            "do_lookup: parent: {}, name: {}, handle: {:?}, id: {:?}",
            parent,
            name.to_string_lossy(),
            inode_handle,
            id
        );

        // Fast path: try to find an existing entry and bump its refcount with
        // a lock-free CAS loop (only a read lock on the inode map is taken).
        let mut found = None;
        'search: loop {
            match self.inode_map.get_alt(&id, &inode_handle).await {
                // No existing entry found
                None => break 'search,
                Some(data) => {
                    let curr = data.refcount.load(Ordering::Acquire);
                    // forgot_one() has just destroyed the entry, retry...
                    if curr == 0 {
                        continue 'search;
                    }

                    // Saturating add to avoid integer overflow, it's not realistic to saturate u64.
                    let new = curr.saturating_add(1);

                    // Synchronizes with the forgot_one()
                    if data
                        .refcount
                        .compare_exchange(curr, new, Ordering::AcqRel, Ordering::Acquire)
                        .is_ok()
                    {
                        found = Some(data.inode);
                        break;
                    }
                }
            }
        }

        let inode = if let Some(v) = found {
            v
        } else {
            // Write guard get_alt_locked() and insert_lock() to avoid race conditions.
            let mut inodes = self.inode_map.inodes.write().await;

            // Lookup inode_map again after acquiring the inode_map lock, as there might be another
            // racing thread already added an inode with the same id while we're not holding
            // the lock. If so just use the newly added inode, otherwise the inode will be replaced
            // and results in EBADF.
            // trace!("FS {} looking up inode for id: {:?} with handle: {:?}", self.uuid, id, handle);
            match InodeMap::get_alt_locked(&inodes, &id, &inode_handle) {
                Some(data) => {
                    // An inode was added concurrently while we did not hold a lock on
                    // `self.inodes_map`, so we use that instead. `handle` will be dropped.
                    // trace!("FS {} found existing inode: {}", self.uuid, data.inode);
                    data.refcount.fetch_add(1, Ordering::Relaxed);
                    data.inode
                }
                None => {
                    let inode = self.allocate_inode(&inodes, &id, &inode_handle).await?;
                    // trace!("FS {} allocated new inode: {} for id: {:?}", self.uuid, inode, id);

                    if inode > VFS_MAX_INO {
                        error!("fuse: max inode number reached: {VFS_MAX_INO}");
                        return Err(io::Error::other(format!(
                            "max inode number reached: {VFS_MAX_INO}"
                        ))
                        .into());
                    }

                    // Register the new entry with an initial lookup count of 1.
                    InodeMap::insert_locked(
                        inodes.deref_mut(),
                        Arc::new(InodeData::new(
                            inode,
                            inode_handle,
                            1,
                            id,
                            st.st.st_mode.into(),
                            st.btime
                                .ok_or_else(|| io::Error::other("birth time not available"))?,
                        )),
                    );

                    inode
                }
            }
        };

        // Directories may use dedicated (typically longer) TTLs.
        let (entry_timeout, _) = if is_dir(st.st.st_mode.into()) {
            (self.dir_entry_timeout, self.dir_attr_timeout)
        } else {
            (self.cfg.entry_timeout, self.cfg.attr_timeout)
        };

        // // Whether to enable file DAX according to the value of dax_file_size
        // let mut attr_flags: u32 = 0;
        // if let Some(dax_file_size) = self.cfg.dax_file_size {
        //     // st.stat.st_size is i64
        //     if self.perfile_dax.load().await
        //         && st.st.st_size >= 0x0
        //         && st.st.st_size as u64 >= dax_file_size
        //     {
        //         attr_flags |= FUSE_ATTR_DAX;
        //     }
        // }
        // Rewrite host uid/gid through the configured ID mapping before
        // reporting attributes to the guest.
        let mut attr_temp = convert_stat64_to_file_attr(st.st);
        attr_temp.ino = inode;
        attr_temp.uid = self.cfg.mapping.find_mapping(attr_temp.uid, true, true);
        attr_temp.gid = self.cfg.mapping.find_mapping(attr_temp.gid, true, false);
        Ok(ReplyEntry {
            ttl: entry_timeout,
            attr: attr_temp,
            generation: 0,
        })
    }
937
    /// Decrease the lookup refcount of `inode` by `count`, removing it from the
    /// inode store once the refcount reaches zero.
    ///
    /// The caller must hold the write lock that produced `inodes`; this
    /// function only loops on the per-inode atomic refcount, not on the map.
    async fn forget_one(&self, inodes: &mut InodeStore, inode: Inode, count: u64) {
        // ROOT_ID should not be forgotten, or we're not able to access to files any more.
        if inode == ROOT_ID {
            return;
        }

        if let Some(data) = inodes.get(&inode) {
            // Acquiring the write lock on the inode map prevents new lookups from incrementing the
            // refcount but there is the possibility that a previous lookup already acquired a
            // reference to the inode data and is in the process of updating the refcount so we need
            // to loop here until we can decrement successfully.
            loop {
                let curr = data.refcount.load(Ordering::Acquire);

                // Saturating sub because it doesn't make sense for a refcount to go below zero and
                // we don't want misbehaving clients to cause integer overflow.
                let new = curr.saturating_sub(count);

                // Synchronizes with the acquire load in `do_lookup`.
                if data
                    .refcount
                    .compare_exchange(curr, new, Ordering::AcqRel, Ordering::Acquire)
                    .is_ok()
                {
                    if new == 0 {
                        // A valid btime (non-zero) means this handle was cached under a
                        // FileUniqueKey at lookup time; drop that cache entry as well so
                        // the file handle is actually closed.
                        if data.handle.file_handle().is_some()
                            && (data.btime.tv_sec != 0 || data.btime.tv_nsec != 0)
                        {
                            let key = FileUniqueKey(data.id.ino, data.btime);
                            let cache = self.handle_cache.clone();
                            cache.invalidate(&key).await;
                        }
                        // We just removed the last refcount for this inode.
                        // The allocated inode number should be kept in the map when use_host_ino
                        // is false or host inode(don't use the virtual 56bit inode) is bigger than MAX_HOST_INO.
                        let keep_mapping = !self.cfg.use_host_ino || data.id.ino > MAX_HOST_INO;
                        inodes.remove(&inode, keep_mapping);
                    }
                    break;
                }
                // CAS lost a race with a concurrent refcount update; retry.
            }
        }
    }
981
982    async fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> {
983        self.handle_map.release(handle, inode).await
984    }
985
986    // Validate a path component, same as the one in vfs layer, but only do the validation if this
987    // passthroughfs is used without vfs layer, to avoid double validation.
988    fn validate_path_component(&self, name: &CStr) -> io::Result<()> {
989        // !self.cfg.do_import means we're under vfs, and vfs has already done the validation
990        if !self.cfg.do_import {
991            return Ok(());
992        }
993        validate_path_component(name)
994    }
995
    // TODO: When seal_size is set, disallow operations that could change the file size or
    // allocate space beyond EOF.
998    // fn seal_size_check(
999    //     &self,
1000    //     opcode: Opcode,
1001    //     file_size: u64,
1002    //     offset: u64,
1003    //     size: u64,
1004    //     mode: i32,
1005    // ) -> io::Result<()> {
1006    //     if offset.checked_add(size).is_none() {
1007    //         error!(
1008    //             "fuse: {:?}: invalid `offset` + `size` ({}+{}) overflows u64::MAX",
1009    //             opcode, offset, size
1010    //         );
1011    //         return Err(einval());
1012    //     }
1013
1014    //     match opcode {
1015    //         // write should not exceed the file size.
1016    //         Opcode::Write => {
1017    //             if size + offset > file_size {
1018    //                 return Err(eperm());
1019    //             }
1020    //         }
1021
1022    //         Opcode::Fallocate => {
1023    //             let op = mode & !(libc::FALLOC_FL_KEEP_SIZE | libc::FALLOC_FL_UNSHARE_RANGE);
1024    //             match op {
1025    //                 // Allocate, punch and zero, must not change file size.
1026    //                 0 | libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_ZERO_RANGE => {
1027    //                     if size + offset > file_size {
1028    //                         return Err(eperm());
1029    //                     }
1030    //                 }
1031    //                 // collapse and insert will change file size, forbid.
1032    //                 libc::FALLOC_FL_COLLAPSE_RANGE | libc::FALLOC_FL_INSERT_RANGE => {
1033    //                     return Err(eperm());
1034    //                 }
1035    //                 // Invalid operation
1036    //                 _ => return Err(einval()),
1037    //             }
1038    //         }
1039
1040    //         // setattr operation should be handled in setattr handler.
1041    //         _ => return Err(enosys()),
1042    //     }
1043
1044    //     Ok(())
1045    // }
1046
1047    async fn get_writeback_open_flags(&self, flags: i32) -> i32 {
1048        let mut new_flags = flags;
1049        let writeback = self.writeback.load(Ordering::Relaxed);
1050
1051        // When writeback caching is enabled, the kernel may send read requests even if the
1052        // userspace program opened the file write-only. So we need to ensure that we have opened
1053        // the file for reading as well as writing.
1054        if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY {
1055            new_flags &= !libc::O_ACCMODE;
1056            new_flags |= libc::O_RDWR;
1057        }
1058
1059        // When writeback caching is enabled the kernel is responsible for handling `O_APPEND`.
1060        // However, this breaks atomicity as the file may have changed on disk, invalidating the
1061        // cached copy of the data in the kernel and the offset that the kernel thinks is the end of
1062        // the file. Just allow this for now as it is the user's responsibility to enable writeback
1063        // caching only for directories that are not shared. It also means that we need to clear the
1064        // `O_APPEND` flag.
1065        if writeback && flags & libc::O_APPEND != 0 {
1066            new_flags &= !libc::O_APPEND;
1067        }
1068
1069        new_flags
1070    }
1071
1072    async fn get_mmap(
1073        &self,
1074        inode: Inode,
1075        offset: u64,
1076        file: &File,
1077    ) -> Option<(Arc<RwLock<mmap::MmapCachedValue>>, u64)> {
1078        let file_size = file.metadata().unwrap().len();
1079        let key = MmapChunkKey::new(inode, offset, file_size);
1080        let aligned_offset = key.aligned_offset;
1081
1082        if let Some(cached) = self.mmap_chunks.get(&key).await {
1083            let guard = cached.read().await;
1084            let cache_len = match &*guard {
1085                MmapCachedValue::Mmap(mmap) => mmap.len() as u64,
1086                MmapCachedValue::MmapMut(mmap_mut) => mmap_mut.len() as u64,
1087            };
1088            if offset < key.aligned_offset + cache_len {
1089                return Some((cached.clone(), key.aligned_offset));
1090            }
1091        }
1092
1093        let mmap = match mmap::create_mmap(offset, file).await {
1094            Ok(v) => v,
1095            Err(e) => {
1096                error!("Failed to create mmap:{e}");
1097                return None;
1098            }
1099        };
1100        self.mmap_chunks.insert(key, mmap.clone()).await;
1101        Some((mmap, aligned_offset))
1102    }
1103
1104    async fn read_from_mmap(
1105        &self,
1106        inode: Inode,
1107        offset: u64,
1108        size: u64,
1109        file: &File,
1110        buf: &mut [u8],
1111    ) -> Result<usize> {
1112        // check the buf size
1113        if buf.len() < size as usize {
1114            return Err(std::io::Error::new(
1115                std::io::ErrorKind::InvalidInput,
1116                format!("Buffer too small: {} < {}", buf.len(), size),
1117            ));
1118        }
1119
1120        let file_size = file.metadata()?.len();
1121
1122        // check the offset
1123        if offset >= file_size {
1124            return Ok(0); // offset exceeds file size, return 0 bytes read
1125        }
1126
1127        // compute the maximum readable length
1128        let max_readable = file_size - offset;
1129        let actual_size = cmp::min(size, max_readable) as usize;
1130
1131        let mut len = actual_size;
1132        let mut current_offset = offset;
1133        let mut buf_offset = 0;
1134
1135        while len > 0 {
1136            let (chunk, chunk_start_offset) = match self.get_mmap(inode, current_offset, file).await
1137            {
1138                Some((chunk, aligned_offset)) => (chunk, aligned_offset),
1139                None => {
1140                    return Err(std::io::Error::other("Failed to get mmap chunk"));
1141                }
1142            };
1143
1144            let chunk_guard = chunk.read().await;
1145            match &*chunk_guard {
1146                MmapCachedValue::Mmap(mmap) => {
1147                    let chunk_len = mmap.len();
1148
1149                    // compute the start offset within the chunk using cached alignment
1150                    let copy_start = (current_offset - chunk_start_offset) as usize;
1151
1152                    // ensure we don't read beyond the chunk boundary
1153                    let remaining_in_chunk = chunk_len - copy_start;
1154                    let copy_len = cmp::min(len, remaining_in_chunk);
1155
1156                    // ensure we don't read beyond the buffer boundary
1157                    let copy_len = cmp::min(copy_len, buf.len() - buf_offset);
1158
1159                    if copy_len == 0 {
1160                        break; // no more data to read
1161                    }
1162
1163                    // execute data copy
1164                    buf[buf_offset..buf_offset + copy_len]
1165                        .copy_from_slice(&mmap[copy_start..copy_start + copy_len]);
1166
1167                    buf_offset += copy_len;
1168                    len -= copy_len;
1169                    current_offset += copy_len as u64;
1170                }
1171                MmapCachedValue::MmapMut(mmap_mut) => {
1172                    let chunk_len = mmap_mut.len();
1173
1174                    // compute the start offset within the chunk using cached alignment
1175                    let copy_start = (current_offset - chunk_start_offset) as usize;
1176
1177                    // ensure we don't read beyond the chunk boundary
1178                    let remaining_in_chunk = chunk_len - copy_start;
1179                    let copy_len = cmp::min(len, remaining_in_chunk);
1180
1181                    // ensure we don't read beyond the buffer boundary
1182                    let copy_len = cmp::min(copy_len, buf.len() - buf_offset);
1183
1184                    if copy_len == 0 {
1185                        break; // no more data to read
1186                    }
1187
1188                    // execute data copy
1189                    buf[buf_offset..buf_offset + copy_len]
1190                        .copy_from_slice(&mmap_mut[copy_start..copy_start + copy_len]);
1191
1192                    buf_offset += copy_len;
1193                    len -= copy_len;
1194                    current_offset += copy_len as u64;
1195                }
1196            }
1197        }
1198        Ok(buf_offset)
1199    }
1200
1201    async fn write_to_mmap(
1202        &self,
1203        inode: Inode,
1204        offset: u64,
1205        data: &[u8],
1206        file: &File,
1207    ) -> Result<usize> {
1208        let file_size = file.metadata()?.len();
1209        let len = data.len();
1210
1211        // If the file needs to be extended, do so
1212        if offset + len as u64 > file_size {
1213            let raw_fd = file.as_raw_fd();
1214            let res = unsafe { libc::ftruncate(raw_fd, (offset + len as u64) as i64) };
1215
1216            if res < 0 {
1217                return Err(std::io::Error::other("error to ftruncate"));
1218            }
1219
1220            self.invalidate_mmap_cache(inode, file_size).await;
1221        }
1222
1223        let mut remaining = len;
1224        let mut current_offset = offset;
1225        let mut data_offset = 0;
1226
1227        while remaining > 0 {
1228            let (chunk, chunk_start_offset) = match self.get_mmap(inode, current_offset, file).await
1229            {
1230                Some((chunk, aligned_offset)) => (chunk, aligned_offset),
1231                None => {
1232                    return Err(std::io::Error::other("Failed to get mmap chunk"));
1233                }
1234            };
1235
1236            let mut chunk_guard = chunk.write().await;
1237            match &mut *chunk_guard {
1238                MmapCachedValue::Mmap(_) => {
1239                    return Err(std::io::Error::new(
1240                        std::io::ErrorKind::PermissionDenied,
1241                        "Cannot write to read-only mmap",
1242                    ));
1243                }
1244                MmapCachedValue::MmapMut(mmap_mut) => {
1245                    let chunk_len = mmap_mut.len();
1246
1247                    // Calculate the start position of the current chunk using cached alignment
1248                    let copy_start = (current_offset - chunk_start_offset) as usize;
1249
1250                    // Ensure we don't write beyond the chunk boundary
1251                    let remaining_in_chunk = chunk_len - copy_start;
1252                    let copy_len = cmp::min(remaining, remaining_in_chunk);
1253
1254                    // Ensure we don't write beyond the data boundary
1255                    let copy_len = cmp::min(copy_len, data.len() - data_offset);
1256
1257                    if copy_len == 0 {
1258                        break; // No more data to write
1259                    }
1260
1261                    // Perform data copy
1262                    mmap_mut[copy_start..copy_start + copy_len]
1263                        .copy_from_slice(&data[data_offset..data_offset + copy_len]);
1264
1265                    data_offset += copy_len;
1266                    remaining -= copy_len;
1267                    current_offset += copy_len as u64;
1268                    mmap_mut.flush_async_range(copy_start, copy_len)?;
1269                }
1270            }
1271        }
1272        Ok(data_offset)
1273    }
1274
1275    async fn invalidate_mmap_cache(&self, inode: Inode, old_size: u64) {
1276        let keys_to_remove: Vec<_> = self
1277            .mmap_chunks
1278            .iter()
1279            .filter(|item| {
1280                let key = item.0.clone();
1281                key.inode == inode && key.aligned_offset + mmap::MAX_WINDOW_SIZE as u64 >= old_size
1282            })
1283            .collect();
1284
1285        for item in keys_to_remove {
1286            self.mmap_chunks.invalidate(item.0.as_ref()).await;
1287        }
1288    }
1289}
1290
#[cfg(test)]
#[allow(unused_imports)]
#[allow(clippy::useless_conversion)]
mod tests {
    use crate::{
        passthrough::{PassthroughArgs, PassthroughFs, ROOT_ID, new_passthroughfs_layer},
        unwrap_or_skip_eperm,
    };
    use std::ffi::{CStr, OsStr, OsString};

    use nix::unistd::{Gid, Uid, getgid, getuid};
    use rfuse3::{
        MountOptions,
        raw::{Filesystem, Request, Session},
    };

    /// Expands to `()`: placeholder body for tests whose real implementation
    /// cannot run in unprivileged environments (see the comments below).
    macro_rules! pass {
        () => {
            ()
        };
        ($($tt:tt)*) => {
            ()
        };
    }

    /// This test attempts to mount a passthrough filesystem. In many CI/unprivileged
    /// environments operations like `allow_other` or FUSE mounting may return
    /// EPERM/EACCES. Instead of failing the whole test suite, we skip the test
    /// gracefully in that case so logic tests in other modules still run.
    #[tokio::test]
    async fn test_passthrough() {
        let temp_dir = std::env::temp_dir().join("libfuse_passthrough_test");
        let source_dir = temp_dir.join("src");
        let mount_dir = temp_dir.join("mnt");
        let _ = std::fs::create_dir_all(&source_dir);
        let _ = std::fs::create_dir_all(&mount_dir);

        let args = PassthroughArgs {
            root_dir: source_dir.clone(),
            mapping: None::<&str>,
        };
        let fs = match super::new_passthroughfs_layer(args).await {
            Ok(fs) => fs,
            Err(e) => {
                eprintln!("skip test_passthrough: init failed: {e:?}");
                return;
            }
        };

        // Use the safe `nix` wrappers (already imported above) rather than
        // `unsafe { libc::getuid() }`; they return the same uid_t/gid_t values
        // without a needless unsafe block.
        let uid = getuid().as_raw();
        let gid = getgid().as_raw();

        let mut mount_options = MountOptions::default();
        mount_options.force_readdir_plus(true).uid(uid).gid(gid);
        // Intentionally DO NOT call allow_other here to avoid requiring /etc/fuse.conf config.

        let mount_path = OsString::from(mount_dir.to_str().unwrap());

        let session = Session::new(mount_options);
        let mount_handle =
            unwrap_or_skip_eperm!(session.mount(fs, mount_path).await, "mount passthrough fs");

        // Immediately unmount to verify we at least mounted successfully.
        let _ = mount_handle.unmount().await; // errors ignored

        // Cleanup
        let _ = std::fs::remove_dir_all(&temp_dir);
    }

    // // Test for uid/gid mapping
    // async fn setup(
    //     mapping: Option<&str>,
    // ) -> (PassthroughFs, tempfile::TempDir, Uid, Gid, Uid, Gid) {
    //     let tmp_dir = tempfile::tempdir().unwrap();
    //     let src_dir = tmp_dir.path();

    //     let cur_uid = getuid();
    //     let cur_gid = getgid();

    //     let container_uid = Uid::from_raw(1000);
    //     let container_gid = Gid::from_raw(1000);

    //     let args = PassthroughArgs {
    //         root_dir: src_dir.to_path_buf(),
    //         mapping: mapping,
    //     };
    //     let fs = new_passthroughfs_layer(args).await.unwrap();

    //     (fs, tmp_dir, cur_uid, cur_gid, container_uid, container_gid)
    // }

    /// Tests the reverse mapping (host -> container) for `lookup` and `getattr` operations.
    ///
    /// It sets up a mapping from the current host user to a container user (UID/GID 1000).
    /// Then, it creates a file owned by the host user and verifies that when FUSE looks up
    /// or gets attributes for this file, the returned UID/GID are correctly mapped to 1000.
    ///
    /// Unfortunately, this can not work because `do_lookup` calls `to_openable_handle` which
    /// requires CAP_DAC_READ_SEARCH capability, which is not available in unprivileged test environments.
    /// So this test is commented out for now.
    #[tokio::test]
    async fn test_lookup_and_getattr() {
        pass!()
    }
    // async fn test_lookup_and_getattr() {
    //     let cur_uid = getuid().as_raw();
    //     let cur_gid = getgid().as_raw();
    //     let mapping = format!("uidmapping={cur_uid}:1000:1,gidmapping={cur_gid}:1000:1");

    //     let (fs, tmp_dir, ..) = setup(Some(&mapping)).await;
    //     let src = tmp_dir.path();

    //     // Create a file in the source directory, owned by the current host user.
    //     let file_path = src.join("test_file.txt");
    //     std::fs::File::create(&file_path).unwrap();
    //     std::os::unix::fs::chown(&file_path, Some(cur_uid), Some(cur_gid)).unwrap();

    //     // Simulate a FUSE request from the container user (UID/GID 1000).
    //     let req = Request::default();
    //     // Perform a lookup, which should trigger attribute fetching.
    //     let reply = fs
    //         .do_lookup(
    //             ROOT_ID,
    //             CStr::from_bytes_with_nul(b"test_file.txt\0").unwrap(),
    //         )
    //         .await
    //         .unwrap();

    //     // Verify that the returned attributes are mapped to the container's perspective.
    //     assert_eq!(reply.attr.uid, 1000);
    //     assert_eq!(reply.attr.gid, 1000);

    //     // Explicitly call getattr and verify the same mapping logic.
    //     let getattr_reply = fs.getattr(req, reply.attr.ino, None, 0).await.unwrap();
    //     assert_eq!(getattr_reply.attr.uid, 1000);
    //     assert_eq!(getattr_reply.attr.gid, 1000);
    // }

    /// Tests the forward mapping (container -> host) for the `create` operation.
    ///
    /// It sets up a mapping from the current host user to a container user (UID/GID 1000).
    /// It then simulates a `create` request from the container user and verifies two things:
    /// 1. The newly created file on the host filesystem is owned by the mapped host user.
    /// 2. The attributes returned in the FUSE reply are correctly mapped back to the container user's ID.
    #[tokio::test]
    async fn test_create() {
        pass!()
    }
    // #[tokio::test]
    // async fn test_create() {
    //     let cur_uid = getuid().as_raw();
    //     let cur_gid = getgid().as_raw();
    //     let mapping = format!("uidmapping={cur_uid}:1000:1,gidmapping={cur_gid}:1000:1");

    //     let (fs, tmp_dir, host_uid, host_gid, container_uid, container_gid) =
    //         setup(Some(&mapping)).await;

    //     // Simulate a request coming from the container user (1000).
    //     let mut req = Request::default();
    //     req.uid = container_uid.as_raw();
    //     req.gid = container_gid.as_raw();

    //     let file_name = OsStr::new("new_file.txt");
    //     let mode = libc::S_IFREG | 0o644;

    //     // Perform the create operation.
    //     let created_reply = fs
    //         .create(req, ROOT_ID, file_name, mode, libc::O_CREAT as u32)
    //         .await
    //         .unwrap();

    //     let file_path = tmp_dir.path().join(file_name);
    //     let metadata = std::fs::metadata(file_path).unwrap();

    //     // Verify forward mapping: the file owner on the host should be the mapped host user.
    //     use std::os::unix::fs::MetadataExt;
    //     assert_eq!(Uid::from_raw(metadata.uid()), host_uid);
    //     assert_eq!(Gid::from_raw(metadata.gid()), host_gid);

    //     // Verify reverse mapping in the reply: the attributes sent back to the container
    //     // should reflect the container's user ID.
    //     assert_eq!(created_reply.attr.uid, container_uid.as_raw());
    //     assert_eq!(created_reply.attr.gid, container_gid.as_raw());
    // }
}
1475}