Skip to main content

libfuse_fs/unionfs/
async_io.rs

1use super::utils;
2use super::{CachePolicy, HandleData, Inode, OverlayFs, RealHandle};
3use crate::util::open_options::OpenOptions;
4use rfuse3::raw::prelude::*;
5use rfuse3::*;
6use std::ffi::OsStr;
7use std::io::Error;
8use std::io::ErrorKind;
9use std::num::NonZeroU32;
10use std::sync::Arc;
11use std::sync::atomic::{AtomicU64, Ordering};
12use tokio::sync::Mutex;
13use tracing::info;
14use tracing::trace;
15
16impl Filesystem for OverlayFs {
17    /// initialize filesystem. Called before any other filesystem method.
18    async fn init(&self, _req: Request) -> Result<ReplyInit> {
19        if self.config.do_import {
20            self.import().await?;
21        }
22        for layer in self.lower_layers.iter() {
23            layer.init(_req).await?;
24        }
25        if let Some(upper) = &self.upper_layer {
26            upper.init(_req).await?;
27        }
28        if !self.config.do_import || self.config.writeback {
29            self.writeback.store(true, Ordering::Relaxed);
30        }
31        if !self.config.do_import || self.config.no_open {
32            self.no_open.store(true, Ordering::Relaxed);
33        }
34        if !self.config.do_import || self.config.no_opendir {
35            self.no_opendir.store(true, Ordering::Relaxed);
36        }
37        if !self.config.do_import || self.config.killpriv_v2 {
38            self.killpriv_v2.store(true, Ordering::Relaxed);
39        }
40        if self.config.perfile_dax {
41            self.perfile_dax.store(true, Ordering::Relaxed);
42        }
43
44        Ok(ReplyInit {
45            max_write: NonZeroU32::new(128 * 1024).unwrap(),
46        })
47    }
48
    /// clean up filesystem. Called on filesystem exit when the filesystem is fuseblk; in a normal
    /// fuse filesystem, the kernel may call forget for root. There is some discussion of this at
    /// <https://github.com/bazil/fuse/issues/82#issuecomment-88126886> and
    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
    async fn destroy(&self, _req: Request) {
        // No-op: the body is empty, so nothing is torn down here and `_req` is unused.
    }
54
55    /// look up a directory entry by name and get its attributes.
56    async fn lookup(&self, req: Request, parent: Inode, name: &OsStr) -> Result<ReplyEntry> {
57        let tmp = name.to_string_lossy().to_string();
58        let result = self.do_lookup(req, parent, tmp.as_str()).await;
59        match result {
60            Ok(e) => Ok(e),
61            Err(err) => Err(err.into()),
62        }
63    }
64
65    /// forget an inode. The nlookup parameter indicates the number of lookups previously
66    /// performed on this inode. If the filesystem implements inode lifetimes, it is recommended
67    /// that inodes acquire a single reference on each lookup, and lose nlookup references on each
68    /// forget. The filesystem may ignore forget calls, if the inodes don't need to have a limited
69    /// lifetime. On unmount it is not guaranteed, that all referenced inodes will receive a forget
70    /// message. When filesystem is normal(not fuseblk) and unmounting, kernel may send forget
71    /// request for root and this library will stop session after call forget. There is some
72    /// discussion for this <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
73    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
    async fn forget(&self, _req: Request, inode: Inode, nlookup: u64) {
        // Hand the inode and its lookup count straight to `forget_one`; `_req` is unused.
        self.forget_one(inode, nlookup).await;
    }
77
78    /// get file attributes. If `fh` is None, means `fh` is not set.
79    async fn getattr(
80        &self,
81        req: Request,
82        inode: Inode,
83        fh: Option<u64>,
84        flags: u32,
85    ) -> Result<ReplyAttr> {
86        if !self.no_open.load(Ordering::Relaxed)
87            && let Some(h) = fh
88        {
89            let handles = self.handles.lock().await;
90            if let Some(hd) = handles.get(&h)
91                && let Some(ref rh) = hd.real_handle
92            {
93                let mut rep: ReplyAttr = rh
94                    .layer
95                    .getattr(req, rh.inode, Some(rh.handle.load(Ordering::Relaxed)), 0)
96                    .await?;
97                rep.attr.ino = inode;
98                return Ok(rep);
99            }
100        }
101
102        let node: Arc<super::OverlayInode> = self.lookup_node(req, inode, "").await?;
103        let (layer, _, lower_inode) = node.first_layer_inode().await;
104        let mut re = layer.getattr(req, lower_inode, None, flags).await?;
105        re.attr.ino = inode;
106        Ok(re)
107    }
108
109    /// set file attributes. If `fh` is None, means `fh` is not set.
110    async fn setattr(
111        &self,
112        req: Request,
113        inode: Inode,
114        fh: Option<u64>,
115        set_attr: SetAttr,
116    ) -> Result<ReplyAttr> {
117        // Check if upper layer exists.
118        self.upper_layer
119            .as_ref()
120            .cloned()
121            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
122
123        // deal with handle first
124        if !self.no_open.load(Ordering::Relaxed)
125            && let Some(h) = fh
126        {
127            let handles = self.handles.lock().await;
128            if let Some(hd) = handles.get(&h)
129                && let Some(ref rhd) = hd.real_handle
130            {
131                // handle opened in upper layer
132                if rhd.in_upper_layer {
133                    let mut rep = rhd
134                        .layer
135                        .setattr(
136                            req,
137                            rhd.inode,
138                            Some(rhd.handle.load(Ordering::Relaxed)),
139                            set_attr,
140                        )
141                        .await?;
142                    rep.attr.ino = inode;
143                    return Ok(rep);
144                }
145            }
146        }
147
148        let mut node = self.lookup_node(req, inode, "").await?;
149
150        if !node.in_upper_layer().await {
151            node = self.copy_node_up(req, node.clone()).await?
152        }
153
154        let (layer, _, real_inode) = node.first_layer_inode().await;
155        // layer.setattr(req, real_inode, None, set_attr).await
156        let mut rep = layer.setattr(req, real_inode, None, set_attr).await?;
157        rep.attr.ino = inode;
158        Ok(rep)
159    }
160
161    /// read symbolic link.
162    async fn readlink(&self, req: Request, inode: Inode) -> Result<ReplyData> {
163        trace!("READLINK: inode: {inode}\n");
164
165        let node = self.lookup_node(req, inode, "").await?;
166
167        if node.whiteout.load(Ordering::Relaxed) {
168            return Err(Error::from_raw_os_error(libc::ENOENT).into());
169        }
170
171        let (layer, _, inode) = node.first_layer_inode().await;
172        layer.readlink(req, inode).await
173    }
174
175    /// create a symbolic link.
176    async fn symlink(
177        &self,
178        req: Request,
179        parent: Inode,
180        name: &OsStr,
181        link: &OsStr,
182    ) -> Result<ReplyEntry> {
183        // soft link
184        let sname = name.to_string_lossy().into_owned().to_owned();
185        let slinkname = link.to_string_lossy().into_owned().to_owned();
186
187        let pnode = self.lookup_node(req, parent, "").await?;
188        self.do_symlink(req, slinkname.as_str(), &pnode, sname.as_str())
189            .await?;
190
191        self.do_lookup(req, parent, sname.as_str())
192            .await
193            .map_err(|e| e.into())
194    }
195
196    /// create file node. Create a regular file, character device, block device, fifo or socket
197    /// node. When creating file, most cases user only need to implement
198    /// [`create`][Filesystem::create].
199    async fn mknod(
200        &self,
201        req: Request,
202        parent: Inode,
203        name: &OsStr,
204        mode: u32,
205        rdev: u32,
206    ) -> Result<ReplyEntry> {
207        let sname = name.to_string_lossy().to_string();
208
209        // Check if parent exists.
210        let pnode = self.lookup_node(req, parent, "").await?;
211        if pnode.whiteout.load(Ordering::Relaxed) {
212            return Err(Error::from_raw_os_error(libc::ENOENT).into());
213        }
214
215        self.do_mknod(req, &pnode, sname.as_str(), mode, rdev, 0)
216            .await?;
217        self.do_lookup(req, parent, sname.as_str())
218            .await
219            .map_err(|e| e.into())
220    }
221
222    /// create a directory.
223    async fn mkdir(
224        &self,
225        req: Request,
226        parent: Inode,
227        name: &OsStr,
228        mode: u32,
229        umask: u32,
230    ) -> Result<ReplyEntry> {
231        let sname = name.to_string_lossy().to_string();
232
233        // no entry or whiteout
234        let pnode = self.lookup_node(req, parent, "").await?;
235        if pnode.whiteout.load(Ordering::Relaxed) {
236            return Err(Error::from_raw_os_error(libc::ENOENT).into());
237        }
238
239        self.do_mkdir(req, pnode, sname.as_str(), mode, umask)
240            .await?;
241        self.do_lookup(req, parent, sname.as_str())
242            .await
243            .map_err(|e| e.into())
244    }
245
246    /// remove a file.
247    async fn unlink(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
248        self.do_rm(req, parent, name, false)
249            .await
250            .map_err(|e| e.into())
251    }
252
253    /// remove a directory.
254    async fn rmdir(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
255        self.do_rm(req, parent, name, true)
256            .await
257            .map_err(|e| e.into())
258    }
259
260    /// rename a file or directory.
261    async fn rename(
262        &self,
263        req: Request,
264        parent: Inode,
265        name: &OsStr,
266        new_parent: Inode,
267        new_name: &OsStr,
268    ) -> Result<()> {
269        self.do_rename(req, parent, name, new_parent, new_name)
270            .await
271            .map_err(|e| e.into())
272    }
273
274    /// create a hard link.
275    async fn link(
276        &self,
277        req: Request,
278        inode: Inode,
279        new_parent: Inode,
280        new_name: &OsStr,
281    ) -> Result<ReplyEntry> {
282        let node = self.lookup_node(req, inode, "").await?;
283        if node.whiteout.load(Ordering::Relaxed) {
284            return Err(Error::from_raw_os_error(libc::ENOENT).into());
285        }
286
287        let newpnode = self.lookup_node(req, new_parent, "").await?;
288        if newpnode.whiteout.load(Ordering::Relaxed) {
289            return Err(Error::from_raw_os_error(libc::ENOENT).into());
290        }
291        let new_name = new_name
292            .to_str()
293            .ok_or_else(|| Error::from_raw_os_error(libc::EINVAL))?;
294        // trace!(
295        //     "LINK: inode: {}, new_parent: {}, trying to do_link: src_inode: {}, newpnode: {}",
296        //     inode, new_parent, node.inode, newpnode.inode
297        // );
298        self.do_link(req, &node, &newpnode, new_name).await?;
299        // trace!("LINK: done, looking up new entry");
300        self.do_lookup(req, new_parent, new_name)
301            .await
302            .map_err(|e| e.into())
303    }
304
305    /// open a file. Open flags (with the exception of `O_CREAT`, `O_EXCL` and `O_NOCTTY`) are
306    /// available in flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in
    /// fh, and use this in all other file operations (read, write, flush, release, fsync).
308    /// Filesystem may also implement stateless file I/O and not store anything in fh. There are
309    /// also some flags (`direct_io`, `keep_cache`) which the filesystem may set, to change the way
310    /// the file is opened. A filesystem need not implement this method if it
311    /// sets [`MountOptions::no_open_support`][crate::MountOptions::no_open_support] and if the
312    /// kernel supports `FUSE_NO_OPEN_SUPPORT`.
313    ///
314    /// # Notes:
315    ///
316    /// See `fuse_file_info` structure in
317    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
318    /// more details.
319    async fn open(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
320        if self.no_open.load(Ordering::Relaxed) {
321            info!("fuse: open is not supported.");
322            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
323        }
324
325        let readonly: bool = flags
326            & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
327                as u32
328            == 0;
329        // toggle flags
330        let mut flags: i32 = flags as i32;
331
332        flags |= libc::O_NOFOLLOW;
333
334        if self.config.writeback {
335            if flags & libc::O_ACCMODE == libc::O_WRONLY {
336                flags &= !libc::O_ACCMODE;
337                flags |= libc::O_RDWR;
338            }
339
340            if flags & libc::O_APPEND != 0 {
341                flags &= !libc::O_APPEND;
342            }
343        }
344        // lookup node
345        let node = self.lookup_node(req, inode, "").await?;
346
347        // whiteout node
348        if node.whiteout.load(Ordering::Relaxed) {
349            return Err(Error::from_raw_os_error(libc::ENOENT).into());
350        }
351
352        if !readonly {
353            // copy up to upper layer
354            self.copy_node_up(req, node.clone()).await?;
355        }
356
357        // assign a handle in overlayfs and open it
358        let (_l, h) = node.open(req, flags as u32, 0).await?;
359
360        let hd = self.next_handle.fetch_add(1, Ordering::Relaxed);
361        let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
362        let handle_data = HandleData {
363            node: node.clone(),
364            real_handle: Some(RealHandle {
365                layer,
366                in_upper_layer,
367                inode,
368                handle: AtomicU64::new(h.fh),
369            }),
370            dir_snapshot: Mutex::new(None),
371        };
372
373        self.handles.lock().await.insert(hd, Arc::new(handle_data));
374
375        let mut opts = OpenOptions::empty();
376        match self.config.cache_policy {
377            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
378            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
379            _ => {}
380        }
381        // trace!("OPEN: returning handle: {hd}");
382
383        Ok(ReplyOpen {
384            fh: hd,
385            flags: opts.bits(),
386        })
387    }
388
389    /// read data. Read should send exactly the number of bytes requested except on EOF or error,
390    /// otherwise the rest of the data will be substituted with zeroes. An exception to this is
391    /// when the file has been opened in `direct_io` mode, in which case the return value of the
392    /// read system call will reflect the return value of this operation. `fh` will contain the
393    /// value set by the open method, or will be undefined if the open method didn't set any value.
394    async fn read(
395        &self,
396        req: Request,
397        inode: Inode,
398        fh: u64,
399        offset: u64,
400        size: u32,
401    ) -> Result<ReplyData> {
402        let data = self.get_data(req, Some(fh), inode, 0).await?;
403
404        match data.real_handle {
405            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
406            Some(ref hd) => {
407                hd.layer
408                    .read(
409                        req,
410                        hd.inode,
411                        hd.handle.load(Ordering::Relaxed),
412                        offset,
413                        size,
414                    )
415                    .await
416            }
417        }
418    }
419
420    /// write data. Write should return exactly the number of bytes requested except on error. An
421    /// exception to this is when the file has been opened in `direct_io` mode, in which case the
422    /// return value of the write system call will reflect the return value of this operation. `fh`
423    /// will contain the value set by the open method, or will be undefined if the open method
424    /// didn't set any value. When `write_flags` contains
425    /// [`FUSE_WRITE_CACHE`](crate::raw::flags::FUSE_WRITE_CACHE), means the write operation is a
426    /// delay write.
427    #[allow(clippy::too_many_arguments)]
428    async fn write(
429        &self,
430        req: Request,
431        inode: Inode,
432        fh: u64,
433        offset: u64,
434        data: &[u8],
435        write_flags: u32,
436        flags: u32,
437    ) -> Result<ReplyWrite> {
438        let handle_data: Arc<HandleData> = self.get_data(req, Some(fh), inode, flags).await?;
439
440        match handle_data.real_handle {
441            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
442            Some(ref hd) => {
443                hd.layer
444                    .write(
445                        req,
446                        hd.inode,
447                        hd.handle.load(Ordering::Relaxed),
448                        offset,
449                        data,
450                        write_flags,
451                        flags,
452                    )
453                    .await
454            }
455        }
456    }
457
458    /// Copy a range of data from one file to another. This can improve performance because it
459    /// reduces data copying: normally, data will be copied from FUSE server to kernel, then to
460    /// user-space, then to kernel, and finally sent back to FUSE server. By implementing this
461    /// method, data will only be copied internally within the FUSE server.
462    #[allow(clippy::too_many_arguments)]
463    async fn copy_file_range(
464        &self,
465        req: Request,
466        inode_in: Inode,
467        fh_in: u64,
468        offset_in: u64,
469        inode_out: Inode,
470        fh_out: u64,
471        offset_out: u64,
472        length: u64,
473        flags: u64,
474    ) -> Result<ReplyCopyFileRange> {
475        // Get handle data for source file
476        let data_in = self.get_data(req, Some(fh_in), inode_in, 0).await?;
477        let handle_in = match data_in.real_handle {
478            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
479            Some(ref hd) => hd,
480        };
481
482        // Get handle data for destination file
483        let data_out = self.get_data(req, Some(fh_out), inode_out, 0).await?;
484        let handle_out = match data_out.real_handle {
485            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
486            Some(ref hd) => hd,
487        };
488
489        // Both files must be on the same layer for copy_file_range to work
490        if !Arc::ptr_eq(&handle_in.layer, &handle_out.layer) {
491            // Different layers - return EXDEV to trigger fallback to read/write
492            return Err(Error::from_raw_os_error(libc::EXDEV).into());
493        }
494
495        // Delegate to the underlying PassthroughFs layer
496        handle_in
497            .layer
498            .copy_file_range(
499                req,
500                handle_in.inode,
501                handle_in.handle.load(Ordering::Relaxed),
502                offset_in,
503                handle_out.inode,
504                handle_out.handle.load(Ordering::Relaxed),
505                offset_out,
506                length,
507                flags,
508            )
509            .await
510    }
511
512    /// get filesystem statistics.
513    async fn statfs(&self, req: Request, inode: Inode) -> Result<ReplyStatFs> {
514        self.do_statvfs(req, inode).await.map_err(|e| e.into())
515    }
516
517    /// release an open file. Release is called when there are no more references to an open file:
518    /// all file descriptors are closed and all memory mappings are unmapped. For every open call
519    /// there will be exactly one release call. The filesystem may reply with an error, but error
520    /// values are not returned to `close()` or `munmap()` which triggered the release. `fh` will
521    /// contain the value set by the open method, or will be undefined if the open method didn't
522    /// set any value. `flags` will contain the same flags as for open. `flush` means flush the
523    /// data or not when closing file.
524    async fn release(
525        &self,
526        req: Request,
527        _inode: Inode,
528        fh: u64,
529        flags: u32,
530        lock_owner: u64,
531        flush: bool,
532    ) -> Result<()> {
533        if self.no_open.load(Ordering::Relaxed) {
534            info!("fuse: release is not supported.");
535            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
536        }
537
538        if let Some(hd) = self.handles.lock().await.get(&fh) {
539            let rh = if let Some(ref h) = hd.real_handle {
540                h
541            } else {
542                return Err(
543                    Error::other(format!("no real handle found for file handle {fh}")).into(),
544                );
545            };
546            let real_handle = rh.handle.load(Ordering::Relaxed);
547            let real_inode = rh.inode;
548            rh.layer
549                .release(req, real_inode, real_handle, flags, lock_owner, flush)
550                .await?;
551        }
552
553        self.handles.lock().await.remove(&fh);
554
555        Ok(())
556    }
557
558    /// synchronize file contents. If the `datasync` is true, then only the user data should be
559    /// flushed, not the metadata.
560    async fn fsync(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
561        self.do_fsync(req, inode, datasync, fh, false)
562            .await
563            .map_err(|e| e.into())
564    }
565
566    /// set an extended attribute.
567    async fn setxattr(
568        &self,
569        req: Request,
570        inode: Inode,
571        name: &OsStr,
572        value: &[u8],
573        flags: u32,
574        position: u32,
575    ) -> Result<()> {
576        let node = self.lookup_node(req, inode, "").await?;
577
578        if node.whiteout.load(Ordering::Relaxed) {
579            return Err(Error::from_raw_os_error(libc::ENOENT).into());
580        }
581
582        if !node.in_upper_layer().await {
583            // Copy node up.
584            self.copy_node_up(req, node.clone()).await?;
585        }
586
587        let (layer, _, real_inode) = node.first_layer_inode().await;
588
589        layer
590            .setxattr(req, real_inode, name, value, flags, position)
591            .await
592    }
593
594    /// Get an extended attribute. If `size` is too small, return `Err<ERANGE>`.
595    /// Otherwise, use [`ReplyXAttr::Data`] to send the attribute data, or
596    /// return an error.
597    async fn getxattr(
598        &self,
599        req: Request,
600        inode: Inode,
601        name: &OsStr,
602        size: u32,
603    ) -> Result<ReplyXAttr> {
604        let node = self.lookup_node(req, inode, "").await?;
605
606        if node.whiteout.load(Ordering::Relaxed) {
607            return Err(Error::from_raw_os_error(libc::ENOENT).into());
608        }
609
610        let (layer, real_inode) = self.find_real_inode(inode).await?;
611
612        layer.getxattr(req, real_inode, name, size).await
613    }
614
615    /// List extended attribute names.
616    ///
617    /// If `size` is too small, return `Err<ERANGE>`.  Otherwise, use
618    /// [`ReplyXAttr::Data`] to send the attribute list, or return an error.
619    async fn listxattr(&self, req: Request, inode: Inode, size: u32) -> Result<ReplyXAttr> {
620        let node = self.lookup_node(req, inode, "").await?;
621        if node.whiteout.load(Ordering::Relaxed) {
622            return Err(Error::from_raw_os_error(libc::ENOENT).into());
623        }
624        let (layer, real_inode) = self.find_real_inode(inode).await?;
625        layer.listxattr(req, real_inode, size).await
626    }
627
628    /// remove an extended attribute.
629    async fn removexattr(&self, req: Request, inode: Inode, name: &OsStr) -> Result<()> {
630        let node = self.lookup_node(req, inode, "").await?;
631
632        if node.whiteout.load(Ordering::Relaxed) {
633            return Err(Error::from_raw_os_error(libc::ENOENT).into());
634        }
635
636        if !node.in_upper_layer().await {
637            // copy node into upper layer
638            self.copy_node_up(req, node.clone()).await?;
639        }
640
641        let (layer, _, ino) = node.first_layer_inode().await;
642        layer.removexattr(req, ino, name).await
643
644        // TODO: recreate the node since removexattr may remove the opaque xattr.
645    }
646
647    /// flush method. This is called on each `close()` of the opened file. Since file descriptors
648    /// can be duplicated (`dup`, `dup2`, `fork`), for one open call there may be many flush calls.
    /// Filesystems shouldn't assume that flush will always be called after some writes, or that it
    /// will be called at all. `fh` will contain the value set by the open method, or will be
651    /// undefined if the open method didn't set any value.
652    ///
653    /// # Notes:
654    ///
655    /// the name of the method is misleading, since (unlike fsync) the filesystem is not forced to
656    /// flush pending writes. One reason to flush data, is if the filesystem wants to return write
657    /// errors. If the filesystem supports file locking operations ([`setlk`][Filesystem::setlk],
658    /// [`getlk`][Filesystem::getlk]) it should remove all locks belonging to `lock_owner`.
659    async fn flush(&self, req: Request, inode: Inode, fh: u64, lock_owner: u64) -> Result<()> {
660        if self.no_open.load(Ordering::Relaxed) {
661            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
662        }
663
664        let node = self.lookup_node(req, inode, "").await;
665        match node {
666            Ok(n) => {
667                if n.whiteout.load(Ordering::Relaxed) {
668                    return Err(Error::from_raw_os_error(libc::ENOENT).into());
669                }
670            }
671            Err(e) => {
672                if e.raw_os_error() == Some(libc::ENOENT) {
673                    trace!("flush: inode {inode} is stale");
674                } else {
675                    return Err(e.into());
676                }
677            }
678        }
679
680        let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
681
682        // FIXME: need to test if inode matches corresponding handle?
683        if inode
684            != self
685                .handles
686                .lock()
687                .await
688                .get(&fh)
689                .map(|h| h.node.inode)
690                .unwrap_or(0)
691        {
692            return Err(Error::other("inode does not match handle").into());
693        }
694
695        trace!("flushing, real_inode: {real_inode}, real_handle: {real_handle}");
696        layer.flush(req, real_inode, real_handle, lock_owner).await
697    }
698
699    /// open a directory. Filesystem may store an arbitrary file handle (pointer, index, etc) in
    /// `fh`, and use this in all other directory stream operations
701    /// ([`readdir`][Filesystem::readdir], [`releasedir`][Filesystem::releasedir],
702    /// [`fsyncdir`][Filesystem::fsyncdir]). Filesystem may also implement stateless directory
703    /// I/O and not store anything in `fh`.  A file system need not implement this method if it
704    /// sets [`MountOptions::no_open_dir_support`][crate::MountOptions::no_open_dir_support] and
705    /// if the kernel supports `FUSE_NO_OPENDIR_SUPPORT`.
706    async fn opendir(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
707        if self.no_opendir.load(Ordering::Relaxed) {
708            info!("fuse: opendir is not supported.");
709            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
710        }
711
712        // lookup node
713        let node = self.lookup_node(req, inode, ".").await?;
714
715        if node.whiteout.load(Ordering::Relaxed) {
716            return Err(Error::from_raw_os_error(libc::ENOENT).into());
717        }
718
719        let st = node.stat64(req).await?;
720        if !utils::is_dir(&st.attr.kind) {
721            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
722        }
723
724        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
725        // Get the layer information and open directory in the underlying layer
726        let (layer, in_upper_layer, real_inode) = node.first_layer_inode().await;
727        let reply = layer.opendir(req, real_inode, flags).await?;
728
729        self.handles.lock().await.insert(
730            handle,
731            Arc::new(HandleData {
732                node: Arc::clone(&node),
733                real_handle: Some(RealHandle {
734                    layer,
735                    in_upper_layer,
736                    inode: real_inode,
737                    handle: AtomicU64::new(reply.fh),
738                }),
739                dir_snapshot: Mutex::new(None),
740            }),
741        );
742
743        Ok(ReplyOpen { fh: handle, flags })
744    }
745
746    /// read directory. `offset` is used to track the offset of the directory entries. `fh` will
747    /// contain the value set by the [`opendir`][Filesystem::opendir] method, or will be
748    /// undefined if the [`opendir`][Filesystem::opendir] method didn't set any value.
749    async fn readdir<'a>(
750        &'a self,
751        req: Request,
752        parent: Inode,
753        fh: u64,
754        offset: i64,
755    ) -> Result<
756        ReplyDirectory<
757            impl futures_util::stream::Stream<Item = Result<DirectoryEntry>> + Send + 'a,
758        >,
759    > {
760        if self.config.no_readdir {
761            info!("fuse: readdir is not supported.");
762            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
763        }
764        let entries = self
765            .do_readdir(req, parent, fh, offset.try_into().unwrap())
766            .await?;
767        Ok(ReplyDirectory { entries })
768    }
769
770    /// read directory entries, but with their attribute, like [`readdir`][Filesystem::readdir]
771    /// + [`lookup`][Filesystem::lookup] at the same time.
772    async fn readdirplus<'a>(
773        &'a self,
774        req: Request,
775        parent: Inode,
776        fh: u64,
777        offset: u64,
778        _lock_owner: u64,
779    ) -> Result<
780        ReplyDirectoryPlus<
781            impl futures_util::stream::Stream<Item = Result<DirectoryEntryPlus>> + Send + 'a,
782        >,
783    > {
784        if self.config.no_readdir {
785            info!("fuse: readdir is not supported.");
786            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
787        }
788        trace!("readdirplus: parent: {parent}, fh: {fh}, offset: {offset}");
789        let entries = self.do_readdirplus(req, parent, fh, offset).await?;
790        match self.handles.lock().await.get(&fh) {
791            Some(h) => {
792                trace!(
793                    "after readdirplus: found handle, seeing real_handle: {}",
794                    h.real_handle.is_some()
795                );
796            }
797            None => trace!("after readdirplus: no handle found: {fh}"),
798        }
799        Ok(ReplyDirectoryPlus { entries })
800    }
801    /// release an open directory. For every [`opendir`][Filesystem::opendir] call there will
802    /// be exactly one `releasedir` call. `fh` will contain the value set by the
803    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
804    /// [`opendir`][Filesystem::opendir] method didn't set any value.
805    async fn releasedir(&self, req: Request, _inode: Inode, fh: u64, flags: u32) -> Result<()> {
806        if self.no_opendir.load(Ordering::Relaxed) {
807            info!("fuse: releasedir is not supported.");
808            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
809        }
810
811        if let Some(hd) = self.handles.lock().await.get(&fh) {
812            let rh = if let Some(ref h) = hd.real_handle {
813                h
814            } else {
815                return Err(
816                    Error::other(format!("no real handle found for file handle {fh}")).into(),
817                );
818            };
819            let real_handle = rh.handle.load(Ordering::Relaxed);
820            let real_inode = rh.inode;
821            rh.layer
822                .releasedir(req, real_inode, real_handle, flags)
823                .await?;
824        }
825
826        self.handles.lock().await.remove(&fh);
827        Ok(())
828    }
829
830    /// synchronize directory contents. If the `datasync` is true, then only the directory contents
831    /// should be flushed, not the metadata. `fh` will contain the value set by the
832    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
833    /// [`opendir`][Filesystem::opendir] method didn't set any value.
834    async fn fsyncdir(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
835        self.do_fsync(req, inode, datasync, fh, true)
836            .await
837            .map_err(|e| e.into())
838    }
839
840    #[allow(clippy::too_many_arguments)]
841    async fn getlk(
842        &self,
843        req: Request,
844        _inode: Inode,
845        fh: u64,
846        lock_owner: u64,
847        start: u64,
848        end: u64,
849        r#type: u32,
850        pid: u32,
851    ) -> Result<ReplyLock> {
852        if !self.no_open.load(Ordering::Relaxed) {
853            let handles = self.handles.lock().await;
854            if let Some(hd) = handles.get(&fh)
855                && let Some(ref rh) = hd.real_handle
856            {
857                match rh
858                    .layer
859                    .getlk(
860                        req,
861                        rh.inode,
862                        rh.handle.load(Ordering::Relaxed),
863                        lock_owner,
864                        start,
865                        end,
866                        r#type,
867                        pid,
868                    )
869                    .await
870                {
871                    Ok(reply) => return Ok(reply),
872                    Err(e) => {
873                        // If underlying layer doesn't support locking, fall through to fallback
874                        let errno: i32 = e.into();
875                        if errno != libc::ENOSYS {
876                            return Err(errno.into());
877                        }
878                    }
879                }
880            }
881        }
882
883        // Fallback: report no lock conflict
884        Ok(ReplyLock {
885            start: 0,
886            end: 0,
887            r#type: libc::F_UNLCK as u32,
888            pid: 0,
889        })
890    }
891
892    #[allow(clippy::too_many_arguments)]
893    async fn setlk(
894        &self,
895        req: Request,
896        _inode: Inode,
897        fh: u64,
898        lock_owner: u64,
899        start: u64,
900        end: u64,
901        r#type: u32,
902        pid: u32,
903        block: bool,
904    ) -> Result<()> {
905        if !self.no_open.load(Ordering::Relaxed) {
906            let handles = self.handles.lock().await;
907            if let Some(hd) = handles.get(&fh)
908                && let Some(ref rh) = hd.real_handle
909            {
910                match rh
911                    .layer
912                    .setlk(
913                        req,
914                        rh.inode,
915                        rh.handle.load(Ordering::Relaxed),
916                        lock_owner,
917                        start,
918                        end,
919                        r#type,
920                        pid,
921                        block,
922                    )
923                    .await
924                {
925                    Ok(()) => return Ok(()),
926                    Err(e) => {
927                        // If underlying layer doesn't support locking, fall through to fallback
928                        let errno: i32 = e.into();
929                        if errno != libc::ENOSYS {
930                            return Err(errno.into());
931                        }
932                    }
933                }
934            }
935        }
936
937        // Fallback: silently accept the lock request
938        Ok(())
939    }
940    /// check file access permissions. This will be called for the `access()` system call. If the
941    /// `default_permissions` mount option is given, this method is not be called. This method is
942    /// not called under Linux kernel versions 2.4.x.
943    async fn access(&self, req: Request, inode: Inode, mask: u32) -> Result<()> {
944        let node = self.lookup_node(req, inode, "").await?;
945
946        if node.whiteout.load(Ordering::Relaxed) {
947            return Err(Error::from_raw_os_error(libc::ENOENT).into());
948        }
949
950        let (layer, real_inode) = self.find_real_inode(inode).await?;
951        layer.access(req, real_inode, mask).await
952    }
953
954    /// create and open a file. If the file does not exist, first create it with the specified
955    /// mode, and then open it. Open flags (with the exception of `O_NOCTTY`) are available in
956    /// flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in `fh`, and use
957    /// this in other all other file operations ([`read`][Filesystem::read],
958    /// [`write`][Filesystem::write], [`flush`][Filesystem::flush],
959    /// [`release`][Filesystem::release], [`fsync`][Filesystem::fsync]). There are also some flags
960    /// (`direct_io`, `keep_cache`) which the filesystem may set, to change the way the file is
961    /// opened. If this method is not implemented or under Linux kernel versions earlier than
962    /// 2.6.15, the [`mknod`][Filesystem::mknod] and [`open`][Filesystem::open] methods will be
963    /// called instead.
964    ///
965    /// # Notes:
966    ///
967    /// See `fuse_file_info` structure in
968    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
969    /// more details.
970    async fn create(
971        &self,
972        req: Request,
973        parent: Inode,
974        name: &OsStr,
975        mode: u32,
976        flags: u32,
977    ) -> Result<ReplyCreated> {
978        // Parent doesn't exist.
979        let pnode = self.lookup_node(req, parent, "").await?;
980        if pnode.whiteout.load(Ordering::Relaxed) {
981            return Err(Error::from_raw_os_error(libc::ENOENT).into());
982        }
983
984        let mut flags: i32 = flags as i32;
985        flags |= libc::O_NOFOLLOW;
986        #[cfg(target_os = "linux")]
987        {
988            flags &= !libc::O_DIRECT;
989        }
990        if self.config.writeback {
991            if flags & libc::O_ACCMODE == libc::O_WRONLY {
992                flags &= !libc::O_ACCMODE;
993                flags |= libc::O_RDWR;
994            }
995
996            if flags & libc::O_APPEND != 0 {
997                flags &= !libc::O_APPEND;
998            }
999        }
1000
1001        let name_str = name
1002            .to_str()
1003            .ok_or_else(|| Error::from_raw_os_error(libc::EINVAL))?;
1004        let final_handle = self
1005            .do_create(req, &pnode, name, mode, flags.try_into().unwrap())
1006            .await?;
1007        let entry = self.do_lookup(req, parent, name_str).await?;
1008        let fh = final_handle
1009            .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "Handle not found"))?;
1010
1011        let mut opts = OpenOptions::empty();
1012        match self.config.cache_policy {
1013            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
1014            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
1015            _ => {}
1016        }
1017
1018        Ok(ReplyCreated {
1019            ttl: entry.ttl,
1020            attr: entry.attr,
1021            generation: entry.generation,
1022            fh,
1023            flags: opts.bits(),
1024        })
1025    }
1026
1027    /// forget more than one inode. This is a batch version [`forget`][Filesystem::forget]
1028    async fn batch_forget(&self, _req: Request, inodes: &[(Inode, u64)]) {
1029        for inode in inodes {
1030            self.forget_one(inode.0, inode.1).await;
1031        }
1032    }
1033
1034    /// allocate space for an open file. This function ensures that required space is allocated for
1035    /// specified file.
1036    ///
1037    /// # Notes:
1038    ///
1039    /// more information about `fallocate`, please see **`man 2 fallocate`**
1040    async fn fallocate(
1041        &self,
1042        req: Request,
1043        inode: Inode,
1044        fh: u64,
1045        offset: u64,
1046        length: u64,
1047        mode: u32,
1048    ) -> Result<()> {
1049        // Use O_RDONLY flags which indicates no copy up.
1050        let data = self
1051            .get_data(req, Some(fh), inode, libc::O_RDONLY as u32)
1052            .await?;
1053
1054        match data.real_handle {
1055            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
1056            Some(ref rhd) => {
1057                if !rhd.in_upper_layer {
1058                    // TODO: in lower layer, error out or just success?
1059                    return Err(Error::from_raw_os_error(libc::EROFS).into());
1060                }
1061                rhd.layer
1062                    .fallocate(
1063                        req,
1064                        rhd.inode,
1065                        rhd.handle.load(Ordering::Relaxed),
1066                        offset,
1067                        length,
1068                        mode,
1069                    )
1070                    .await
1071            }
1072        }
1073    }
1074
1075    /// find next data or hole after the specified offset.
1076    async fn lseek(
1077        &self,
1078        req: Request,
1079        inode: Inode,
1080        fh: u64,
1081        offset: u64,
1082        whence: u32,
1083    ) -> Result<ReplyLSeek> {
1084        let node = self.lookup_node(req, inode, "").await?;
1085
1086        if node.whiteout.load(Ordering::Relaxed) {
1087            return Err(Error::from_raw_os_error(libc::ENOENT).into());
1088        }
1089
1090        let st = node.stat64(req).await?;
1091        if utils::is_dir(&st.attr.kind) {
1092            // Special handling and security restrictions for directory operations.
1093            // Use the common API to obtain the underlying layer and handle info.
1094            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1095
1096            // Verify that the underlying handle refers to a directory.
1097            let handle_stat = match layer.getattr(req, real_inode, Some(real_handle), 0).await {
1098                Ok(s) => s,
1099                Err(_) => return Err(Error::from_raw_os_error(libc::EBADF).into()),
1100            };
1101
1102            if !utils::is_dir(&handle_stat.attr.kind) {
1103                return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
1104            }
1105
1106            // Handle directory lseek operations according to POSIX standard
1107            // This enables seekdir/telldir functionality on directories
1108            match whence {
1109                // SEEK_SET: Set the directory position to an absolute value
1110                x if x == libc::SEEK_SET as u32 => {
1111                    // Validate offset bounds to prevent overflow
1112                    // Directory offsets should not exceed i64::MAX
1113                    if offset > i64::MAX as u64 {
1114                        return Err(Error::from_raw_os_error(libc::EINVAL).into());
1115                    }
1116
1117                    // Perform the seek operation on the underlying layer
1118                    // Delegate to the lower layer implementation
1119                    layer
1120                        .lseek(req, real_inode, real_handle, offset, whence)
1121                        .await
1122                }
1123                // SEEK_CUR: Move relative to the current directory position
1124                x if x == libc::SEEK_CUR as u32 => {
1125                    // Get current position from underlying layer
1126                    // This is needed to calculate the new position
1127                    let current = match layer
1128                        .lseek(req, real_inode, real_handle, 0, libc::SEEK_CUR as u32)
1129                        .await
1130                    {
1131                        Ok(r) => r.offset,
1132                        Err(_) => return Err(Error::from_raw_os_error(libc::EINVAL).into()),
1133                    };
1134
1135                    // Check for potential overflow when adding the provided offset
1136                    // This prevents invalid position calculations
1137                    if let Some(new_offset) = current.checked_add(offset) {
1138                        // Ensure the new offset is within valid bounds
1139                        if new_offset > i64::MAX as u64 {
1140                            return Err(Error::from_raw_os_error(libc::EINVAL).into());
1141                        }
1142
1143                        // Actually set the underlying offset to the new value so behavior
1144                        // matches passthrough which uses libc::lseek64 to set the fd offset.
1145                        match layer
1146                            .lseek(
1147                                req,
1148                                real_inode,
1149                                real_handle,
1150                                new_offset,
1151                                libc::SEEK_SET as u32,
1152                            )
1153                            .await
1154                        {
1155                            Ok(_) => Ok(ReplyLSeek { offset: new_offset }),
1156                            Err(_) => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1157                        }
1158                    } else {
1159                        Err(Error::from_raw_os_error(libc::EINVAL).into())
1160                    }
1161                }
1162                // Any other whence value is invalid for directories
1163                _ => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1164            }
1165        } else {
1166            // Keep the original lseek behavior for regular files
1167            // Delegate directly to the underlying layer
1168            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1169            layer
1170                .lseek(req, real_inode, real_handle, offset, whence)
1171                .await
1172        }
1173    }
1174
    /// Handle an interrupt notification for a previously issued request.
    ///
    /// Neither `_req` nor `_unique` is inspected; the call always returns `Ok(())` without
    /// touching any filesystem state.
    async fn interrupt(&self, _req: Request, _unique: u64) -> Result<()> {
        Ok(())
    }
1178}
1179
#[cfg(test)]
mod tests {
    use std::{ffi::OsString, path::PathBuf, sync::Arc};

    use rfuse3::raw::logfs::LoggingFileSystem;
    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;
    use tracing_subscriber::EnvFilter;

    use crate::unionfs::BoxedLayer;
    use crate::{
        passthrough::{PassthroughArgs, new_passthroughfs_layer},
        unionfs::{OverlayFs, config::Config},
    };

    /// Manual smoke test: mounts an overlay made of one passthrough lower layer and one
    /// passthrough upper layer, then serves it until the session ends or Ctrl-C is received.
    ///
    /// Marked `#[ignore]` because it relies on fixed local directories and an actual FUSE
    /// mount.
    #[tokio::test]
    #[ignore]
    async fn test_a_ovlfs() {
        let _ = tracing_subscriber::fmt()
            .with_env_filter(EnvFilter::from_default_env().add_directive("trace".parse().unwrap()))
            .try_init();

        // Fixed locations used by this manual test.
        let mountpoint = PathBuf::from("/home/luxian/megatest/true_temp");
        let lower_dirs = [PathBuf::from("/home/luxian/github/buck2-rust-third-party")];
        let upper_dir = PathBuf::from("/home/luxian/upper");

        // One passthrough layer per lower directory.
        let mut lower_layers: Vec<Arc<BoxedLayer>> = Vec::with_capacity(lower_dirs.len());
        for dir in &lower_dirs {
            let args = PassthroughArgs {
                root_dir: dir.clone(),
                mapping: None::<&str>,
            };
            let layer = new_passthroughfs_layer(args).await.unwrap();
            lower_layers.push(Arc::new(layer) as Arc<BoxedLayer>);
        }

        // Writable upper layer.
        let upper_args = PassthroughArgs {
            root_dir: upper_dir,
            mapping: None::<&str>,
        };
        let upper_layer: Arc<BoxedLayer> =
            Arc::new(new_passthroughfs_layer(upper_args).await.unwrap());

        // Assemble the overlay and wrap it in the request logger.
        let config = Config {
            mountpoint: mountpoint.clone(),
            do_import: true,
            ..Default::default()
        };
        let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1).unwrap();
        let logfs = LoggingFileSystem::new(overlayfs);

        // SAFETY: `getuid` and `getgid` take no arguments and only return the caller's ids.
        let uid = unsafe { libc::getuid() };
        // SAFETY: see above.
        let gid = unsafe { libc::getgid() };

        let mut mount_options = MountOptions::default();
        #[cfg(target_os = "linux")]
        mount_options.force_readdir_plus(true);
        mount_options.uid(uid).gid(gid);

        let mount_path: OsString = OsString::from(mountpoint);

        // This test always takes the unprivileged mount path.
        let use_privileged_mount = false;
        let session = Session::new(mount_options);
        let mut mount_handle: rfuse3::raw::MountHandle = if use_privileged_mount {
            session.mount(logfs, mount_path).await.unwrap()
        } else {
            session
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        };

        let handle = &mut mount_handle;

        // Serve until the session finishes on its own, or unmount on Ctrl-C.
        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}