libfuse_fs/unionfs/
async_io.rs

1use super::utils;
2use super::{CachePolicy, HandleData, Inode, OverlayFs, RealHandle};
3use crate::util::open_options::OpenOptions;
4use rfuse3::raw::prelude::*;
5use rfuse3::*;
6use std::ffi::OsStr;
7use std::io::Error;
8use std::io::ErrorKind;
9use std::num::NonZeroU32;
10use std::sync::Arc;
11use std::sync::atomic::{AtomicU64, Ordering};
12use tracing::info;
13use tracing::trace;
14
15impl Filesystem for OverlayFs {
16    /// initialize filesystem. Called before any other filesystem method.
17    async fn init(&self, _req: Request) -> Result<ReplyInit> {
18        if self.config.do_import {
19            self.import().await?;
20        }
21        if !self.config.do_import || self.config.writeback {
22            self.writeback.store(true, Ordering::Relaxed);
23        }
24        if !self.config.do_import || self.config.no_open {
25            self.no_open.store(true, Ordering::Relaxed);
26        }
27        if !self.config.do_import || self.config.no_opendir {
28            self.no_opendir.store(true, Ordering::Relaxed);
29        }
30        if !self.config.do_import || self.config.killpriv_v2 {
31            self.killpriv_v2.store(true, Ordering::Relaxed);
32        }
33        if self.config.perfile_dax {
34            self.perfile_dax.store(true, Ordering::Relaxed);
35        }
36
37        Ok(ReplyInit {
38            max_write: NonZeroU32::new(128 * 1024).unwrap(),
39        })
40    }
41
42    /// clean up filesystem. Called on filesystem exit which is fuseblk, in normal fuse filesystem,
43    /// kernel may call forget for root. There is some discuss for this
44    /// <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
45    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
46    async fn destroy(&self, _req: Request) {}
47
48    /// look up a directory entry by name and get its attributes.
49    async fn lookup(&self, req: Request, parent: Inode, name: &OsStr) -> Result<ReplyEntry> {
50        let tmp = name.to_string_lossy().to_string();
51        let result = self.do_lookup(req, parent, tmp.as_str()).await;
52        match result {
53            Ok(e) => Ok(e),
54            Err(err) => Err(err.into()),
55        }
56    }
57
58    /// forget an inode. The nlookup parameter indicates the number of lookups previously
59    /// performed on this inode. If the filesystem implements inode lifetimes, it is recommended
60    /// that inodes acquire a single reference on each lookup, and lose nlookup references on each
61    /// forget. The filesystem may ignore forget calls, if the inodes don't need to have a limited
62    /// lifetime. On unmount it is not guaranteed, that all referenced inodes will receive a forget
63    /// message. When filesystem is normal(not fuseblk) and unmounting, kernel may send forget
64    /// request for root and this library will stop session after call forget. There is some
65    /// discussion for this <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
66    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
67    async fn forget(&self, _req: Request, inode: Inode, nlookup: u64) {
68        self.forget_one(inode, nlookup).await;
69    }
70
71    /// get file attributes. If `fh` is None, means `fh` is not set.
72    async fn getattr(
73        &self,
74        req: Request,
75        inode: Inode,
76        fh: Option<u64>,
77        flags: u32,
78    ) -> Result<ReplyAttr> {
79        if !self.no_open.load(Ordering::Relaxed)
80            && let Some(h) = fh
81        {
82            let handles = self.handles.lock().await;
83            if let Some(hd) = handles.get(&h)
84                && let Some(ref rh) = hd.real_handle
85            {
86                let mut rep: ReplyAttr = rh
87                    .layer
88                    .getattr(req, rh.inode, Some(rh.handle.load(Ordering::Relaxed)), 0)
89                    .await?;
90                rep.attr.ino = inode;
91                return Ok(rep);
92            }
93        }
94
95        let node: Arc<super::OverlayInode> = self.lookup_node(req, inode, "").await?;
96        let (layer, _, lower_inode) = node.first_layer_inode().await;
97        let mut re = layer.getattr(req, lower_inode, None, flags).await?;
98        re.attr.ino = inode;
99        Ok(re)
100    }
101
102    /// set file attributes. If `fh` is None, means `fh` is not set.
103    async fn setattr(
104        &self,
105        req: Request,
106        inode: Inode,
107        fh: Option<u64>,
108        set_attr: SetAttr,
109    ) -> Result<ReplyAttr> {
110        // Check if upper layer exists.
111        self.upper_layer
112            .as_ref()
113            .cloned()
114            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
115
116        // deal with handle first
117        if !self.no_open.load(Ordering::Relaxed)
118            && let Some(h) = fh
119        {
120            let handles = self.handles.lock().await;
121            if let Some(hd) = handles.get(&h)
122                && let Some(ref rhd) = hd.real_handle
123            {
124                // handle opened in upper layer
125                if rhd.in_upper_layer {
126                    let mut rep = rhd
127                        .layer
128                        .setattr(
129                            req,
130                            rhd.inode,
131                            Some(rhd.handle.load(Ordering::Relaxed)),
132                            set_attr,
133                        )
134                        .await?;
135                    rep.attr.ino = inode;
136                    return Ok(rep);
137                }
138            }
139        }
140
141        let mut node = self.lookup_node(req, inode, "").await?;
142
143        if !node.in_upper_layer().await {
144            node = self.copy_node_up(req, node.clone()).await?
145        }
146
147        let (layer, _, real_inode) = node.first_layer_inode().await;
148        // layer.setattr(req, real_inode, None, set_attr).await
149        let mut rep = layer.setattr(req, real_inode, None, set_attr).await?;
150        rep.attr.ino = inode;
151        Ok(rep)
152    }
153
154    /// read symbolic link.
155    async fn readlink(&self, req: Request, inode: Inode) -> Result<ReplyData> {
156        trace!("READLINK: inode: {inode}\n");
157
158        let node = self.lookup_node(req, inode, "").await?;
159
160        if node.whiteout.load(Ordering::Relaxed) {
161            return Err(Error::from_raw_os_error(libc::ENOENT).into());
162        }
163
164        let (layer, _, inode) = node.first_layer_inode().await;
165        layer.readlink(req, inode).await
166    }
167
168    /// create a symbolic link.
169    async fn symlink(
170        &self,
171        req: Request,
172        parent: Inode,
173        name: &OsStr,
174        link: &OsStr,
175    ) -> Result<ReplyEntry> {
176        // soft link
177        let sname = name.to_string_lossy().into_owned().to_owned();
178        let slinkname = link.to_string_lossy().into_owned().to_owned();
179
180        let pnode = self.lookup_node(req, parent, "").await?;
181        self.do_symlink(req, slinkname.as_str(), &pnode, sname.as_str())
182            .await?;
183
184        self.do_lookup(req, parent, sname.as_str())
185            .await
186            .map_err(|e| e.into())
187    }
188
189    /// create file node. Create a regular file, character device, block device, fifo or socket
190    /// node. When creating file, most cases user only need to implement
191    /// [`create`][Filesystem::create].
192    async fn mknod(
193        &self,
194        req: Request,
195        parent: Inode,
196        name: &OsStr,
197        mode: u32,
198        rdev: u32,
199    ) -> Result<ReplyEntry> {
200        let sname = name.to_string_lossy().to_string();
201
202        // Check if parent exists.
203        let pnode = self.lookup_node(req, parent, "").await?;
204        if pnode.whiteout.load(Ordering::Relaxed) {
205            return Err(Error::from_raw_os_error(libc::ENOENT).into());
206        }
207
208        self.do_mknod(req, &pnode, sname.as_str(), mode, rdev, 0)
209            .await?;
210        self.do_lookup(req, parent, sname.as_str())
211            .await
212            .map_err(|e| e.into())
213    }
214
215    /// create a directory.
216    async fn mkdir(
217        &self,
218        req: Request,
219        parent: Inode,
220        name: &OsStr,
221        mode: u32,
222        umask: u32,
223    ) -> Result<ReplyEntry> {
224        let sname = name.to_string_lossy().to_string();
225
226        // no entry or whiteout
227        let pnode = self.lookup_node(req, parent, "").await?;
228        if pnode.whiteout.load(Ordering::Relaxed) {
229            return Err(Error::from_raw_os_error(libc::ENOENT).into());
230        }
231
232        self.do_mkdir(req, pnode, sname.as_str(), mode, umask)
233            .await?;
234        self.do_lookup(req, parent, sname.as_str())
235            .await
236            .map_err(|e| e.into())
237    }
238
239    /// remove a file.
240    async fn unlink(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
241        self.do_rm(req, parent, name, false)
242            .await
243            .map_err(|e| e.into())
244    }
245
246    /// remove a directory.
247    async fn rmdir(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
248        self.do_rm(req, parent, name, true)
249            .await
250            .map_err(|e| e.into())
251    }
252
253    /// rename a file or directory.
254    async fn rename(
255        &self,
256        req: Request,
257        parent: Inode,
258        name: &OsStr,
259        new_parent: Inode,
260        new_name: &OsStr,
261    ) -> Result<()> {
262        self.do_rename(req, parent, name, new_parent, new_name)
263            .await
264            .map_err(|e| e.into())
265    }
266
267    /// create a hard link.
268    async fn link(
269        &self,
270        req: Request,
271        inode: Inode,
272        new_parent: Inode,
273        new_name: &OsStr,
274    ) -> Result<ReplyEntry> {
275        let node = self.lookup_node(req, inode, "").await?;
276        if node.whiteout.load(Ordering::Relaxed) {
277            return Err(Error::from_raw_os_error(libc::ENOENT).into());
278        }
279
280        let newpnode = self.lookup_node(req, new_parent, "").await?;
281        if newpnode.whiteout.load(Ordering::Relaxed) {
282            return Err(Error::from_raw_os_error(libc::ENOENT).into());
283        }
284        let new_name = new_name.to_str().unwrap();
285        // trace!(
286        //     "LINK: inode: {}, new_parent: {}, trying to do_link: src_inode: {}, newpnode: {}",
287        //     inode, new_parent, node.inode, newpnode.inode
288        // );
289        self.do_link(req, &node, &newpnode, new_name).await?;
290        // trace!("LINK: done, looking up new entry");
291        self.do_lookup(req, new_parent, new_name)
292            .await
293            .map_err(|e| e.into())
294    }
295
296    /// open a file. Open flags (with the exception of `O_CREAT`, `O_EXCL` and `O_NOCTTY`) are
297    /// available in flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in
298    /// fh, and use this in other all other file operations (read, write, flush, release, fsync).
299    /// Filesystem may also implement stateless file I/O and not store anything in fh. There are
300    /// also some flags (`direct_io`, `keep_cache`) which the filesystem may set, to change the way
301    /// the file is opened. A filesystem need not implement this method if it
302    /// sets [`MountOptions::no_open_support`][crate::MountOptions::no_open_support] and if the
303    /// kernel supports `FUSE_NO_OPEN_SUPPORT`.
304    ///
305    /// # Notes:
306    ///
307    /// See `fuse_file_info` structure in
308    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
309    /// more details.
310    async fn open(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
311        if self.no_open.load(Ordering::Relaxed) {
312            info!("fuse: open is not supported.");
313            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
314        }
315
316        let readonly: bool = flags
317            & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
318                as u32
319            == 0;
320        // toggle flags
321        let mut flags: i32 = flags as i32;
322
323        flags |= libc::O_NOFOLLOW;
324
325        if self.config.writeback {
326            if flags & libc::O_ACCMODE == libc::O_WRONLY {
327                flags &= !libc::O_ACCMODE;
328                flags |= libc::O_RDWR;
329            }
330
331            if flags & libc::O_APPEND != 0 {
332                flags &= !libc::O_APPEND;
333            }
334        }
335        // lookup node
336        let node = self.lookup_node(req, inode, "").await?;
337
338        // whiteout node
339        if node.whiteout.load(Ordering::Relaxed) {
340            return Err(Error::from_raw_os_error(libc::ENOENT).into());
341        }
342
343        if !readonly {
344            // copy up to upper layer
345            self.copy_node_up(req, node.clone()).await?;
346        }
347
348        // assign a handle in overlayfs and open it
349        let (_l, h) = node.open(req, flags as u32, 0).await?;
350
351        let hd = self.next_handle.fetch_add(1, Ordering::Relaxed);
352        let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
353        let handle_data = HandleData {
354            node: node.clone(),
355            real_handle: Some(RealHandle {
356                layer,
357                in_upper_layer,
358                inode,
359                handle: AtomicU64::new(h.fh),
360            }),
361        };
362
363        self.handles.lock().await.insert(hd, Arc::new(handle_data));
364
365        trace!("OPEN: returning handle: {hd}");
366
367        Ok(ReplyOpen {
368            fh: hd,
369            flags: flags as u32,
370        })
371    }
372
373    /// read data. Read should send exactly the number of bytes requested except on EOF or error,
374    /// otherwise the rest of the data will be substituted with zeroes. An exception to this is
375    /// when the file has been opened in `direct_io` mode, in which case the return value of the
376    /// read system call will reflect the return value of this operation. `fh` will contain the
377    /// value set by the open method, or will be undefined if the open method didn't set any value.
378    async fn read(
379        &self,
380        req: Request,
381        inode: Inode,
382        fh: u64,
383        offset: u64,
384        size: u32,
385    ) -> Result<ReplyData> {
386        let data = self.get_data(req, Some(fh), inode, 0).await?;
387
388        match data.real_handle {
389            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
390            Some(ref hd) => {
391                hd.layer
392                    .read(
393                        req,
394                        hd.inode,
395                        hd.handle.load(Ordering::Relaxed),
396                        offset,
397                        size,
398                    )
399                    .await
400            }
401        }
402    }
403
404    /// write data. Write should return exactly the number of bytes requested except on error. An
405    /// exception to this is when the file has been opened in `direct_io` mode, in which case the
406    /// return value of the write system call will reflect the return value of this operation. `fh`
407    /// will contain the value set by the open method, or will be undefined if the open method
408    /// didn't set any value. When `write_flags` contains
409    /// [`FUSE_WRITE_CACHE`](crate::raw::flags::FUSE_WRITE_CACHE), means the write operation is a
410    /// delay write.
411    #[allow(clippy::too_many_arguments)]
412    async fn write(
413        &self,
414        req: Request,
415        inode: Inode,
416        fh: u64,
417        offset: u64,
418        data: &[u8],
419        write_flags: u32,
420        flags: u32,
421    ) -> Result<ReplyWrite> {
422        let handle_data: Arc<HandleData> = self.get_data(req, Some(fh), inode, flags).await?;
423
424        match handle_data.real_handle {
425            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
426            Some(ref hd) => {
427                hd.layer
428                    .write(
429                        req,
430                        hd.inode,
431                        hd.handle.load(Ordering::Relaxed),
432                        offset,
433                        data,
434                        write_flags,
435                        flags,
436                    )
437                    .await
438            }
439        }
440    }
441
442    /// get filesystem statistics.
443    async fn statfs(&self, req: Request, inode: Inode) -> Result<ReplyStatFs> {
444        self.do_statvfs(req, inode).await.map_err(|e| e.into())
445    }
446
447    /// release an open file. Release is called when there are no more references to an open file:
448    /// all file descriptors are closed and all memory mappings are unmapped. For every open call
449    /// there will be exactly one release call. The filesystem may reply with an error, but error
450    /// values are not returned to `close()` or `munmap()` which triggered the release. `fh` will
451    /// contain the value set by the open method, or will be undefined if the open method didn't
452    /// set any value. `flags` will contain the same flags as for open. `flush` means flush the
453    /// data or not when closing file.
454    async fn release(
455        &self,
456        req: Request,
457        _inode: Inode,
458        fh: u64,
459        flags: u32,
460        lock_owner: u64,
461        flush: bool,
462    ) -> Result<()> {
463        if self.no_open.load(Ordering::Relaxed) {
464            info!("fuse: release is not supported.");
465            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
466        }
467
468        if let Some(hd) = self.handles.lock().await.get(&fh) {
469            let rh = if let Some(ref h) = hd.real_handle {
470                h
471            } else {
472                return Err(
473                    Error::other(format!("no real handle found for file handle {fh}")).into(),
474                );
475            };
476            let real_handle = rh.handle.load(Ordering::Relaxed);
477            let real_inode = rh.inode;
478            rh.layer
479                .release(req, real_inode, real_handle, flags, lock_owner, flush)
480                .await?;
481        }
482
483        self.handles.lock().await.remove(&fh);
484
485        Ok(())
486    }
487
488    /// synchronize file contents. If the `datasync` is true, then only the user data should be
489    /// flushed, not the metadata.
490    async fn fsync(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
491        self.do_fsync(req, inode, datasync, fh, false)
492            .await
493            .map_err(|e| e.into())
494    }
495
496    /// set an extended attribute.
497    async fn setxattr(
498        &self,
499        req: Request,
500        inode: Inode,
501        name: &OsStr,
502        value: &[u8],
503        flags: u32,
504        position: u32,
505    ) -> Result<()> {
506        let node = self.lookup_node(req, inode, "").await?;
507
508        if node.whiteout.load(Ordering::Relaxed) {
509            return Err(Error::from_raw_os_error(libc::ENOENT).into());
510        }
511
512        if !node.in_upper_layer().await {
513            // Copy node up.
514            self.copy_node_up(req, node.clone()).await?;
515        }
516
517        let (layer, _, real_inode) = node.first_layer_inode().await;
518
519        layer
520            .setxattr(req, real_inode, name, value, flags, position)
521            .await
522    }
523
524    /// Get an extended attribute. If `size` is too small, return `Err<ERANGE>`.
525    /// Otherwise, use [`ReplyXAttr::Data`] to send the attribute data, or
526    /// return an error.
527    async fn getxattr(
528        &self,
529        req: Request,
530        inode: Inode,
531        name: &OsStr,
532        size: u32,
533    ) -> Result<ReplyXAttr> {
534        let node = self.lookup_node(req, inode, "").await?;
535
536        if node.whiteout.load(Ordering::Relaxed) {
537            return Err(Error::from_raw_os_error(libc::ENOENT).into());
538        }
539
540        let (layer, real_inode) = self.find_real_inode(inode).await?;
541
542        layer.getxattr(req, real_inode, name, size).await
543    }
544
545    /// List extended attribute names.
546    ///
547    /// If `size` is too small, return `Err<ERANGE>`.  Otherwise, use
548    /// [`ReplyXAttr::Data`] to send the attribute list, or return an error.
549    async fn listxattr(&self, req: Request, inode: Inode, size: u32) -> Result<ReplyXAttr> {
550        let node = self.lookup_node(req, inode, "").await?;
551        if node.whiteout.load(Ordering::Relaxed) {
552            return Err(Error::from_raw_os_error(libc::ENOENT).into());
553        }
554        let (layer, real_inode) = self.find_real_inode(inode).await?;
555        layer.listxattr(req, real_inode, size).await
556    }
557
558    /// remove an extended attribute.
559    async fn removexattr(&self, req: Request, inode: Inode, name: &OsStr) -> Result<()> {
560        let node = self.lookup_node(req, inode, "").await?;
561
562        if node.whiteout.load(Ordering::Relaxed) {
563            return Err(Error::from_raw_os_error(libc::ENOENT).into());
564        }
565
566        if !node.in_upper_layer().await {
567            // copy node into upper layer
568            self.copy_node_up(req, node.clone()).await?;
569        }
570
571        let (layer, _, ino) = node.first_layer_inode().await;
572        layer.removexattr(req, ino, name).await
573
574        // TODO: recreate the node since removexattr may remove the opaque xattr.
575    }
576
577    /// flush method. This is called on each `close()` of the opened file. Since file descriptors
578    /// can be duplicated (`dup`, `dup2`, `fork`), for one open call there may be many flush calls.
579    /// Filesystems shouldn't assume that flush will always be called after some writes, or that if
580    /// will be called at all. `fh` will contain the value set by the open method, or will be
581    /// undefined if the open method didn't set any value.
582    ///
583    /// # Notes:
584    ///
585    /// the name of the method is misleading, since (unlike fsync) the filesystem is not forced to
586    /// flush pending writes. One reason to flush data, is if the filesystem wants to return write
587    /// errors. If the filesystem supports file locking operations ([`setlk`][Filesystem::setlk],
588    /// [`getlk`][Filesystem::getlk]) it should remove all locks belonging to `lock_owner`.
589    async fn flush(&self, req: Request, inode: Inode, fh: u64, lock_owner: u64) -> Result<()> {
590        if self.no_open.load(Ordering::Relaxed) {
591            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
592        }
593
594        let node = self.lookup_node(req, inode, "").await;
595        match node {
596            Ok(n) => {
597                if n.whiteout.load(Ordering::Relaxed) {
598                    return Err(Error::from_raw_os_error(libc::ENOENT).into());
599                }
600            }
601            Err(e) => {
602                if e.raw_os_error() == Some(libc::ENOENT) {
603                    trace!("flush: inode {inode} is stale");
604                } else {
605                    return Err(e.into());
606                }
607            }
608        }
609
610        let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
611
612        // FIXME: need to test if inode matches corresponding handle?
613        if inode
614            != self
615                .handles
616                .lock()
617                .await
618                .get(&fh)
619                .map(|h| h.node.inode)
620                .unwrap_or(0)
621        {
622            return Err(Error::other("inode does not match handle").into());
623        }
624
625        trace!("flushing, real_inode: {real_inode}, real_handle: {real_handle}");
626        layer.flush(req, real_inode, real_handle, lock_owner).await
627    }
628
629    /// open a directory. Filesystem may store an arbitrary file handle (pointer, index, etc) in
630    /// `fh`, and use this in other all other directory stream operations
631    /// ([`readdir`][Filesystem::readdir], [`releasedir`][Filesystem::releasedir],
632    /// [`fsyncdir`][Filesystem::fsyncdir]). Filesystem may also implement stateless directory
633    /// I/O and not store anything in `fh`.  A file system need not implement this method if it
634    /// sets [`MountOptions::no_open_dir_support`][crate::MountOptions::no_open_dir_support] and
635    /// if the kernel supports `FUSE_NO_OPENDIR_SUPPORT`.
636    async fn opendir(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
637        if self.no_opendir.load(Ordering::Relaxed) {
638            info!("fuse: opendir is not supported.");
639            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
640        }
641
642        // lookup node
643        let node = self.lookup_node(req, inode, ".").await?;
644
645        if node.whiteout.load(Ordering::Relaxed) {
646            return Err(Error::from_raw_os_error(libc::ENOENT).into());
647        }
648
649        let st = node.stat64(req).await?;
650        if !utils::is_dir(&st.attr.kind) {
651            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
652        }
653
654        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
655        // Get the layer information and open directory in the underlying layer
656        let (layer, in_upper_layer, real_inode) = node.first_layer_inode().await;
657        let reply = layer.opendir(req, real_inode, flags).await?;
658
659        self.handles.lock().await.insert(
660            handle,
661            Arc::new(HandleData {
662                node: Arc::clone(&node),
663                real_handle: Some(RealHandle {
664                    layer,
665                    in_upper_layer,
666                    inode: real_inode,
667                    handle: AtomicU64::new(reply.fh),
668                }),
669            }),
670        );
671
672        Ok(ReplyOpen { fh: handle, flags })
673    }
674
675    /// read directory. `offset` is used to track the offset of the directory entries. `fh` will
676    /// contain the value set by the [`opendir`][Filesystem::opendir] method, or will be
677    /// undefined if the [`opendir`][Filesystem::opendir] method didn't set any value.
678    async fn readdir<'a>(
679        &'a self,
680        req: Request,
681        parent: Inode,
682        fh: u64,
683        offset: i64,
684    ) -> Result<
685        ReplyDirectory<
686            impl futures_util::stream::Stream<Item = Result<DirectoryEntry>> + Send + 'a,
687        >,
688    > {
689        if self.config.no_readdir {
690            info!("fuse: readdir is not supported.");
691            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
692        }
693        let entries = self
694            .do_readdir(req, parent, fh, offset.try_into().unwrap())
695            .await?;
696        Ok(ReplyDirectory { entries })
697    }
698
699    /// read directory entries, but with their attribute, like [`readdir`][Filesystem::readdir]
700    /// + [`lookup`][Filesystem::lookup] at the same time.
701    async fn readdirplus<'a>(
702        &'a self,
703        req: Request,
704        parent: Inode,
705        fh: u64,
706        offset: u64,
707        _lock_owner: u64,
708    ) -> Result<
709        ReplyDirectoryPlus<
710            impl futures_util::stream::Stream<Item = Result<DirectoryEntryPlus>> + Send + 'a,
711        >,
712    > {
713        if self.config.no_readdir {
714            info!("fuse: readdir is not supported.");
715            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
716        }
717        trace!("readdirplus: parent: {parent}, fh: {fh}, offset: {offset}");
718        let entries = self.do_readdirplus(req, parent, fh, offset).await?;
719        match self.handles.lock().await.get(&fh) {
720            Some(h) => {
721                trace!(
722                    "after readdirplus: found handle, seeing real_handle: {}",
723                    h.real_handle.is_some()
724                );
725            }
726            None => trace!("after readdirplus: no handle found: {fh}"),
727        }
728        Ok(ReplyDirectoryPlus { entries })
729    }
730    /// release an open directory. For every [`opendir`][Filesystem::opendir] call there will
731    /// be exactly one `releasedir` call. `fh` will contain the value set by the
732    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
733    /// [`opendir`][Filesystem::opendir] method didn't set any value.
734    async fn releasedir(&self, req: Request, _inode: Inode, fh: u64, flags: u32) -> Result<()> {
735        if self.no_opendir.load(Ordering::Relaxed) {
736            info!("fuse: releasedir is not supported.");
737            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
738        }
739
740        if let Some(hd) = self.handles.lock().await.get(&fh) {
741            let rh = if let Some(ref h) = hd.real_handle {
742                h
743            } else {
744                return Err(
745                    Error::other(format!("no real handle found for file handle {fh}")).into(),
746                );
747            };
748            let real_handle = rh.handle.load(Ordering::Relaxed);
749            let real_inode = rh.inode;
750            rh.layer
751                .releasedir(req, real_inode, real_handle, flags)
752                .await?;
753        }
754
755        self.handles.lock().await.remove(&fh);
756        Ok(())
757    }
758
759    /// synchronize directory contents. If the `datasync` is true, then only the directory contents
760    /// should be flushed, not the metadata. `fh` will contain the value set by the
761    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
762    /// [`opendir`][Filesystem::opendir] method didn't set any value.
763    async fn fsyncdir(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
764        self.do_fsync(req, inode, datasync, fh, true)
765            .await
766            .map_err(|e| e.into())
767    }
768    /// check file access permissions. This will be called for the `access()` system call. If the
769    /// `default_permissions` mount option is given, this method is not be called. This method is
770    /// not called under Linux kernel versions 2.4.x.
771    async fn access(&self, req: Request, inode: Inode, mask: u32) -> Result<()> {
772        let node = self.lookup_node(req, inode, "").await?;
773
774        if node.whiteout.load(Ordering::Relaxed) {
775            return Err(Error::from_raw_os_error(libc::ENOENT).into());
776        }
777
778        let (layer, real_inode) = self.find_real_inode(inode).await?;
779        layer.access(req, real_inode, mask).await
780    }
781
782    /// create and open a file. If the file does not exist, first create it with the specified
783    /// mode, and then open it. Open flags (with the exception of `O_NOCTTY`) are available in
784    /// flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in `fh`, and use
785    /// this in other all other file operations ([`read`][Filesystem::read],
786    /// [`write`][Filesystem::write], [`flush`][Filesystem::flush],
787    /// [`release`][Filesystem::release], [`fsync`][Filesystem::fsync]). There are also some flags
788    /// (`direct_io`, `keep_cache`) which the filesystem may set, to change the way the file is
789    /// opened. If this method is not implemented or under Linux kernel versions earlier than
790    /// 2.6.15, the [`mknod`][Filesystem::mknod] and [`open`][Filesystem::open] methods will be
791    /// called instead.
792    ///
793    /// # Notes:
794    ///
795    /// See `fuse_file_info` structure in
796    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
797    /// more details.
798    async fn create(
799        &self,
800        req: Request,
801        parent: Inode,
802        name: &OsStr,
803        mode: u32,
804        flags: u32,
805    ) -> Result<ReplyCreated> {
806        // Parent doesn't exist.
807        let pnode = self.lookup_node(req, parent, "").await?;
808        if pnode.whiteout.load(Ordering::Relaxed) {
809            return Err(Error::from_raw_os_error(libc::ENOENT).into());
810        }
811
812        let mut flags: i32 = flags as i32;
813        flags |= libc::O_NOFOLLOW;
814        flags &= !libc::O_DIRECT;
815        if self.config.writeback {
816            if flags & libc::O_ACCMODE == libc::O_WRONLY {
817                flags &= !libc::O_ACCMODE;
818                flags |= libc::O_RDWR;
819            }
820
821            if flags & libc::O_APPEND != 0 {
822                flags &= !libc::O_APPEND;
823            }
824        }
825
826        let final_handle = self
827            .do_create(req, &pnode, name, mode, flags.try_into().unwrap())
828            .await?;
829        let entry = self.do_lookup(req, parent, name.to_str().unwrap()).await?;
830        let fh = final_handle
831            .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "Handle not found"))?;
832
833        let mut opts = OpenOptions::empty();
834        match self.config.cache_policy {
835            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
836            CachePolicy::Auto => opts |= OpenOptions::DIRECT_IO,
837            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
838        }
839
840        Ok(ReplyCreated {
841            ttl: entry.ttl,
842            attr: entry.attr,
843            generation: entry.generation,
844            fh,
845            flags: opts.bits(),
846        })
847    }
848
849    /// forget more than one inode. This is a batch version [`forget`][Filesystem::forget]
850    async fn batch_forget(&self, _req: Request, inodes: &[(Inode, u64)]) {
851        for inode in inodes {
852            self.forget_one(inode.0, inode.1).await;
853        }
854    }
855
856    /// allocate space for an open file. This function ensures that required space is allocated for
857    /// specified file.
858    ///
859    /// # Notes:
860    ///
861    /// more information about `fallocate`, please see **`man 2 fallocate`**
862    async fn fallocate(
863        &self,
864        req: Request,
865        inode: Inode,
866        fh: u64,
867        offset: u64,
868        length: u64,
869        mode: u32,
870    ) -> Result<()> {
871        // Use O_RDONLY flags which indicates no copy up.
872        let data = self
873            .get_data(req, Some(fh), inode, libc::O_RDONLY as u32)
874            .await?;
875
876        match data.real_handle {
877            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
878            Some(ref rhd) => {
879                if !rhd.in_upper_layer {
880                    // TODO: in lower layer, error out or just success?
881                    return Err(Error::from_raw_os_error(libc::EROFS).into());
882                }
883                rhd.layer
884                    .fallocate(
885                        req,
886                        rhd.inode,
887                        rhd.handle.load(Ordering::Relaxed),
888                        offset,
889                        length,
890                        mode,
891                    )
892                    .await
893            }
894        }
895    }
896
897    /// find next data or hole after the specified offset.
898    async fn lseek(
899        &self,
900        req: Request,
901        inode: Inode,
902        fh: u64,
903        offset: u64,
904        whence: u32,
905    ) -> Result<ReplyLSeek> {
906        let node = self.lookup_node(req, inode, "").await?;
907
908        if node.whiteout.load(Ordering::Relaxed) {
909            return Err(Error::from_raw_os_error(libc::ENOENT).into());
910        }
911
912        let st = node.stat64(req).await?;
913        if utils::is_dir(&st.attr.kind) {
914            // Special handling and security restrictions for directory operations.
915            // Use the common API to obtain the underlying layer and handle info.
916            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
917
918            // Verify that the underlying handle refers to a directory.
919            let handle_stat = match layer.getattr(req, real_inode, Some(real_handle), 0).await {
920                Ok(s) => s,
921                Err(_) => return Err(Error::from_raw_os_error(libc::EBADF).into()),
922            };
923
924            if !utils::is_dir(&handle_stat.attr.kind) {
925                return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
926            }
927
928            // Handle directory lseek operations according to POSIX standard
929            // This enables seekdir/telldir functionality on directories
930            match whence {
931                // SEEK_SET: Set the directory position to an absolute value
932                x if x == libc::SEEK_SET as u32 => {
933                    // Validate offset bounds to prevent overflow
934                    // Directory offsets should not exceed i64::MAX
935                    if offset > i64::MAX as u64 {
936                        return Err(Error::from_raw_os_error(libc::EINVAL).into());
937                    }
938
939                    // Perform the seek operation on the underlying layer
940                    // Delegate to the lower layer implementation
941                    layer
942                        .lseek(req, real_inode, real_handle, offset, whence)
943                        .await
944                }
945                // SEEK_CUR: Move relative to the current directory position
946                x if x == libc::SEEK_CUR as u32 => {
947                    // Get current position from underlying layer
948                    // This is needed to calculate the new position
949                    let current = match layer
950                        .lseek(req, real_inode, real_handle, 0, libc::SEEK_CUR as u32)
951                        .await
952                    {
953                        Ok(r) => r.offset,
954                        Err(_) => return Err(Error::from_raw_os_error(libc::EINVAL).into()),
955                    };
956
957                    // Check for potential overflow when adding the provided offset
958                    // This prevents invalid position calculations
959                    if let Some(new_offset) = current.checked_add(offset) {
960                        // Ensure the new offset is within valid bounds
961                        if new_offset > i64::MAX as u64 {
962                            return Err(Error::from_raw_os_error(libc::EINVAL).into());
963                        }
964
965                        // Actually set the underlying offset to the new value so behavior
966                        // matches passthrough which uses libc::lseek64 to set the fd offset.
967                        match layer
968                            .lseek(
969                                req,
970                                real_inode,
971                                real_handle,
972                                new_offset,
973                                libc::SEEK_SET as u32,
974                            )
975                            .await
976                        {
977                            Ok(_) => Ok(ReplyLSeek { offset: new_offset }),
978                            Err(_) => Err(Error::from_raw_os_error(libc::EINVAL).into()),
979                        }
980                    } else {
981                        Err(Error::from_raw_os_error(libc::EINVAL).into())
982                    }
983                }
984                // Any other whence value is invalid for directories
985                _ => Err(Error::from_raw_os_error(libc::EINVAL).into()),
986            }
987        } else {
988            // Keep the original lseek behavior for regular files
989            // Delegate directly to the underlying layer
990            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
991            layer
992                .lseek(req, real_inode, real_handle, offset, whence)
993                .await
994        }
995    }
996
997    async fn interrupt(&self, _req: Request, _unique: u64) -> Result<()> {
998        Ok(())
999    }
1000}
1001
#[cfg(test)]
mod tests {
    use std::{ffi::OsString, path::PathBuf, sync::Arc};

    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;
    use tracing_subscriber::EnvFilter;

    use crate::unionfs::BoxedLayer;
    use crate::{
        passthrough::{PassthroughArgs, new_passthroughfs_layer, newlogfs::LoggingFileSystem},
        unionfs::{OverlayFs, config::Config},
    };

    /// Manual smoke test: mounts an overlay built from one lower directory and one upper
    /// directory, wrapped in the logging filesystem, and serves it until the mount ends or
    /// Ctrl-C is received.
    ///
    /// Marked `#[ignore]` because it relies on hard-coded local paths and a working FUSE mount.
    #[tokio::test]
    #[ignore]
    async fn test_a_ovlfs() {
        // Installing the subscriber may fail if one is already set; that result is ignored.
        let trace_filter =
            EnvFilter::from_default_env().add_directive("trace".parse().unwrap());
        let _ = tracing_subscriber::fmt()
            .with_env_filter(trace_filter)
            .try_init();

        // Locations used by the test.
        let mount_dir = PathBuf::from("/home/luxian/megatest/true_temp");
        let lower_dirs = vec![PathBuf::from("/home/luxian/github/buck2-rust-third-party")];
        let upper_dir = PathBuf::from("/home/luxian/upper");

        // One passthrough layer per lower directory.
        let mut lower_layers: Vec<Arc<BoxedLayer>> = Vec::with_capacity(lower_dirs.len());
        for dir in lower_dirs {
            let args = PassthroughArgs {
                root_dir: dir,
                mapping: None::<&str>,
            };
            let layer = new_passthroughfs_layer(args).await.unwrap();
            lower_layers.push(Arc::new(layer) as Arc<BoxedLayer>);
        }

        // The upper layer is a passthrough over the upper directory.
        let upper_args = PassthroughArgs {
            root_dir: upper_dir,
            mapping: None::<&str>,
        };
        let upper_layer: Arc<BoxedLayer> =
            Arc::new(new_passthroughfs_layer(upper_args).await.unwrap());

        // Assemble the overlay and wrap it in the logging filesystem.
        let config = Config {
            mountpoint: mount_dir.clone(),
            do_import: true,
            ..Default::default()
        };
        let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1).unwrap();
        let logfs = LoggingFileSystem::new(overlayfs);

        let mount_path: OsString = OsString::from(mount_dir);

        // SAFETY: `getuid` and `getgid` take no arguments and only read process credentials.
        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };

        // `false` selects the unprivileged mount path below.
        let not_unprivileged = false;

        let mut mount_options = MountOptions::default();
        mount_options.force_readdir_plus(true).uid(uid).gid(gid);

        let session = Session::new(mount_options);
        let mut mount_handle: rfuse3::raw::MountHandle = if not_unprivileged {
            session.mount(logfs, mount_path).await.unwrap()
        } else {
            session
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        };

        // Serve until the mount finishes on its own, or unmount when Ctrl-C arrives.
        let handle = &mut mount_handle;

        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}