libfuse_fs/unionfs/
async_io.rs

1use super::utils;
2use super::{CachePolicy, HandleData, Inode, OverlayFs, RealHandle};
3use crate::util::open_options::OpenOptions;
4use rfuse3::raw::prelude::*;
5use rfuse3::*;
6use std::ffi::OsStr;
7use std::io::Error;
8use std::io::ErrorKind;
9use std::num::NonZeroU32;
10use std::sync::Arc;
11use std::sync::atomic::{AtomicU64, Ordering};
12use tracing::info;
13use tracing::trace;
14
15impl Filesystem for OverlayFs {
16    /// initialize filesystem. Called before any other filesystem method.
17    async fn init(&self, req: Request) -> Result<ReplyInit> {
18        for layer in self.lower_layers.iter() {
19            layer.init(req).await?;
20        }
21        if let Some(upper) = &self.upper_layer {
22            upper.init(req).await?;
23        }
24        if self.config.do_import {
25            self.import().await?;
26        }
27        if !self.config.do_import || self.config.writeback {
28            self.writeback.store(true, Ordering::Relaxed);
29        }
30        if !self.config.do_import || self.config.no_open {
31            self.no_open.store(true, Ordering::Relaxed);
32        }
33        if !self.config.do_import || self.config.no_opendir {
34            self.no_opendir.store(true, Ordering::Relaxed);
35        }
36        if !self.config.do_import || self.config.killpriv_v2 {
37            self.killpriv_v2.store(true, Ordering::Relaxed);
38        }
39        if self.config.perfile_dax {
40            self.perfile_dax.store(true, Ordering::Relaxed);
41        }
42
43        Ok(ReplyInit {
44            max_write: NonZeroU32::new(128 * 1024).unwrap(),
45        })
46    }
47
48    /// clean up filesystem. Called on filesystem exit which is fuseblk, in normal fuse filesystem,
49    /// kernel may call forget for root. There is some discuss for this
50    /// <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
51    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
52    async fn destroy(&self, _req: Request) {}
53
54    /// look up a directory entry by name and get its attributes.
55    async fn lookup(&self, req: Request, parent: Inode, name: &OsStr) -> Result<ReplyEntry> {
56        let tmp = name.to_string_lossy().to_string();
57        let result = self.do_lookup(req, parent, tmp.as_str()).await;
58        match result {
59            Ok(e) => Ok(e),
60            Err(err) => Err(err.into()),
61        }
62    }
63
64    /// forget an inode. The nlookup parameter indicates the number of lookups previously
65    /// performed on this inode. If the filesystem implements inode lifetimes, it is recommended
66    /// that inodes acquire a single reference on each lookup, and lose nlookup references on each
67    /// forget. The filesystem may ignore forget calls, if the inodes don't need to have a limited
68    /// lifetime. On unmount it is not guaranteed, that all referenced inodes will receive a forget
69    /// message. When filesystem is normal(not fuseblk) and unmounting, kernel may send forget
70    /// request for root and this library will stop session after call forget. There is some
71    /// discussion for this <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
72    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
73    async fn forget(&self, _req: Request, inode: Inode, nlookup: u64) {
74        self.forget_one(inode, nlookup).await;
75    }
76
77    /// get file attributes. If `fh` is None, means `fh` is not set.
78    async fn getattr(
79        &self,
80        req: Request,
81        inode: Inode,
82        fh: Option<u64>,
83        flags: u32,
84    ) -> Result<ReplyAttr> {
85        if !self.no_open.load(Ordering::Relaxed)
86            && let Some(h) = fh
87        {
88            let handles = self.handles.lock().await;
89            if let Some(hd) = handles.get(&h)
90                && let Some(ref rh) = hd.real_handle
91            {
92                let mut rep: ReplyAttr = rh
93                    .layer
94                    .getattr(req, rh.inode, Some(rh.handle.load(Ordering::Relaxed)), 0)
95                    .await?;
96                rep.attr.ino = inode;
97                return Ok(rep);
98            }
99        }
100
101        let node: Arc<super::OverlayInode> = self.lookup_node(req, inode, "").await?;
102        let (layer, _, lower_inode) = node.first_layer_inode().await;
103        let mut re = layer.getattr(req, lower_inode, None, flags).await?;
104        re.attr.ino = inode;
105        Ok(re)
106    }
107
108    /// set file attributes. If `fh` is None, means `fh` is not set.
109    async fn setattr(
110        &self,
111        req: Request,
112        inode: Inode,
113        fh: Option<u64>,
114        set_attr: SetAttr,
115    ) -> Result<ReplyAttr> {
116        // Check if upper layer exists.
117        self.upper_layer
118            .as_ref()
119            .cloned()
120            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
121
122        // deal with handle first
123        if !self.no_open.load(Ordering::Relaxed)
124            && let Some(h) = fh
125        {
126            let handles = self.handles.lock().await;
127            if let Some(hd) = handles.get(&h)
128                && let Some(ref rhd) = hd.real_handle
129            {
130                // handle opened in upper layer
131                if rhd.in_upper_layer {
132                    let mut rep = rhd
133                        .layer
134                        .setattr(
135                            req,
136                            rhd.inode,
137                            Some(rhd.handle.load(Ordering::Relaxed)),
138                            set_attr,
139                        )
140                        .await?;
141                    rep.attr.ino = inode;
142                    return Ok(rep);
143                }
144            }
145        }
146
147        let mut node = self.lookup_node(req, inode, "").await?;
148
149        if !node.in_upper_layer().await {
150            node = self.copy_node_up(req, node.clone()).await?
151        }
152
153        let (layer, _, real_inode) = node.first_layer_inode().await;
154        // layer.setattr(req, real_inode, None, set_attr).await
155        let mut rep = layer.setattr(req, real_inode, None, set_attr).await?;
156        rep.attr.ino = inode;
157        Ok(rep)
158    }
159
160    /// read symbolic link.
161    async fn readlink(&self, req: Request, inode: Inode) -> Result<ReplyData> {
162        trace!("READLINK: inode: {inode}\n");
163
164        let node = self.lookup_node(req, inode, "").await?;
165
166        if node.whiteout.load(Ordering::Relaxed) {
167            return Err(Error::from_raw_os_error(libc::ENOENT).into());
168        }
169
170        let (layer, _, inode) = node.first_layer_inode().await;
171        layer.readlink(req, inode).await
172    }
173
174    /// create a symbolic link.
175    async fn symlink(
176        &self,
177        req: Request,
178        parent: Inode,
179        name: &OsStr,
180        link: &OsStr,
181    ) -> Result<ReplyEntry> {
182        // soft link
183        let sname = name.to_string_lossy().into_owned().to_owned();
184        let slinkname = link.to_string_lossy().into_owned().to_owned();
185
186        let pnode = self.lookup_node(req, parent, "").await?;
187        self.do_symlink(req, slinkname.as_str(), &pnode, sname.as_str())
188            .await?;
189
190        self.do_lookup(req, parent, sname.as_str())
191            .await
192            .map_err(|e| e.into())
193    }
194
195    /// create file node. Create a regular file, character device, block device, fifo or socket
196    /// node. When creating file, most cases user only need to implement
197    /// [`create`][Filesystem::create].
198    async fn mknod(
199        &self,
200        req: Request,
201        parent: Inode,
202        name: &OsStr,
203        mode: u32,
204        rdev: u32,
205    ) -> Result<ReplyEntry> {
206        let sname = name.to_string_lossy().to_string();
207
208        // Check if parent exists.
209        let pnode = self.lookup_node(req, parent, "").await?;
210        if pnode.whiteout.load(Ordering::Relaxed) {
211            return Err(Error::from_raw_os_error(libc::ENOENT).into());
212        }
213
214        self.do_mknod(req, &pnode, sname.as_str(), mode, rdev, 0)
215            .await?;
216        self.do_lookup(req, parent, sname.as_str())
217            .await
218            .map_err(|e| e.into())
219    }
220
221    /// create a directory.
222    async fn mkdir(
223        &self,
224        req: Request,
225        parent: Inode,
226        name: &OsStr,
227        mode: u32,
228        umask: u32,
229    ) -> Result<ReplyEntry> {
230        let sname = name.to_string_lossy().to_string();
231
232        // no entry or whiteout
233        let pnode = self.lookup_node(req, parent, "").await?;
234        if pnode.whiteout.load(Ordering::Relaxed) {
235            return Err(Error::from_raw_os_error(libc::ENOENT).into());
236        }
237
238        self.do_mkdir(req, pnode, sname.as_str(), mode, umask)
239            .await?;
240        self.do_lookup(req, parent, sname.as_str())
241            .await
242            .map_err(|e| e.into())
243    }
244
245    /// remove a file.
246    async fn unlink(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
247        self.do_rm(req, parent, name, false)
248            .await
249            .map_err(|e| e.into())
250    }
251
252    /// remove a directory.
253    async fn rmdir(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
254        self.do_rm(req, parent, name, true)
255            .await
256            .map_err(|e| e.into())
257    }
258
259    /// rename a file or directory.
260    async fn rename(
261        &self,
262        req: Request,
263        parent: Inode,
264        name: &OsStr,
265        new_parent: Inode,
266        new_name: &OsStr,
267    ) -> Result<()> {
268        self.do_rename(req, parent, name, new_parent, new_name)
269            .await
270            .map_err(|e| e.into())
271    }
272
273    /// create a hard link.
274    async fn link(
275        &self,
276        req: Request,
277        inode: Inode,
278        new_parent: Inode,
279        new_name: &OsStr,
280    ) -> Result<ReplyEntry> {
281        let node = self.lookup_node(req, inode, "").await?;
282        if node.whiteout.load(Ordering::Relaxed) {
283            return Err(Error::from_raw_os_error(libc::ENOENT).into());
284        }
285
286        let newpnode = self.lookup_node(req, new_parent, "").await?;
287        if newpnode.whiteout.load(Ordering::Relaxed) {
288            return Err(Error::from_raw_os_error(libc::ENOENT).into());
289        }
290        let new_name = new_name.to_str().unwrap();
291        // trace!(
292        //     "LINK: inode: {}, new_parent: {}, trying to do_link: src_inode: {}, newpnode: {}",
293        //     inode, new_parent, node.inode, newpnode.inode
294        // );
295        self.do_link(req, &node, &newpnode, new_name).await?;
296        // trace!("LINK: done, looking up new entry");
297        self.do_lookup(req, new_parent, new_name)
298            .await
299            .map_err(|e| e.into())
300    }
301
302    /// open a file. Open flags (with the exception of `O_CREAT`, `O_EXCL` and `O_NOCTTY`) are
303    /// available in flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in
304    /// fh, and use this in other all other file operations (read, write, flush, release, fsync).
305    /// Filesystem may also implement stateless file I/O and not store anything in fh. There are
306    /// also some flags (`direct_io`, `keep_cache`) which the filesystem may set, to change the way
307    /// the file is opened. A filesystem need not implement this method if it
308    /// sets [`MountOptions::no_open_support`][crate::MountOptions::no_open_support] and if the
309    /// kernel supports `FUSE_NO_OPEN_SUPPORT`.
310    ///
311    /// # Notes:
312    ///
313    /// See `fuse_file_info` structure in
314    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
315    /// more details.
316    async fn open(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
317        if self.no_open.load(Ordering::Relaxed) {
318            info!("fuse: open is not supported.");
319            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
320        }
321
322        let readonly: bool = flags
323            & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
324                as u32
325            == 0;
326        // toggle flags
327        let mut flags: i32 = flags as i32;
328
329        flags |= libc::O_NOFOLLOW;
330
331        if self.config.writeback {
332            if flags & libc::O_ACCMODE == libc::O_WRONLY {
333                flags &= !libc::O_ACCMODE;
334                flags |= libc::O_RDWR;
335            }
336
337            if flags & libc::O_APPEND != 0 {
338                flags &= !libc::O_APPEND;
339            }
340        }
341        // lookup node
342        let node = self.lookup_node(req, inode, "").await?;
343
344        // whiteout node
345        if node.whiteout.load(Ordering::Relaxed) {
346            return Err(Error::from_raw_os_error(libc::ENOENT).into());
347        }
348
349        if !readonly {
350            // copy up to upper layer
351            self.copy_node_up(req, node.clone()).await?;
352        }
353
354        // assign a handle in overlayfs and open it
355        let (_l, h) = node.open(req, flags as u32, 0).await?;
356
357        let hd = self.next_handle.fetch_add(1, Ordering::Relaxed);
358        let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
359        let handle_data = HandleData {
360            node: node.clone(),
361            real_handle: Some(RealHandle {
362                layer,
363                in_upper_layer,
364                inode,
365                handle: AtomicU64::new(h.fh),
366            }),
367        };
368
369        self.handles.lock().await.insert(hd, Arc::new(handle_data));
370
371        trace!("OPEN: returning handle: {hd}");
372
373        Ok(ReplyOpen {
374            fh: hd,
375            flags: flags as u32,
376        })
377    }
378
379    /// read data. Read should send exactly the number of bytes requested except on EOF or error,
380    /// otherwise the rest of the data will be substituted with zeroes. An exception to this is
381    /// when the file has been opened in `direct_io` mode, in which case the return value of the
382    /// read system call will reflect the return value of this operation. `fh` will contain the
383    /// value set by the open method, or will be undefined if the open method didn't set any value.
384    async fn read(
385        &self,
386        req: Request,
387        inode: Inode,
388        fh: u64,
389        offset: u64,
390        size: u32,
391    ) -> Result<ReplyData> {
392        let data = self.get_data(req, Some(fh), inode, 0).await?;
393
394        match data.real_handle {
395            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
396            Some(ref hd) => {
397                hd.layer
398                    .read(
399                        req,
400                        hd.inode,
401                        hd.handle.load(Ordering::Relaxed),
402                        offset,
403                        size,
404                    )
405                    .await
406            }
407        }
408    }
409
410    /// write data. Write should return exactly the number of bytes requested except on error. An
411    /// exception to this is when the file has been opened in `direct_io` mode, in which case the
412    /// return value of the write system call will reflect the return value of this operation. `fh`
413    /// will contain the value set by the open method, or will be undefined if the open method
414    /// didn't set any value. When `write_flags` contains
415    /// [`FUSE_WRITE_CACHE`](crate::raw::flags::FUSE_WRITE_CACHE), means the write operation is a
416    /// delay write.
417    #[allow(clippy::too_many_arguments)]
418    async fn write(
419        &self,
420        req: Request,
421        inode: Inode,
422        fh: u64,
423        offset: u64,
424        data: &[u8],
425        write_flags: u32,
426        flags: u32,
427    ) -> Result<ReplyWrite> {
428        let handle_data: Arc<HandleData> = self.get_data(req, Some(fh), inode, flags).await?;
429
430        match handle_data.real_handle {
431            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
432            Some(ref hd) => {
433                hd.layer
434                    .write(
435                        req,
436                        hd.inode,
437                        hd.handle.load(Ordering::Relaxed),
438                        offset,
439                        data,
440                        write_flags,
441                        flags,
442                    )
443                    .await
444            }
445        }
446    }
447
448    /// get filesystem statistics.
449    async fn statfs(&self, req: Request, inode: Inode) -> Result<ReplyStatFs> {
450        self.do_statvfs(req, inode).await.map_err(|e| e.into())
451    }
452
453    /// release an open file. Release is called when there are no more references to an open file:
454    /// all file descriptors are closed and all memory mappings are unmapped. For every open call
455    /// there will be exactly one release call. The filesystem may reply with an error, but error
456    /// values are not returned to `close()` or `munmap()` which triggered the release. `fh` will
457    /// contain the value set by the open method, or will be undefined if the open method didn't
458    /// set any value. `flags` will contain the same flags as for open. `flush` means flush the
459    /// data or not when closing file.
460    async fn release(
461        &self,
462        req: Request,
463        _inode: Inode,
464        fh: u64,
465        flags: u32,
466        lock_owner: u64,
467        flush: bool,
468    ) -> Result<()> {
469        if self.no_open.load(Ordering::Relaxed) {
470            info!("fuse: release is not supported.");
471            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
472        }
473
474        if let Some(hd) = self.handles.lock().await.get(&fh) {
475            let rh = if let Some(ref h) = hd.real_handle {
476                h
477            } else {
478                return Err(
479                    Error::other(format!("no real handle found for file handle {fh}")).into(),
480                );
481            };
482            let real_handle = rh.handle.load(Ordering::Relaxed);
483            let real_inode = rh.inode;
484            rh.layer
485                .release(req, real_inode, real_handle, flags, lock_owner, flush)
486                .await?;
487        }
488
489        self.handles.lock().await.remove(&fh);
490
491        Ok(())
492    }
493
494    /// synchronize file contents. If the `datasync` is true, then only the user data should be
495    /// flushed, not the metadata.
496    async fn fsync(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
497        self.do_fsync(req, inode, datasync, fh, false)
498            .await
499            .map_err(|e| e.into())
500    }
501
502    /// set an extended attribute.
503    async fn setxattr(
504        &self,
505        req: Request,
506        inode: Inode,
507        name: &OsStr,
508        value: &[u8],
509        flags: u32,
510        position: u32,
511    ) -> Result<()> {
512        let node = self.lookup_node(req, inode, "").await?;
513
514        if node.whiteout.load(Ordering::Relaxed) {
515            return Err(Error::from_raw_os_error(libc::ENOENT).into());
516        }
517
518        if !node.in_upper_layer().await {
519            // Copy node up.
520            self.copy_node_up(req, node.clone()).await?;
521        }
522
523        let (layer, _, real_inode) = node.first_layer_inode().await;
524
525        layer
526            .setxattr(req, real_inode, name, value, flags, position)
527            .await
528    }
529
530    /// Get an extended attribute. If `size` is too small, return `Err<ERANGE>`.
531    /// Otherwise, use [`ReplyXAttr::Data`] to send the attribute data, or
532    /// return an error.
533    async fn getxattr(
534        &self,
535        req: Request,
536        inode: Inode,
537        name: &OsStr,
538        size: u32,
539    ) -> Result<ReplyXAttr> {
540        let node = self.lookup_node(req, inode, "").await?;
541
542        if node.whiteout.load(Ordering::Relaxed) {
543            return Err(Error::from_raw_os_error(libc::ENOENT).into());
544        }
545
546        let (layer, real_inode) = self.find_real_inode(inode).await?;
547
548        layer.getxattr(req, real_inode, name, size).await
549    }
550
551    /// List extended attribute names.
552    ///
553    /// If `size` is too small, return `Err<ERANGE>`.  Otherwise, use
554    /// [`ReplyXAttr::Data`] to send the attribute list, or return an error.
555    async fn listxattr(&self, req: Request, inode: Inode, size: u32) -> Result<ReplyXAttr> {
556        let node = self.lookup_node(req, inode, "").await?;
557        if node.whiteout.load(Ordering::Relaxed) {
558            return Err(Error::from_raw_os_error(libc::ENOENT).into());
559        }
560        let (layer, real_inode) = self.find_real_inode(inode).await?;
561        layer.listxattr(req, real_inode, size).await
562    }
563
564    /// remove an extended attribute.
565    async fn removexattr(&self, req: Request, inode: Inode, name: &OsStr) -> Result<()> {
566        let node = self.lookup_node(req, inode, "").await?;
567
568        if node.whiteout.load(Ordering::Relaxed) {
569            return Err(Error::from_raw_os_error(libc::ENOENT).into());
570        }
571
572        if !node.in_upper_layer().await {
573            // copy node into upper layer
574            self.copy_node_up(req, node.clone()).await?;
575        }
576
577        let (layer, _, ino) = node.first_layer_inode().await;
578        layer.removexattr(req, ino, name).await
579
580        // TODO: recreate the node since removexattr may remove the opaque xattr.
581    }
582
583    /// flush method. This is called on each `close()` of the opened file. Since file descriptors
584    /// can be duplicated (`dup`, `dup2`, `fork`), for one open call there may be many flush calls.
585    /// Filesystems shouldn't assume that flush will always be called after some writes, or that if
586    /// will be called at all. `fh` will contain the value set by the open method, or will be
587    /// undefined if the open method didn't set any value.
588    ///
589    /// # Notes:
590    ///
591    /// the name of the method is misleading, since (unlike fsync) the filesystem is not forced to
592    /// flush pending writes. One reason to flush data, is if the filesystem wants to return write
593    /// errors. If the filesystem supports file locking operations ([`setlk`][Filesystem::setlk],
594    /// [`getlk`][Filesystem::getlk]) it should remove all locks belonging to `lock_owner`.
595    async fn flush(&self, req: Request, inode: Inode, fh: u64, lock_owner: u64) -> Result<()> {
596        if self.no_open.load(Ordering::Relaxed) {
597            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
598        }
599
600        let node = self.lookup_node(req, inode, "").await;
601        match node {
602            Ok(n) => {
603                if n.whiteout.load(Ordering::Relaxed) {
604                    return Err(Error::from_raw_os_error(libc::ENOENT).into());
605                }
606            }
607            Err(e) => {
608                if e.raw_os_error() == Some(libc::ENOENT) {
609                    trace!("flush: inode {inode} is stale");
610                } else {
611                    return Err(e.into());
612                }
613            }
614        }
615
616        let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
617
618        // FIXME: need to test if inode matches corresponding handle?
619        if inode
620            != self
621                .handles
622                .lock()
623                .await
624                .get(&fh)
625                .map(|h| h.node.inode)
626                .unwrap_or(0)
627        {
628            return Err(Error::other("inode does not match handle").into());
629        }
630
631        trace!("flushing, real_inode: {real_inode}, real_handle: {real_handle}");
632        layer.flush(req, real_inode, real_handle, lock_owner).await
633    }
634
635    /// open a directory. Filesystem may store an arbitrary file handle (pointer, index, etc) in
636    /// `fh`, and use this in other all other directory stream operations
637    /// ([`readdir`][Filesystem::readdir], [`releasedir`][Filesystem::releasedir],
638    /// [`fsyncdir`][Filesystem::fsyncdir]). Filesystem may also implement stateless directory
639    /// I/O and not store anything in `fh`.  A file system need not implement this method if it
640    /// sets [`MountOptions::no_open_dir_support`][crate::MountOptions::no_open_dir_support] and
641    /// if the kernel supports `FUSE_NO_OPENDIR_SUPPORT`.
642    async fn opendir(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
643        if self.no_opendir.load(Ordering::Relaxed) {
644            info!("fuse: opendir is not supported.");
645            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
646        }
647
648        // lookup node
649        let node = self.lookup_node(req, inode, ".").await?;
650
651        if node.whiteout.load(Ordering::Relaxed) {
652            return Err(Error::from_raw_os_error(libc::ENOENT).into());
653        }
654
655        let st = node.stat64(req).await?;
656        if !utils::is_dir(&st.attr.kind) {
657            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
658        }
659
660        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
661        // Get the layer information and open directory in the underlying layer
662        let (layer, in_upper_layer, real_inode) = node.first_layer_inode().await;
663        let reply = layer.opendir(req, real_inode, flags).await?;
664
665        self.handles.lock().await.insert(
666            handle,
667            Arc::new(HandleData {
668                node: Arc::clone(&node),
669                real_handle: Some(RealHandle {
670                    layer,
671                    in_upper_layer,
672                    inode: real_inode,
673                    handle: AtomicU64::new(reply.fh),
674                }),
675            }),
676        );
677
678        Ok(ReplyOpen { fh: handle, flags })
679    }
680
681    /// read directory. `offset` is used to track the offset of the directory entries. `fh` will
682    /// contain the value set by the [`opendir`][Filesystem::opendir] method, or will be
683    /// undefined if the [`opendir`][Filesystem::opendir] method didn't set any value.
684    async fn readdir<'a>(
685        &'a self,
686        req: Request,
687        parent: Inode,
688        fh: u64,
689        offset: i64,
690    ) -> Result<
691        ReplyDirectory<
692            impl futures_util::stream::Stream<Item = Result<DirectoryEntry>> + Send + 'a,
693        >,
694    > {
695        if self.config.no_readdir {
696            info!("fuse: readdir is not supported.");
697            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
698        }
699        let entries = self
700            .do_readdir(req, parent, fh, offset.try_into().unwrap())
701            .await?;
702        Ok(ReplyDirectory { entries })
703    }
704
705    /// read directory entries, but with their attribute, like [`readdir`][Filesystem::readdir]
706    /// + [`lookup`][Filesystem::lookup] at the same time.
707    async fn readdirplus<'a>(
708        &'a self,
709        req: Request,
710        parent: Inode,
711        fh: u64,
712        offset: u64,
713        _lock_owner: u64,
714    ) -> Result<
715        ReplyDirectoryPlus<
716            impl futures_util::stream::Stream<Item = Result<DirectoryEntryPlus>> + Send + 'a,
717        >,
718    > {
719        if self.config.no_readdir {
720            info!("fuse: readdir is not supported.");
721            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
722        }
723        trace!("readdirplus: parent: {parent}, fh: {fh}, offset: {offset}");
724        let entries = self.do_readdirplus(req, parent, fh, offset).await?;
725        match self.handles.lock().await.get(&fh) {
726            Some(h) => {
727                trace!(
728                    "after readdirplus: found handle, seeing real_handle: {}",
729                    h.real_handle.is_some()
730                );
731            }
732            None => trace!("after readdirplus: no handle found: {fh}"),
733        }
734        Ok(ReplyDirectoryPlus { entries })
735    }
736    /// release an open directory. For every [`opendir`][Filesystem::opendir] call there will
737    /// be exactly one `releasedir` call. `fh` will contain the value set by the
738    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
739    /// [`opendir`][Filesystem::opendir] method didn't set any value.
740    async fn releasedir(&self, req: Request, _inode: Inode, fh: u64, flags: u32) -> Result<()> {
741        if self.no_opendir.load(Ordering::Relaxed) {
742            info!("fuse: releasedir is not supported.");
743            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
744        }
745
746        if let Some(hd) = self.handles.lock().await.get(&fh) {
747            let rh = if let Some(ref h) = hd.real_handle {
748                h
749            } else {
750                return Err(
751                    Error::other(format!("no real handle found for file handle {fh}")).into(),
752                );
753            };
754            let real_handle = rh.handle.load(Ordering::Relaxed);
755            let real_inode = rh.inode;
756            rh.layer
757                .releasedir(req, real_inode, real_handle, flags)
758                .await?;
759        }
760
761        self.handles.lock().await.remove(&fh);
762        Ok(())
763    }
764
765    /// synchronize directory contents. If the `datasync` is true, then only the directory contents
766    /// should be flushed, not the metadata. `fh` will contain the value set by the
767    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
768    /// [`opendir`][Filesystem::opendir] method didn't set any value.
769    async fn fsyncdir(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
770        self.do_fsync(req, inode, datasync, fh, true)
771            .await
772            .map_err(|e| e.into())
773    }
774    /// check file access permissions. This will be called for the `access()` system call. If the
775    /// `default_permissions` mount option is given, this method is not be called. This method is
776    /// not called under Linux kernel versions 2.4.x.
777    async fn access(&self, req: Request, inode: Inode, mask: u32) -> Result<()> {
778        let node = self.lookup_node(req, inode, "").await?;
779
780        if node.whiteout.load(Ordering::Relaxed) {
781            return Err(Error::from_raw_os_error(libc::ENOENT).into());
782        }
783
784        let (layer, real_inode) = self.find_real_inode(inode).await?;
785        layer.access(req, real_inode, mask).await
786    }
787
788    /// create and open a file. If the file does not exist, first create it with the specified
789    /// mode, and then open it. Open flags (with the exception of `O_NOCTTY`) are available in
790    /// flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in `fh`, and use
791    /// this in other all other file operations ([`read`][Filesystem::read],
792    /// [`write`][Filesystem::write], [`flush`][Filesystem::flush],
793    /// [`release`][Filesystem::release], [`fsync`][Filesystem::fsync]). There are also some flags
794    /// (`direct_io`, `keep_cache`) which the filesystem may set, to change the way the file is
795    /// opened. If this method is not implemented or under Linux kernel versions earlier than
796    /// 2.6.15, the [`mknod`][Filesystem::mknod] and [`open`][Filesystem::open] methods will be
797    /// called instead.
798    ///
799    /// # Notes:
800    ///
801    /// See `fuse_file_info` structure in
802    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
803    /// more details.
804    async fn create(
805        &self,
806        req: Request,
807        parent: Inode,
808        name: &OsStr,
809        mode: u32,
810        flags: u32,
811    ) -> Result<ReplyCreated> {
812        // Parent doesn't exist.
813        let pnode = self.lookup_node(req, parent, "").await?;
814        if pnode.whiteout.load(Ordering::Relaxed) {
815            return Err(Error::from_raw_os_error(libc::ENOENT).into());
816        }
817
818        let mut flags: i32 = flags as i32;
819        flags |= libc::O_NOFOLLOW;
820        flags &= !libc::O_DIRECT;
821        if self.config.writeback {
822            if flags & libc::O_ACCMODE == libc::O_WRONLY {
823                flags &= !libc::O_ACCMODE;
824                flags |= libc::O_RDWR;
825            }
826
827            if flags & libc::O_APPEND != 0 {
828                flags &= !libc::O_APPEND;
829            }
830        }
831
832        let final_handle = self
833            .do_create(req, &pnode, name, mode, flags.try_into().unwrap())
834            .await?;
835        let entry = self.do_lookup(req, parent, name.to_str().unwrap()).await?;
836        let fh = final_handle
837            .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "Handle not found"))?;
838
839        let mut opts = OpenOptions::empty();
840        match self.config.cache_policy {
841            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
842            CachePolicy::Auto => opts |= OpenOptions::DIRECT_IO,
843            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
844        }
845
846        Ok(ReplyCreated {
847            ttl: entry.ttl,
848            attr: entry.attr,
849            generation: entry.generation,
850            fh,
851            flags: opts.bits(),
852        })
853    }
854
855    /// forget more than one inode. This is a batch version [`forget`][Filesystem::forget]
856    async fn batch_forget(&self, _req: Request, inodes: &[(Inode, u64)]) {
857        for inode in inodes {
858            self.forget_one(inode.0, inode.1).await;
859        }
860    }
861
862    /// allocate space for an open file. This function ensures that required space is allocated for
863    /// specified file.
864    ///
865    /// # Notes:
866    ///
867    /// more information about `fallocate`, please see **`man 2 fallocate`**
868    async fn fallocate(
869        &self,
870        req: Request,
871        inode: Inode,
872        fh: u64,
873        offset: u64,
874        length: u64,
875        mode: u32,
876    ) -> Result<()> {
877        // Use O_RDONLY flags which indicates no copy up.
878        let data = self
879            .get_data(req, Some(fh), inode, libc::O_RDONLY as u32)
880            .await?;
881
882        match data.real_handle {
883            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
884            Some(ref rhd) => {
885                if !rhd.in_upper_layer {
886                    // TODO: in lower layer, error out or just success?
887                    return Err(Error::from_raw_os_error(libc::EROFS).into());
888                }
889                rhd.layer
890                    .fallocate(
891                        req,
892                        rhd.inode,
893                        rhd.handle.load(Ordering::Relaxed),
894                        offset,
895                        length,
896                        mode,
897                    )
898                    .await
899            }
900        }
901    }
902
903    /// find next data or hole after the specified offset.
904    async fn lseek(
905        &self,
906        req: Request,
907        inode: Inode,
908        fh: u64,
909        offset: u64,
910        whence: u32,
911    ) -> Result<ReplyLSeek> {
912        let node = self.lookup_node(req, inode, "").await?;
913
914        if node.whiteout.load(Ordering::Relaxed) {
915            return Err(Error::from_raw_os_error(libc::ENOENT).into());
916        }
917
918        let st = node.stat64(req).await?;
919        if utils::is_dir(&st.attr.kind) {
920            // Special handling and security restrictions for directory operations.
921            // Use the common API to obtain the underlying layer and handle info.
922            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
923
924            // Verify that the underlying handle refers to a directory.
925            let handle_stat = match layer.getattr(req, real_inode, Some(real_handle), 0).await {
926                Ok(s) => s,
927                Err(_) => return Err(Error::from_raw_os_error(libc::EBADF).into()),
928            };
929
930            if !utils::is_dir(&handle_stat.attr.kind) {
931                return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
932            }
933
934            // Handle directory lseek operations according to POSIX standard
935            // This enables seekdir/telldir functionality on directories
936            match whence {
937                // SEEK_SET: Set the directory position to an absolute value
938                x if x == libc::SEEK_SET as u32 => {
939                    // Validate offset bounds to prevent overflow
940                    // Directory offsets should not exceed i64::MAX
941                    if offset > i64::MAX as u64 {
942                        return Err(Error::from_raw_os_error(libc::EINVAL).into());
943                    }
944
945                    // Perform the seek operation on the underlying layer
946                    // Delegate to the lower layer implementation
947                    layer
948                        .lseek(req, real_inode, real_handle, offset, whence)
949                        .await
950                }
951                // SEEK_CUR: Move relative to the current directory position
952                x if x == libc::SEEK_CUR as u32 => {
953                    // Get current position from underlying layer
954                    // This is needed to calculate the new position
955                    let current = match layer
956                        .lseek(req, real_inode, real_handle, 0, libc::SEEK_CUR as u32)
957                        .await
958                    {
959                        Ok(r) => r.offset,
960                        Err(_) => return Err(Error::from_raw_os_error(libc::EINVAL).into()),
961                    };
962
963                    // Check for potential overflow when adding the provided offset
964                    // This prevents invalid position calculations
965                    if let Some(new_offset) = current.checked_add(offset) {
966                        // Ensure the new offset is within valid bounds
967                        if new_offset > i64::MAX as u64 {
968                            return Err(Error::from_raw_os_error(libc::EINVAL).into());
969                        }
970
971                        // Actually set the underlying offset to the new value so behavior
972                        // matches passthrough which uses libc::lseek64 to set the fd offset.
973                        match layer
974                            .lseek(
975                                req,
976                                real_inode,
977                                real_handle,
978                                new_offset,
979                                libc::SEEK_SET as u32,
980                            )
981                            .await
982                        {
983                            Ok(_) => Ok(ReplyLSeek { offset: new_offset }),
984                            Err(_) => Err(Error::from_raw_os_error(libc::EINVAL).into()),
985                        }
986                    } else {
987                        Err(Error::from_raw_os_error(libc::EINVAL).into())
988                    }
989                }
990                // Any other whence value is invalid for directories
991                _ => Err(Error::from_raw_os_error(libc::EINVAL).into()),
992            }
993        } else {
994            // Keep the original lseek behavior for regular files
995            // Delegate directly to the underlying layer
996            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
997            layer
998                .lseek(req, real_inode, real_handle, offset, whence)
999                .await
1000        }
1001    }
1002
    /// Interrupt notification hook.
    ///
    /// Both arguments are ignored (`_unique` is presumably the id of the request being
    /// interrupted — confirm against the trait docs); the method performs no work and always
    /// returns `Ok(())`.
    async fn interrupt(&self, _req: Request, _unique: u64) -> Result<()> {
        Ok(())
    }
1006}
1007
#[cfg(test)]
mod tests {
    use std::{ffi::OsString, path::PathBuf, sync::Arc};

    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;
    use tracing_subscriber::EnvFilter;

    use crate::unionfs::BoxedLayer;
    use crate::{
        passthrough::{PassthroughArgs, new_passthroughfs_layer, newlogfs::LoggingFileSystem},
        unionfs::{OverlayFs, config::Config},
    };

    /// Manual smoke test (ignored by default): mounts an overlay built from one lower
    /// directory and one upper directory, wrapped in a logging filesystem, and keeps it
    /// mounted until the session ends or Ctrl-C is received.
    ///
    /// The directories are hard-coded to a developer machine layout.
    #[tokio::test]
    #[ignore]
    async fn test_a_ovlfs() {
        // Trace-level logging; `try_init` is best-effort so a pre-installed subscriber is fine.
        let filter = EnvFilter::from_default_env().add_directive("trace".parse().unwrap());
        let _ = tracing_subscriber::fmt()
            .with_env_filter(filter)
            .try_init();

        // Test environment locations.
        let mountpoint = PathBuf::from("/home/luxian/megatest/true_temp");
        let lower_roots = vec![PathBuf::from("/home/luxian/github/buck2-rust-third-party")];
        let upper_root = PathBuf::from("/home/luxian/upper");

        // One passthrough layer per lower directory.
        let mut lower_layers: Vec<Arc<BoxedLayer>> = Vec::new();
        for root_dir in lower_roots {
            let passthrough = new_passthroughfs_layer(PassthroughArgs {
                root_dir,
                mapping: None::<&str>,
            })
            .await
            .unwrap();
            lower_layers.push(Arc::new(passthrough) as Arc<BoxedLayer>);
        }

        // Writable upper layer.
        let upper_passthrough = new_passthroughfs_layer(PassthroughArgs {
            root_dir: upper_root,
            mapping: None::<&str>,
        })
        .await
        .unwrap();
        let upper_layer: Arc<BoxedLayer> = Arc::new(upper_passthrough);

        // Assemble the overlay and wrap it with request logging.
        let config = Config {
            mountpoint: mountpoint.clone(),
            do_import: true,
            ..Default::default()
        };
        let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1).unwrap();
        let logfs = LoggingFileSystem::new(overlayfs);

        let mount_path: OsString = mountpoint.into_os_string();

        // SAFETY: getuid/getgid take no arguments and only return the caller's ids.
        let uid = unsafe { libc::getuid() };
        // SAFETY: see above.
        let gid = unsafe { libc::getgid() };

        let mut mount_options = MountOptions::default();
        // .allow_other(true)
        mount_options.force_readdir_plus(true).uid(uid).gid(gid);

        // Flip to `true` to use the privileged mount path instead of the unprivileged one.
        let use_privileged_mount = false;

        let session = Session::new(mount_options);
        let mut mount_handle: rfuse3::raw::MountHandle = if use_privileged_mount {
            session.mount(logfs, mount_path).await.unwrap()
        } else {
            session
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        };

        let handle = &mut mount_handle;

        // Serve until the session finishes on its own, or unmount on Ctrl-C.
        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}