libfuse_fs/overlayfs/
async_io.rs

1use super::Inode;
2use super::OverlayFs;
3use super::utils;
4use crate::overlayfs::HandleData;
5use crate::overlayfs::RealHandle;
6use crate::overlayfs::{AtomicU64, CachePolicy};
7use crate::util::open_options::OpenOptions;
8use rfuse3::raw::prelude::*;
9use rfuse3::*;
10use std::ffi::OsStr;
11use std::io::Error;
12use std::io::ErrorKind;
13use std::num::NonZeroU32;
14use std::sync::Arc;
15use std::sync::atomic::Ordering;
16use tracing::info;
17use tracing::trace;
18
19impl Filesystem for OverlayFs {
20    /// initialize filesystem. Called before any other filesystem method.
21    async fn init(&self, _req: Request) -> Result<ReplyInit> {
22        if self.config.do_import {
23            self.import().await?;
24        }
25        if !self.config.do_import || self.config.writeback {
26            self.writeback.store(true, Ordering::Relaxed);
27        }
28        if !self.config.do_import || self.config.no_open {
29            self.no_open.store(true, Ordering::Relaxed);
30        }
31        if !self.config.do_import || self.config.no_opendir {
32            self.no_opendir.store(true, Ordering::Relaxed);
33        }
34        if !self.config.do_import || self.config.killpriv_v2 {
35            self.killpriv_v2.store(true, Ordering::Relaxed);
36        }
37        if self.config.perfile_dax {
38            self.perfile_dax.store(true, Ordering::Relaxed);
39        }
40
41        Ok(ReplyInit {
42            max_write: NonZeroU32::new(128 * 1024).unwrap(),
43        })
44    }
45
46    /// clean up filesystem. Called on filesystem exit which is fuseblk, in normal fuse filesystem,
47    /// kernel may call forget for root. There is some discuss for this
48    /// <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
49    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
50    async fn destroy(&self, _req: Request) {}
51
52    /// look up a directory entry by name and get its attributes.
53    async fn lookup(&self, req: Request, parent: Inode, name: &OsStr) -> Result<ReplyEntry> {
54        let tmp = name.to_string_lossy().to_string();
55        let result = self.do_lookup(req, parent, tmp.as_str()).await;
56        match result {
57            Ok(e) => Ok(e),
58            Err(err) => Err(err.into()),
59        }
60    }
61
62    /// forget an inode. The nlookup parameter indicates the number of lookups previously
63    /// performed on this inode. If the filesystem implements inode lifetimes, it is recommended
64    /// that inodes acquire a single reference on each lookup, and lose nlookup references on each
65    /// forget. The filesystem may ignore forget calls, if the inodes don't need to have a limited
66    /// lifetime. On unmount it is not guaranteed, that all referenced inodes will receive a forget
67    /// message. When filesystem is normal(not fuseblk) and unmounting, kernel may send forget
68    /// request for root and this library will stop session after call forget. There is some
69    /// discussion for this <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
70    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
71    async fn forget(&self, _req: Request, inode: Inode, nlookup: u64) {
72        self.forget_one(inode, nlookup).await;
73    }
74
75    /// get file attributes. If `fh` is None, means `fh` is not set.
76    async fn getattr(
77        &self,
78        req: Request,
79        inode: Inode,
80        fh: Option<u64>,
81        flags: u32,
82    ) -> Result<ReplyAttr> {
83        if !self.no_open.load(Ordering::Relaxed)
84            && let Some(h) = fh
85        {
86            let handles = self.handles.lock().await;
87            if let Some(hd) = handles.get(&h)
88                && let Some(ref rh) = hd.real_handle
89            {
90                let mut rep: ReplyAttr = rh
91                    .layer
92                    .getattr(req, rh.inode, Some(rh.handle.load(Ordering::Relaxed)), 0)
93                    .await?;
94                rep.attr.ino = inode;
95                return Ok(rep);
96            }
97        }
98
99        let node: Arc<super::OverlayInode> = self.lookup_node(req, inode, "").await?;
100        let (layer, _, lower_inode) = node.first_layer_inode().await;
101        let mut re = layer.getattr(req, lower_inode, None, flags).await?;
102        re.attr.ino = inode;
103        Ok(re)
104    }
105
106    /// set file attributes. If `fh` is None, means `fh` is not set.
107    async fn setattr(
108        &self,
109        req: Request,
110        inode: Inode,
111        fh: Option<u64>,
112        set_attr: SetAttr,
113    ) -> Result<ReplyAttr> {
114        // Check if upper layer exists.
115        self.upper_layer
116            .as_ref()
117            .cloned()
118            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
119
120        // deal with handle first
121        if !self.no_open.load(Ordering::Relaxed)
122            && let Some(h) = fh
123        {
124            let handles = self.handles.lock().await;
125            if let Some(hd) = handles.get(&h)
126                && let Some(ref rhd) = hd.real_handle
127            {
128                // handle opened in upper layer
129                if rhd.in_upper_layer {
130                    let mut rep = rhd
131                        .layer
132                        .setattr(
133                            req,
134                            rhd.inode,
135                            Some(rhd.handle.load(Ordering::Relaxed)),
136                            set_attr,
137                        )
138                        .await?;
139                    rep.attr.ino = inode;
140                    return Ok(rep);
141                }
142            }
143        }
144
145        let mut node = self.lookup_node(req, inode, "").await?;
146
147        if !node.in_upper_layer().await {
148            node = self.copy_node_up(req, node.clone()).await?
149        }
150
151        let (layer, _, real_inode) = node.first_layer_inode().await;
152        // layer.setattr(req, real_inode, None, set_attr).await
153        let mut rep = layer.setattr(req, real_inode, None, set_attr).await?;
154        rep.attr.ino = inode;
155        Ok(rep)
156    }
157
158    /// read symbolic link.
159    async fn readlink(&self, req: Request, inode: Inode) -> Result<ReplyData> {
160        trace!("READLINK: inode: {inode}\n");
161
162        let node = self.lookup_node(req, inode, "").await?;
163
164        if node.whiteout.load(Ordering::Relaxed) {
165            return Err(Error::from_raw_os_error(libc::ENOENT).into());
166        }
167
168        let (layer, _, inode) = node.first_layer_inode().await;
169        layer.readlink(req, inode).await
170    }
171
172    /// create a symbolic link.
173    async fn symlink(
174        &self,
175        req: Request,
176        parent: Inode,
177        name: &OsStr,
178        link: &OsStr,
179    ) -> Result<ReplyEntry> {
180        // soft link
181        let sname = name.to_string_lossy().into_owned().to_owned();
182        let slinkname = link.to_string_lossy().into_owned().to_owned();
183
184        let pnode = self.lookup_node(req, parent, "").await?;
185        self.do_symlink(req, slinkname.as_str(), &pnode, sname.as_str())
186            .await?;
187
188        self.do_lookup(req, parent, sname.as_str())
189            .await
190            .map_err(|e| e.into())
191    }
192
193    /// create file node. Create a regular file, character device, block device, fifo or socket
194    /// node. When creating file, most cases user only need to implement
195    /// [`create`][Filesystem::create].
196    async fn mknod(
197        &self,
198        req: Request,
199        parent: Inode,
200        name: &OsStr,
201        mode: u32,
202        rdev: u32,
203    ) -> Result<ReplyEntry> {
204        let sname = name.to_string_lossy().to_string();
205
206        // Check if parent exists.
207        let pnode = self.lookup_node(req, parent, "").await?;
208        if pnode.whiteout.load(Ordering::Relaxed) {
209            return Err(Error::from_raw_os_error(libc::ENOENT).into());
210        }
211
212        self.do_mknod(req, &pnode, sname.as_str(), mode, rdev, 0)
213            .await?;
214        self.do_lookup(req, parent, sname.as_str())
215            .await
216            .map_err(|e| e.into())
217    }
218
219    /// create a directory.
220    async fn mkdir(
221        &self,
222        req: Request,
223        parent: Inode,
224        name: &OsStr,
225        mode: u32,
226        umask: u32,
227    ) -> Result<ReplyEntry> {
228        let sname = name.to_string_lossy().to_string();
229
230        // no entry or whiteout
231        let pnode = self.lookup_node(req, parent, "").await?;
232        if pnode.whiteout.load(Ordering::Relaxed) {
233            return Err(Error::from_raw_os_error(libc::ENOENT).into());
234        }
235
236        self.do_mkdir(req, pnode, sname.as_str(), mode, umask)
237            .await?;
238        self.do_lookup(req, parent, sname.as_str())
239            .await
240            .map_err(|e| e.into())
241    }
242
243    /// remove a file.
244    async fn unlink(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
245        self.do_rm(req, parent, name, false)
246            .await
247            .map_err(|e| e.into())
248    }
249
250    /// remove a directory.
251    async fn rmdir(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
252        self.do_rm(req, parent, name, true)
253            .await
254            .map_err(|e| e.into())
255    }
256
257    /// rename a file or directory.
258    async fn rename(
259        &self,
260        req: Request,
261        parent: Inode,
262        name: &OsStr,
263        new_parent: Inode,
264        new_name: &OsStr,
265    ) -> Result<()> {
266        self.do_rename(req, parent, name, new_parent, new_name)
267            .await
268            .map_err(|e| e.into())
269    }
270
271    /// create a hard link.
272    async fn link(
273        &self,
274        req: Request,
275        inode: Inode,
276        new_parent: Inode,
277        new_name: &OsStr,
278    ) -> Result<ReplyEntry> {
279        let node = self.lookup_node(req, inode, "").await?;
280        if node.whiteout.load(Ordering::Relaxed) {
281            return Err(Error::from_raw_os_error(libc::ENOENT).into());
282        }
283
284        let newpnode = self.lookup_node(req, new_parent, "").await?;
285        if newpnode.whiteout.load(Ordering::Relaxed) {
286            return Err(Error::from_raw_os_error(libc::ENOENT).into());
287        }
288        let new_name = new_name.to_str().unwrap();
289        // trace!(
290        //     "LINK: inode: {}, new_parent: {}, trying to do_link: src_inode: {}, newpnode: {}",
291        //     inode, new_parent, node.inode, newpnode.inode
292        // );
293        self.do_link(req, &node, &newpnode, new_name).await?;
294        // trace!("LINK: done, looking up new entry");
295        self.do_lookup(req, new_parent, new_name)
296            .await
297            .map_err(|e| e.into())
298    }
299
300    /// open a file. Open flags (with the exception of `O_CREAT`, `O_EXCL` and `O_NOCTTY`) are
301    /// available in flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in
302    /// fh, and use this in other all other file operations (read, write, flush, release, fsync).
303    /// Filesystem may also implement stateless file I/O and not store anything in fh. There are
304    /// also some flags (`direct_io`, `keep_cache`) which the filesystem may set, to change the way
305    /// the file is opened. A filesystem need not implement this method if it
306    /// sets [`MountOptions::no_open_support`][crate::MountOptions::no_open_support] and if the
307    /// kernel supports `FUSE_NO_OPEN_SUPPORT`.
308    ///
309    /// # Notes:
310    ///
311    /// See `fuse_file_info` structure in
312    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
313    /// more details.
314    async fn open(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
315        if self.no_open.load(Ordering::Relaxed) {
316            info!("fuse: open is not supported.");
317            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
318        }
319
320        let readonly: bool = flags
321            & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
322                as u32
323            == 0;
324        // toggle flags
325        let mut flags: i32 = flags as i32;
326
327        flags |= libc::O_NOFOLLOW;
328
329        if self.config.writeback {
330            if flags & libc::O_ACCMODE == libc::O_WRONLY {
331                flags &= !libc::O_ACCMODE;
332                flags |= libc::O_RDWR;
333            }
334
335            if flags & libc::O_APPEND != 0 {
336                flags &= !libc::O_APPEND;
337            }
338        }
339        // lookup node
340        let node = self.lookup_node(req, inode, "").await?;
341
342        // whiteout node
343        if node.whiteout.load(Ordering::Relaxed) {
344            return Err(Error::from_raw_os_error(libc::ENOENT).into());
345        }
346
347        if !readonly {
348            // copy up to upper layer
349            self.copy_node_up(req, node.clone()).await?;
350        }
351
352        // assign a handle in overlayfs and open it
353        let (_l, h) = node.open(req, flags as u32, 0).await?;
354
355        let hd = self.next_handle.fetch_add(1, Ordering::Relaxed);
356        let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
357        let handle_data = HandleData {
358            node: node.clone(),
359            real_handle: Some(RealHandle {
360                layer,
361                in_upper_layer,
362                inode,
363                handle: AtomicU64::new(h.fh),
364            }),
365        };
366
367        self.handles.lock().await.insert(hd, Arc::new(handle_data));
368
369        let mut opts = OpenOptions::empty();
370        match self.config.cache_policy {
371            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
372            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
373            _ => {}
374        }
375
376        // trace!("OPEN: returning handle: {hd}");
377
378        Ok(ReplyOpen {
379            fh: hd,
380            flags: opts.bits(),
381        })
382    }
383
384    /// read data. Read should send exactly the number of bytes requested except on EOF or error,
385    /// otherwise the rest of the data will be substituted with zeroes. An exception to this is
386    /// when the file has been opened in `direct_io` mode, in which case the return value of the
387    /// read system call will reflect the return value of this operation. `fh` will contain the
388    /// value set by the open method, or will be undefined if the open method didn't set any value.
389    async fn read(
390        &self,
391        req: Request,
392        inode: Inode,
393        fh: u64,
394        offset: u64,
395        size: u32,
396    ) -> Result<ReplyData> {
397        let data = self.get_data(req, Some(fh), inode, 0).await?;
398
399        match data.real_handle {
400            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
401            Some(ref hd) => {
402                hd.layer
403                    .read(
404                        req,
405                        hd.inode,
406                        hd.handle.load(Ordering::Relaxed),
407                        offset,
408                        size,
409                    )
410                    .await
411            }
412        }
413    }
414
415    /// write data. Write should return exactly the number of bytes requested except on error. An
416    /// exception to this is when the file has been opened in `direct_io` mode, in which case the
417    /// return value of the write system call will reflect the return value of this operation. `fh`
418    /// will contain the value set by the open method, or will be undefined if the open method
419    /// didn't set any value. When `write_flags` contains
420    /// [`FUSE_WRITE_CACHE`](crate::raw::flags::FUSE_WRITE_CACHE), means the write operation is a
421    /// delay write.
422    #[allow(clippy::too_many_arguments)]
423    async fn write(
424        &self,
425        req: Request,
426        inode: Inode,
427        fh: u64,
428        offset: u64,
429        data: &[u8],
430        write_flags: u32,
431        flags: u32,
432    ) -> Result<ReplyWrite> {
433        let handle_data: Arc<HandleData> = self.get_data(req, Some(fh), inode, flags).await?;
434
435        match handle_data.real_handle {
436            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
437            Some(ref hd) => {
438                hd.layer
439                    .write(
440                        req,
441                        hd.inode,
442                        hd.handle.load(Ordering::Relaxed),
443                        offset,
444                        data,
445                        write_flags,
446                        flags,
447                    )
448                    .await
449            }
450        }
451    }
452
453    /// Copy a range of data from one file to another. This can improve performance because it
454    /// reduces data copying: normally, data will be copied from FUSE server to kernel, then to
455    /// user-space, then to kernel, and finally sent back to FUSE server. By implementing this
456    /// method, data will only be copied internally within the FUSE server.
457    #[allow(clippy::too_many_arguments)]
458    async fn copy_file_range(
459        &self,
460        req: Request,
461        inode_in: Inode,
462        fh_in: u64,
463        offset_in: u64,
464        inode_out: Inode,
465        fh_out: u64,
466        offset_out: u64,
467        length: u64,
468        flags: u64,
469    ) -> Result<ReplyCopyFileRange> {
470        // Get handle data for source file
471        let data_in = self.get_data(req, Some(fh_in), inode_in, 0).await?;
472        let handle_in = match data_in.real_handle {
473            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
474            Some(ref hd) => hd,
475        };
476
477        // Get handle data for destination file
478        let data_out = self.get_data(req, Some(fh_out), inode_out, 0).await?;
479        let handle_out = match data_out.real_handle {
480            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
481            Some(ref hd) => hd,
482        };
483
484        // Both files must be on the same layer for copy_file_range to work
485        if !Arc::ptr_eq(&handle_in.layer, &handle_out.layer) {
486            // Different layers - return EXDEV to trigger fallback to read/write
487            return Err(Error::from_raw_os_error(libc::EXDEV).into());
488        }
489
490        // Delegate to the underlying PassthroughFs layer
491        handle_in
492            .layer
493            .copy_file_range(
494                req,
495                handle_in.inode,
496                handle_in.handle.load(Ordering::Relaxed),
497                offset_in,
498                handle_out.inode,
499                handle_out.handle.load(Ordering::Relaxed),
500                offset_out,
501                length,
502                flags,
503            )
504            .await
505    }
506    /// get filesystem statistics.
507    async fn statfs(&self, req: Request, inode: Inode) -> Result<ReplyStatFs> {
508        self.do_statvfs(req, inode).await.map_err(|e| e.into())
509    }
510
511    /// release an open file. Release is called when there are no more references to an open file:
512    /// all file descriptors are closed and all memory mappings are unmapped. For every open call
513    /// there will be exactly one release call. The filesystem may reply with an error, but error
514    /// values are not returned to `close()` or `munmap()` which triggered the release. `fh` will
515    /// contain the value set by the open method, or will be undefined if the open method didn't
516    /// set any value. `flags` will contain the same flags as for open. `flush` means flush the
517    /// data or not when closing file.
518    async fn release(
519        &self,
520        req: Request,
521        _inode: Inode,
522        fh: u64,
523        flags: u32,
524        lock_owner: u64,
525        flush: bool,
526    ) -> Result<()> {
527        if self.no_open.load(Ordering::Relaxed) {
528            info!("fuse: release is not supported.");
529            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
530        }
531
532        if let Some(hd) = self.handles.lock().await.get(&fh) {
533            let rh = if let Some(ref h) = hd.real_handle {
534                h
535            } else {
536                return Err(
537                    Error::other(format!("no real handle found for file handle {fh}")).into(),
538                );
539            };
540            let real_handle = rh.handle.load(Ordering::Relaxed);
541            let real_inode = rh.inode;
542            rh.layer
543                .release(req, real_inode, real_handle, flags, lock_owner, flush)
544                .await?;
545        }
546
547        self.handles.lock().await.remove(&fh);
548
549        Ok(())
550    }
551
552    /// synchronize file contents. If the `datasync` is true, then only the user data should be
553    /// flushed, not the metadata.
554    async fn fsync(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
555        self.do_fsync(req, inode, datasync, fh, false)
556            .await
557            .map_err(|e| e.into())
558    }
559
560    /// set an extended attribute.
561    async fn setxattr(
562        &self,
563        req: Request,
564        inode: Inode,
565        name: &OsStr,
566        value: &[u8],
567        flags: u32,
568        position: u32,
569    ) -> Result<()> {
570        let node = self.lookup_node(req, inode, "").await?;
571
572        if node.whiteout.load(Ordering::Relaxed) {
573            return Err(Error::from_raw_os_error(libc::ENOENT).into());
574        }
575
576        if !node.in_upper_layer().await {
577            // Copy node up.
578            self.copy_node_up(req, node.clone()).await?;
579        }
580
581        let (layer, _, real_inode) = node.first_layer_inode().await;
582
583        layer
584            .setxattr(req, real_inode, name, value, flags, position)
585            .await
586    }
587
588    /// Get an extended attribute. If `size` is too small, return `Err<ERANGE>`.
589    /// Otherwise, use [`ReplyXAttr::Data`] to send the attribute data, or
590    /// return an error.
591    async fn getxattr(
592        &self,
593        req: Request,
594        inode: Inode,
595        name: &OsStr,
596        size: u32,
597    ) -> Result<ReplyXAttr> {
598        let node = self.lookup_node(req, inode, "").await?;
599
600        if node.whiteout.load(Ordering::Relaxed) {
601            return Err(Error::from_raw_os_error(libc::ENOENT).into());
602        }
603
604        let (layer, real_inode) = self.find_real_inode(inode).await?;
605
606        layer.getxattr(req, real_inode, name, size).await
607    }
608
609    /// List extended attribute names.
610    ///
611    /// If `size` is too small, return `Err<ERANGE>`.  Otherwise, use
612    /// [`ReplyXAttr::Data`] to send the attribute list, or return an error.
613    async fn listxattr(&self, req: Request, inode: Inode, size: u32) -> Result<ReplyXAttr> {
614        let node = self.lookup_node(req, inode, "").await?;
615        if node.whiteout.load(Ordering::Relaxed) {
616            return Err(Error::from_raw_os_error(libc::ENOENT).into());
617        }
618        let (layer, real_inode) = self.find_real_inode(inode).await?;
619        layer.listxattr(req, real_inode, size).await
620    }
621
622    /// remove an extended attribute.
623    async fn removexattr(&self, req: Request, inode: Inode, name: &OsStr) -> Result<()> {
624        let node = self.lookup_node(req, inode, "").await?;
625
626        if node.whiteout.load(Ordering::Relaxed) {
627            return Err(Error::from_raw_os_error(libc::ENOENT).into());
628        }
629
630        if !node.in_upper_layer().await {
631            // copy node into upper layer
632            self.copy_node_up(req, node.clone()).await?;
633        }
634
635        let (layer, _, ino) = node.first_layer_inode().await;
636        layer.removexattr(req, ino, name).await
637
638        // TODO: recreate the node since removexattr may remove the opaque xattr.
639    }
640
641    /// flush method. This is called on each `close()` of the opened file. Since file descriptors
642    /// can be duplicated (`dup`, `dup2`, `fork`), for one open call there may be many flush calls.
643    /// Filesystems shouldn't assume that flush will always be called after some writes, or that if
644    /// will be called at all. `fh` will contain the value set by the open method, or will be
645    /// undefined if the open method didn't set any value.
646    ///
647    /// # Notes:
648    ///
649    /// the name of the method is misleading, since (unlike fsync) the filesystem is not forced to
650    /// flush pending writes. One reason to flush data, is if the filesystem wants to return write
651    /// errors. If the filesystem supports file locking operations ([`setlk`][Filesystem::setlk],
652    /// [`getlk`][Filesystem::getlk]) it should remove all locks belonging to `lock_owner`.
653    async fn flush(&self, req: Request, inode: Inode, fh: u64, lock_owner: u64) -> Result<()> {
654        if self.no_open.load(Ordering::Relaxed) {
655            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
656        }
657
658        let node = self.lookup_node(req, inode, "").await;
659        match node {
660            Ok(n) => {
661                if n.whiteout.load(Ordering::Relaxed) {
662                    return Err(Error::from_raw_os_error(libc::ENOENT).into());
663                }
664            }
665            Err(e) => {
666                if e.raw_os_error() == Some(libc::ENOENT) {
667                    trace!("flush: inode {inode} is stale");
668                } else {
669                    return Err(e.into());
670                }
671            }
672        }
673
674        let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
675
676        // FIXME: need to test if inode matches corresponding handle?
677        if inode
678            != self
679                .handles
680                .lock()
681                .await
682                .get(&fh)
683                .map(|h| h.node.inode)
684                .unwrap_or(0)
685        {
686            return Err(Error::other("inode does not match handle").into());
687        }
688
689        trace!("flushing, real_inode: {real_inode}, real_handle: {real_handle}");
690        layer.flush(req, real_inode, real_handle, lock_owner).await
691    }
692
693    /// open a directory. Filesystem may store an arbitrary file handle (pointer, index, etc) in
694    /// `fh`, and use this in other all other directory stream operations
695    /// ([`readdir`][Filesystem::readdir], [`releasedir`][Filesystem::releasedir],
696    /// [`fsyncdir`][Filesystem::fsyncdir]). Filesystem may also implement stateless directory
697    /// I/O and not store anything in `fh`.  A file system need not implement this method if it
698    /// sets [`MountOptions::no_open_dir_support`][crate::MountOptions::no_open_dir_support] and
699    /// if the kernel supports `FUSE_NO_OPENDIR_SUPPORT`.
700    async fn opendir(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
701        if self.no_opendir.load(Ordering::Relaxed) {
702            info!("fuse: opendir is not supported.");
703            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
704        }
705
706        // lookup node
707        let node = self.lookup_node(req, inode, ".").await?;
708
709        if node.whiteout.load(Ordering::Relaxed) {
710            return Err(Error::from_raw_os_error(libc::ENOENT).into());
711        }
712
713        let st = node.stat64(req).await?;
714        if !utils::is_dir(&st.attr.kind) {
715            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
716        }
717
718        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
719        // Get the layer information and open directory in the underlying layer
720        let (layer, in_upper_layer, real_inode) = node.first_layer_inode().await;
721        let reply = layer.opendir(req, real_inode, flags).await?;
722
723        self.handles.lock().await.insert(
724            handle,
725            Arc::new(HandleData {
726                node: Arc::clone(&node),
727                real_handle: Some(RealHandle {
728                    layer,
729                    in_upper_layer,
730                    inode: real_inode,
731                    handle: AtomicU64::new(reply.fh),
732                }),
733            }),
734        );
735
736        Ok(ReplyOpen { fh: handle, flags })
737    }
738
739    /// read directory. `offset` is used to track the offset of the directory entries. `fh` will
740    /// contain the value set by the [`opendir`][Filesystem::opendir] method, or will be
741    /// undefined if the [`opendir`][Filesystem::opendir] method didn't set any value.
742    async fn readdir<'a>(
743        &'a self,
744        req: Request,
745        parent: Inode,
746        fh: u64,
747        offset: i64,
748    ) -> Result<
749        ReplyDirectory<
750            impl futures_util::stream::Stream<Item = Result<DirectoryEntry>> + Send + 'a,
751        >,
752    > {
753        if self.config.no_readdir {
754            info!("fuse: readdir is not supported.");
755            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
756        }
757        let entries = self
758            .do_readdir(req, parent, fh, offset.try_into().unwrap())
759            .await?;
760        Ok(ReplyDirectory { entries })
761    }
762
763    /// read directory entries, but with their attribute, like [`readdir`][Filesystem::readdir]
764    /// + [`lookup`][Filesystem::lookup] at the same time.
765    async fn readdirplus<'a>(
766        &'a self,
767        req: Request,
768        parent: Inode,
769        fh: u64,
770        offset: u64,
771        _lock_owner: u64,
772    ) -> Result<
773        ReplyDirectoryPlus<
774            impl futures_util::stream::Stream<Item = Result<DirectoryEntryPlus>> + Send + 'a,
775        >,
776    > {
777        if self.config.no_readdir {
778            info!("fuse: readdir is not supported.");
779            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
780        }
781        trace!("readdirplus: parent: {parent}, fh: {fh}, offset: {offset}");
782        let entries = self.do_readdirplus(req, parent, fh, offset).await?;
783        match self.handles.lock().await.get(&fh) {
784            Some(h) => {
785                trace!(
786                    "after readdirplus: found handle, seeing real_handle: {}",
787                    h.real_handle.is_some()
788                );
789            }
790            None => trace!("after readdirplus: no handle found: {fh}"),
791        }
792        Ok(ReplyDirectoryPlus { entries })
793    }
794    /// release an open directory. For every [`opendir`][Filesystem::opendir] call there will
795    /// be exactly one `releasedir` call. `fh` will contain the value set by the
796    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
797    /// [`opendir`][Filesystem::opendir] method didn't set any value.
798    async fn releasedir(&self, req: Request, _inode: Inode, fh: u64, flags: u32) -> Result<()> {
799        if self.no_opendir.load(Ordering::Relaxed) {
800            info!("fuse: releasedir is not supported.");
801            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
802        }
803
804        if let Some(hd) = self.handles.lock().await.get(&fh) {
805            let rh = if let Some(ref h) = hd.real_handle {
806                h
807            } else {
808                return Err(
809                    Error::other(format!("no real handle found for file handle {fh}")).into(),
810                );
811            };
812            let real_handle = rh.handle.load(Ordering::Relaxed);
813            let real_inode = rh.inode;
814            rh.layer
815                .releasedir(req, real_inode, real_handle, flags)
816                .await?;
817        }
818
819        self.handles.lock().await.remove(&fh);
820        Ok(())
821    }
822
823    /// synchronize directory contents. If the `datasync` is true, then only the directory contents
824    /// should be flushed, not the metadata. `fh` will contain the value set by the
825    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
826    /// [`opendir`][Filesystem::opendir] method didn't set any value.
827    async fn fsyncdir(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
828        self.do_fsync(req, inode, datasync, fh, true)
829            .await
830            .map_err(|e| e.into())
831    }
832    /// check file access permissions. This will be called for the `access()` system call. If the
833    /// `default_permissions` mount option is given, this method is not be called. This method is
834    /// not called under Linux kernel versions 2.4.x.
835    async fn access(&self, req: Request, inode: Inode, mask: u32) -> Result<()> {
836        let node = self.lookup_node(req, inode, "").await?;
837
838        if node.whiteout.load(Ordering::Relaxed) {
839            return Err(Error::from_raw_os_error(libc::ENOENT).into());
840        }
841
842        let (layer, real_inode) = self.find_real_inode(inode).await?;
843        layer.access(req, real_inode, mask).await
844    }
845
846    /// create and open a file. If the file does not exist, first create it with the specified
847    /// mode, and then open it. Open flags (with the exception of `O_NOCTTY`) are available in
848    /// flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in `fh`, and use
849    /// this in other all other file operations ([`read`][Filesystem::read],
850    /// [`write`][Filesystem::write], [`flush`][Filesystem::flush],
851    /// [`release`][Filesystem::release], [`fsync`][Filesystem::fsync]). There are also some flags
852    /// (`direct_io`, `keep_cache`) which the filesystem may set, to change the way the file is
853    /// opened. If this method is not implemented or under Linux kernel versions earlier than
854    /// 2.6.15, the [`mknod`][Filesystem::mknod] and [`open`][Filesystem::open] methods will be
855    /// called instead.
856    ///
857    /// # Notes:
858    ///
859    /// See `fuse_file_info` structure in
860    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
861    /// more details.
862    async fn create(
863        &self,
864        req: Request,
865        parent: Inode,
866        name: &OsStr,
867        mode: u32,
868        flags: u32,
869    ) -> Result<ReplyCreated> {
870        // Parent doesn't exist.
871        let pnode = self.lookup_node(req, parent, "").await?;
872        if pnode.whiteout.load(Ordering::Relaxed) {
873            return Err(Error::from_raw_os_error(libc::ENOENT).into());
874        }
875
876        let mut flags: i32 = flags as i32;
877        flags |= libc::O_NOFOLLOW;
878        flags &= !libc::O_DIRECT;
879        if self.config.writeback {
880            if flags & libc::O_ACCMODE == libc::O_WRONLY {
881                flags &= !libc::O_ACCMODE;
882                flags |= libc::O_RDWR;
883            }
884
885            if flags & libc::O_APPEND != 0 {
886                flags &= !libc::O_APPEND;
887            }
888        }
889
890        let final_handle = self
891            .do_create(req, &pnode, name, mode, flags.try_into().unwrap())
892            .await?;
893        let entry = self.do_lookup(req, parent, name.to_str().unwrap()).await?;
894        let fh = final_handle
895            .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "Handle not found"))?;
896
897        let mut opts = OpenOptions::empty();
898        match self.config.cache_policy {
899            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
900            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
901            _ => {}
902        }
903
904        Ok(ReplyCreated {
905            ttl: entry.ttl,
906            attr: entry.attr,
907            generation: entry.generation,
908            fh,
909            flags: opts.bits(),
910        })
911    }
912
913    /// forget more than one inode. This is a batch version [`forget`][Filesystem::forget]
914    async fn batch_forget(&self, _req: Request, inodes: &[(Inode, u64)]) {
915        for inode in inodes {
916            self.forget_one(inode.0, inode.1).await;
917        }
918    }
919
920    /// allocate space for an open file. This function ensures that required space is allocated for
921    /// specified file.
922    ///
923    /// # Notes:
924    ///
925    /// more information about `fallocate`, please see **`man 2 fallocate`**
926    async fn fallocate(
927        &self,
928        req: Request,
929        inode: Inode,
930        fh: u64,
931        offset: u64,
932        length: u64,
933        mode: u32,
934    ) -> Result<()> {
935        // Use O_RDONLY flags which indicates no copy up.
936        let data = self
937            .get_data(req, Some(fh), inode, libc::O_RDONLY as u32)
938            .await?;
939
940        match data.real_handle {
941            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
942            Some(ref rhd) => {
943                if !rhd.in_upper_layer {
944                    // TODO: in lower layer, error out or just success?
945                    return Err(Error::from_raw_os_error(libc::EROFS).into());
946                }
947                rhd.layer
948                    .fallocate(
949                        req,
950                        rhd.inode,
951                        rhd.handle.load(Ordering::Relaxed),
952                        offset,
953                        length,
954                        mode,
955                    )
956                    .await
957            }
958        }
959    }
960
961    /// find next data or hole after the specified offset.
962    async fn lseek(
963        &self,
964        req: Request,
965        inode: Inode,
966        fh: u64,
967        offset: u64,
968        whence: u32,
969    ) -> Result<ReplyLSeek> {
970        let node = self.lookup_node(req, inode, "").await?;
971
972        if node.whiteout.load(Ordering::Relaxed) {
973            return Err(Error::from_raw_os_error(libc::ENOENT).into());
974        }
975
976        let st = node.stat64(req).await?;
977        if utils::is_dir(&st.attr.kind) {
978            // Special handling and security restrictions for directory operations.
979            // Use the common API to obtain the underlying layer and handle info.
980            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
981
982            // Verify that the underlying handle refers to a directory.
983            let handle_stat = match layer.getattr(req, real_inode, Some(real_handle), 0).await {
984                Ok(s) => s,
985                Err(_) => return Err(Error::from_raw_os_error(libc::EBADF).into()),
986            };
987
988            if !utils::is_dir(&handle_stat.attr.kind) {
989                return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
990            }
991
992            // Handle directory lseek operations according to POSIX standard
993            // This enables seekdir/telldir functionality on directories
994            match whence {
995                // SEEK_SET: Set the directory position to an absolute value
996                x if x == libc::SEEK_SET as u32 => {
997                    // Validate offset bounds to prevent overflow
998                    // Directory offsets should not exceed i64::MAX
999                    if offset > i64::MAX as u64 {
1000                        return Err(Error::from_raw_os_error(libc::EINVAL).into());
1001                    }
1002
1003                    // Perform the seek operation on the underlying layer
1004                    // Delegate to the lower layer implementation
1005                    layer
1006                        .lseek(req, real_inode, real_handle, offset, whence)
1007                        .await
1008                }
1009                // SEEK_CUR: Move relative to the current directory position
1010                x if x == libc::SEEK_CUR as u32 => {
1011                    // Get current position from underlying layer
1012                    // This is needed to calculate the new position
1013                    let current = match layer
1014                        .lseek(req, real_inode, real_handle, 0, libc::SEEK_CUR as u32)
1015                        .await
1016                    {
1017                        Ok(r) => r.offset,
1018                        Err(_) => return Err(Error::from_raw_os_error(libc::EINVAL).into()),
1019                    };
1020
1021                    // Check for potential overflow when adding the provided offset
1022                    // This prevents invalid position calculations
1023                    if let Some(new_offset) = current.checked_add(offset) {
1024                        // Ensure the new offset is within valid bounds
1025                        if new_offset > i64::MAX as u64 {
1026                            return Err(Error::from_raw_os_error(libc::EINVAL).into());
1027                        }
1028
1029                        // Actually set the underlying offset to the new value so behavior
1030                        // matches passthrough which uses libc::lseek64 to set the fd offset.
1031                        match layer
1032                            .lseek(
1033                                req,
1034                                real_inode,
1035                                real_handle,
1036                                new_offset,
1037                                libc::SEEK_SET as u32,
1038                            )
1039                            .await
1040                        {
1041                            Ok(_) => Ok(ReplyLSeek { offset: new_offset }),
1042                            Err(_) => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1043                        }
1044                    } else {
1045                        Err(Error::from_raw_os_error(libc::EINVAL).into())
1046                    }
1047                }
1048                // Any other whence value is invalid for directories
1049                _ => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1050            }
1051        } else {
1052            // Keep the original lseek behavior for regular files
1053            // Delegate directly to the underlying layer
1054            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1055            layer
1056                .lseek(req, real_inode, real_handle, offset, whence)
1057                .await
1058        }
1059    }
1060
1061    async fn interrupt(&self, _req: Request, _unique: u64) -> Result<()> {
1062        Ok(())
1063    }
1064}
1065
#[cfg(test)]
mod tests {
    use std::{ffi::OsString, path::PathBuf, sync::Arc};

    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;
    use tracing_subscriber::EnvFilter;

    use crate::{
        overlayfs::{OverlayFs, config::Config},
        passthrough::{PassthroughArgs, new_passthroughfs_layer, newlogfs::LoggingFileSystem},
    };

    /// Manual smoke test: mounts an overlay built from hard-coded local directories and keeps
    /// it mounted until the session ends or Ctrl-C is received. Marked `#[ignore]` because the
    /// paths only exist on the original author's machine.
    #[tokio::test]
    #[ignore]
    async fn test_a_ovlfs() {
        let trace_filter =
            EnvFilter::from_default_env().add_directive("trace".parse().unwrap());
        let _ = tracing_subscriber::fmt()
            .with_env_filter(trace_filter)
            .try_init();

        // Directories backing the overlay.
        let mountpoint = PathBuf::from("/home/luxian/megatest/true_temp");
        let lowerdir = vec![PathBuf::from("/home/luxian/github/buck2-rust-third-party")];
        let upperdir = PathBuf::from("/home/luxian/upper");

        // One passthrough layer per lower directory.
        let mut lower_layers = Vec::with_capacity(lowerdir.len());
        for root_dir in lowerdir {
            let args = PassthroughArgs {
                root_dir,
                mapping: None::<&str>,
            };
            let layer = new_passthroughfs_layer(args).await.unwrap();
            lower_layers.push(Arc::new(layer));
        }

        // The single writable upper layer.
        let upper_args = PassthroughArgs {
            root_dir: upperdir,
            mapping: None::<&str>,
        };
        let upper_layer = Arc::new(new_passthroughfs_layer(upper_args).await.unwrap());

        // Assemble the overlay and wrap it in the logging filesystem.
        let config = Config {
            mountpoint: mountpoint.clone(),
            do_import: true,
            ..Default::default()
        };
        let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1).unwrap();
        let logfs = LoggingFileSystem::new(overlayfs);

        let mount_path: OsString = mountpoint.into_os_string();

        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };

        let mut mount_options = MountOptions::default();
        // .allow_other(true)
        mount_options.force_readdir_plus(true).uid(uid).gid(gid);

        // Flip to `true` to go through the privileged mount path instead.
        let use_privileged_mount = false;
        let session = Session::new(mount_options);
        let mut mount_handle: rfuse3::raw::MountHandle = if use_privileged_mount {
            session.mount(logfs, mount_path).await.unwrap()
        } else {
            session
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        };

        let handle = &mut mount_handle;

        // Run until the session finishes on its own, or unmount on Ctrl-C.
        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}