Skip to main content

libfuse_fs/overlayfs/
async_io.rs

1use super::Inode;
2use super::OverlayFs;
3use super::utils;
4use crate::overlayfs::HandleData;
5use crate::overlayfs::RealHandle;
6use crate::overlayfs::{AtomicU64, CachePolicy};
7use crate::util::open_options::OpenOptions;
8use rfuse3::raw::prelude::*;
9use rfuse3::*;
10use std::ffi::OsStr;
11use std::io::Error;
12use std::io::ErrorKind;
13use std::num::NonZeroU32;
14use std::sync::Arc;
15use std::sync::atomic::Ordering;
16use tokio::sync::Mutex;
17use tracing::info;
18use tracing::trace;
19
20impl Filesystem for OverlayFs {
21    /// initialize filesystem. Called before any other filesystem method.
22    async fn init(&self, _req: Request) -> Result<ReplyInit> {
23        if self.config.do_import {
24            self.import().await?;
25        }
26        for layer in self.lower_layers.iter() {
27            layer.init(_req).await?;
28        }
29        if let Some(upper) = &self.upper_layer {
30            upper.init(_req).await?;
31        }
32        if !self.config.do_import || self.config.writeback {
33            self.writeback.store(true, Ordering::Relaxed);
34        }
35        if !self.config.do_import || self.config.no_open {
36            self.no_open.store(true, Ordering::Relaxed);
37        }
38        if !self.config.do_import || self.config.no_opendir {
39            self.no_opendir.store(true, Ordering::Relaxed);
40        }
41        if !self.config.do_import || self.config.killpriv_v2 {
42            self.killpriv_v2.store(true, Ordering::Relaxed);
43        }
44        if self.config.perfile_dax {
45            self.perfile_dax.store(true, Ordering::Relaxed);
46        }
47
48        Ok(ReplyInit {
49            max_write: NonZeroU32::new(128 * 1024).unwrap(),
50        })
51    }
52
53    /// clean up filesystem. Called on filesystem exit which is fuseblk, in normal fuse filesystem,
54    /// kernel may call forget for root. There is some discuss for this
55    /// <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
56    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
57    async fn destroy(&self, _req: Request) {}
58
59    /// look up a directory entry by name and get its attributes.
60    async fn lookup(&self, req: Request, parent: Inode, name: &OsStr) -> Result<ReplyEntry> {
61        let tmp = name.to_string_lossy().to_string();
62        let result = self.do_lookup(req, parent, tmp.as_str()).await;
63        match result {
64            Ok(e) => Ok(e),
65            Err(err) => Err(err.into()),
66        }
67    }
68
69    /// forget an inode. The nlookup parameter indicates the number of lookups previously
70    /// performed on this inode. If the filesystem implements inode lifetimes, it is recommended
71    /// that inodes acquire a single reference on each lookup, and lose nlookup references on each
72    /// forget. The filesystem may ignore forget calls, if the inodes don't need to have a limited
73    /// lifetime. On unmount it is not guaranteed, that all referenced inodes will receive a forget
74    /// message. When filesystem is normal(not fuseblk) and unmounting, kernel may send forget
75    /// request for root and this library will stop session after call forget. There is some
76    /// discussion for this <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
77    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
78    async fn forget(&self, _req: Request, inode: Inode, nlookup: u64) {
79        self.forget_one(inode, nlookup).await;
80    }
81
82    /// get file attributes. If `fh` is None, means `fh` is not set.
83    async fn getattr(
84        &self,
85        req: Request,
86        inode: Inode,
87        fh: Option<u64>,
88        flags: u32,
89    ) -> Result<ReplyAttr> {
90        if !self.no_open.load(Ordering::Relaxed)
91            && let Some(h) = fh
92        {
93            let handles = self.handles.lock().await;
94            if let Some(hd) = handles.get(&h)
95                && let Some(ref rh) = hd.real_handle
96            {
97                let mut rep: ReplyAttr = rh
98                    .layer
99                    .getattr(req, rh.inode, Some(rh.handle.load(Ordering::Relaxed)), 0)
100                    .await?;
101                rep.attr.ino = inode;
102                return Ok(rep);
103            }
104        }
105
106        let node: Arc<super::OverlayInode> = self.lookup_node(req, inode, "").await?;
107        let (layer, _, lower_inode) = node.first_layer_inode().await;
108        let mut re = layer.getattr(req, lower_inode, None, flags).await?;
109        re.attr.ino = inode;
110        Ok(re)
111    }
112
113    /// set file attributes. If `fh` is None, means `fh` is not set.
114    async fn setattr(
115        &self,
116        req: Request,
117        inode: Inode,
118        fh: Option<u64>,
119        set_attr: SetAttr,
120    ) -> Result<ReplyAttr> {
121        // Check if upper layer exists.
122        self.upper_layer
123            .as_ref()
124            .cloned()
125            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
126
127        // deal with handle first
128        if !self.no_open.load(Ordering::Relaxed)
129            && let Some(h) = fh
130        {
131            let handles = self.handles.lock().await;
132            if let Some(hd) = handles.get(&h)
133                && let Some(ref rhd) = hd.real_handle
134            {
135                // handle opened in upper layer
136                if rhd.in_upper_layer {
137                    let mut rep = rhd
138                        .layer
139                        .setattr(
140                            req,
141                            rhd.inode,
142                            Some(rhd.handle.load(Ordering::Relaxed)),
143                            set_attr,
144                        )
145                        .await?;
146                    rep.attr.ino = inode;
147                    return Ok(rep);
148                }
149            }
150        }
151
152        let mut node = self.lookup_node(req, inode, "").await?;
153
154        if !node.in_upper_layer().await {
155            node = self.copy_node_up(req, node.clone()).await?
156        }
157
158        let (layer, _, real_inode) = node.first_layer_inode().await;
159        // layer.setattr(req, real_inode, None, set_attr).await
160        let mut rep = layer.setattr(req, real_inode, None, set_attr).await?;
161        rep.attr.ino = inode;
162        Ok(rep)
163    }
164
165    /// read symbolic link.
166    async fn readlink(&self, req: Request, inode: Inode) -> Result<ReplyData> {
167        trace!("READLINK: inode: {inode}\n");
168
169        let node = self.lookup_node(req, inode, "").await?;
170
171        if node.whiteout.load(Ordering::Relaxed) {
172            return Err(Error::from_raw_os_error(libc::ENOENT).into());
173        }
174
175        let (layer, _, inode) = node.first_layer_inode().await;
176        layer.readlink(req, inode).await
177    }
178
179    /// create a symbolic link.
180    async fn symlink(
181        &self,
182        req: Request,
183        parent: Inode,
184        name: &OsStr,
185        link: &OsStr,
186    ) -> Result<ReplyEntry> {
187        // soft link
188        let sname = name.to_string_lossy().into_owned().to_owned();
189        let slinkname = link.to_string_lossy().into_owned().to_owned();
190
191        let pnode = self.lookup_node(req, parent, "").await?;
192        self.do_symlink(req, slinkname.as_str(), &pnode, sname.as_str())
193            .await?;
194
195        self.do_lookup(req, parent, sname.as_str())
196            .await
197            .map_err(|e| e.into())
198    }
199
200    /// create file node. Create a regular file, character device, block device, fifo or socket
201    /// node. When creating file, most cases user only need to implement
202    /// [`create`][Filesystem::create].
203    async fn mknod(
204        &self,
205        req: Request,
206        parent: Inode,
207        name: &OsStr,
208        mode: u32,
209        rdev: u32,
210    ) -> Result<ReplyEntry> {
211        let sname = name.to_string_lossy().to_string();
212
213        // Check if parent exists.
214        let pnode = self.lookup_node(req, parent, "").await?;
215        if pnode.whiteout.load(Ordering::Relaxed) {
216            return Err(Error::from_raw_os_error(libc::ENOENT).into());
217        }
218
219        self.do_mknod(req, &pnode, sname.as_str(), mode, rdev, 0)
220            .await?;
221        self.do_lookup(req, parent, sname.as_str())
222            .await
223            .map_err(|e| e.into())
224    }
225
226    /// create a directory.
227    async fn mkdir(
228        &self,
229        req: Request,
230        parent: Inode,
231        name: &OsStr,
232        mode: u32,
233        umask: u32,
234    ) -> Result<ReplyEntry> {
235        let sname = name.to_string_lossy().to_string();
236
237        // no entry or whiteout
238        let pnode = self.lookup_node(req, parent, "").await?;
239        if pnode.whiteout.load(Ordering::Relaxed) {
240            return Err(Error::from_raw_os_error(libc::ENOENT).into());
241        }
242
243        self.do_mkdir(req, pnode, sname.as_str(), mode, umask)
244            .await?;
245        self.do_lookup(req, parent, sname.as_str())
246            .await
247            .map_err(|e| e.into())
248    }
249
250    /// remove a file.
251    async fn unlink(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
252        self.do_rm(req, parent, name, false)
253            .await
254            .map_err(|e| e.into())
255    }
256
257    /// remove a directory.
258    async fn rmdir(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
259        self.do_rm(req, parent, name, true)
260            .await
261            .map_err(|e| e.into())
262    }
263
264    /// rename a file or directory.
265    async fn rename(
266        &self,
267        req: Request,
268        parent: Inode,
269        name: &OsStr,
270        new_parent: Inode,
271        new_name: &OsStr,
272    ) -> Result<()> {
273        self.do_rename(req, parent, name, new_parent, new_name)
274            .await
275            .map_err(|e| e.into())
276    }
277
278    /// create a hard link.
279    async fn link(
280        &self,
281        req: Request,
282        inode: Inode,
283        new_parent: Inode,
284        new_name: &OsStr,
285    ) -> Result<ReplyEntry> {
286        let node = self.lookup_node(req, inode, "").await?;
287        if node.whiteout.load(Ordering::Relaxed) {
288            return Err(Error::from_raw_os_error(libc::ENOENT).into());
289        }
290
291        let newpnode = self.lookup_node(req, new_parent, "").await?;
292        if newpnode.whiteout.load(Ordering::Relaxed) {
293            return Err(Error::from_raw_os_error(libc::ENOENT).into());
294        }
295        let new_name = new_name
296            .to_str()
297            .ok_or_else(|| Error::from_raw_os_error(libc::EINVAL))?;
298        // trace!(
299        //     "LINK: inode: {}, new_parent: {}, trying to do_link: src_inode: {}, newpnode: {}",
300        //     inode, new_parent, node.inode, newpnode.inode
301        // );
302        self.do_link(req, &node, &newpnode, new_name).await?;
303        // trace!("LINK: done, looking up new entry");
304        self.do_lookup(req, new_parent, new_name)
305            .await
306            .map_err(|e| e.into())
307    }
308
309    /// open a file. Open flags (with the exception of `O_CREAT`, `O_EXCL` and `O_NOCTTY`) are
310    /// available in flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in
311    /// fh, and use this in other all other file operations (read, write, flush, release, fsync).
312    /// Filesystem may also implement stateless file I/O and not store anything in fh. There are
313    /// also some flags (`direct_io`, `keep_cache`) which the filesystem may set, to change the way
314    /// the file is opened. A filesystem need not implement this method if it
315    /// sets [`MountOptions::no_open_support`][crate::MountOptions::no_open_support] and if the
316    /// kernel supports `FUSE_NO_OPEN_SUPPORT`.
317    ///
318    /// # Notes:
319    ///
320    /// See `fuse_file_info` structure in
321    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
322    /// more details.
323    async fn open(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
324        if self.no_open.load(Ordering::Relaxed) {
325            info!("fuse: open is not supported.");
326            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
327        }
328
329        let readonly: bool = flags
330            & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
331                as u32
332            == 0;
333        // toggle flags
334        let mut flags: i32 = flags as i32;
335
336        flags |= libc::O_NOFOLLOW;
337
338        if self.config.writeback {
339            if flags & libc::O_ACCMODE == libc::O_WRONLY {
340                flags &= !libc::O_ACCMODE;
341                flags |= libc::O_RDWR;
342            }
343
344            if flags & libc::O_APPEND != 0 {
345                flags &= !libc::O_APPEND;
346            }
347        }
348        // lookup node
349        let node = self.lookup_node(req, inode, "").await?;
350
351        // whiteout node
352        if node.whiteout.load(Ordering::Relaxed) {
353            return Err(Error::from_raw_os_error(libc::ENOENT).into());
354        }
355
356        if !readonly {
357            // copy up to upper layer
358            self.copy_node_up(req, node.clone()).await?;
359        }
360
361        // assign a handle in overlayfs and open it
362        let (_l, h) = node.open(req, flags as u32, 0).await?;
363
364        let hd = self.next_handle.fetch_add(1, Ordering::Relaxed);
365        let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
366        let handle_data = HandleData {
367            node: node.clone(),
368            real_handle: Some(RealHandle {
369                layer,
370                in_upper_layer,
371                inode,
372                handle: AtomicU64::new(h.fh),
373            }),
374            dir_snapshot: Mutex::new(None),
375        };
376
377        self.handles.lock().await.insert(hd, Arc::new(handle_data));
378
379        let mut opts = OpenOptions::empty();
380        match self.config.cache_policy {
381            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
382            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
383            _ => {}
384        }
385
386        // trace!("OPEN: returning handle: {hd}");
387
388        Ok(ReplyOpen {
389            fh: hd,
390            flags: opts.bits(),
391        })
392    }
393
394    /// read data. Read should send exactly the number of bytes requested except on EOF or error,
395    /// otherwise the rest of the data will be substituted with zeroes. An exception to this is
396    /// when the file has been opened in `direct_io` mode, in which case the return value of the
397    /// read system call will reflect the return value of this operation. `fh` will contain the
398    /// value set by the open method, or will be undefined if the open method didn't set any value.
399    async fn read(
400        &self,
401        req: Request,
402        inode: Inode,
403        fh: u64,
404        offset: u64,
405        size: u32,
406    ) -> Result<ReplyData> {
407        let data = self.get_data(req, Some(fh), inode, 0).await?;
408
409        match data.real_handle {
410            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
411            Some(ref hd) => {
412                hd.layer
413                    .read(
414                        req,
415                        hd.inode,
416                        hd.handle.load(Ordering::Relaxed),
417                        offset,
418                        size,
419                    )
420                    .await
421            }
422        }
423    }
424
425    /// write data. Write should return exactly the number of bytes requested except on error. An
426    /// exception to this is when the file has been opened in `direct_io` mode, in which case the
427    /// return value of the write system call will reflect the return value of this operation. `fh`
428    /// will contain the value set by the open method, or will be undefined if the open method
429    /// didn't set any value. When `write_flags` contains
430    /// [`FUSE_WRITE_CACHE`](crate::raw::flags::FUSE_WRITE_CACHE), means the write operation is a
431    /// delay write.
432    #[allow(clippy::too_many_arguments)]
433    async fn write(
434        &self,
435        req: Request,
436        inode: Inode,
437        fh: u64,
438        offset: u64,
439        data: &[u8],
440        write_flags: u32,
441        flags: u32,
442    ) -> Result<ReplyWrite> {
443        let handle_data: Arc<HandleData> = self.get_data(req, Some(fh), inode, flags).await?;
444
445        match handle_data.real_handle {
446            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
447            Some(ref hd) => {
448                hd.layer
449                    .write(
450                        req,
451                        hd.inode,
452                        hd.handle.load(Ordering::Relaxed),
453                        offset,
454                        data,
455                        write_flags,
456                        flags,
457                    )
458                    .await
459            }
460        }
461    }
462
463    /// Copy a range of data from one file to another. This can improve performance because it
464    /// reduces data copying: normally, data will be copied from FUSE server to kernel, then to
465    /// user-space, then to kernel, and finally sent back to FUSE server. By implementing this
466    /// method, data will only be copied internally within the FUSE server.
467    #[allow(clippy::too_many_arguments)]
468    async fn copy_file_range(
469        &self,
470        req: Request,
471        inode_in: Inode,
472        fh_in: u64,
473        offset_in: u64,
474        inode_out: Inode,
475        fh_out: u64,
476        offset_out: u64,
477        length: u64,
478        flags: u64,
479    ) -> Result<ReplyCopyFileRange> {
480        // Get handle data for source file
481        let data_in = self.get_data(req, Some(fh_in), inode_in, 0).await?;
482        let handle_in = match data_in.real_handle {
483            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
484            Some(ref hd) => hd,
485        };
486
487        // Get handle data for destination file
488        let data_out = self.get_data(req, Some(fh_out), inode_out, 0).await?;
489        let handle_out = match data_out.real_handle {
490            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
491            Some(ref hd) => hd,
492        };
493
494        // Both files must be on the same layer for copy_file_range to work
495        if !Arc::ptr_eq(&handle_in.layer, &handle_out.layer) {
496            // Different layers - return EXDEV to trigger fallback to read/write
497            return Err(Error::from_raw_os_error(libc::EXDEV).into());
498        }
499
500        // Delegate to the underlying PassthroughFs layer
501        handle_in
502            .layer
503            .copy_file_range(
504                req,
505                handle_in.inode,
506                handle_in.handle.load(Ordering::Relaxed),
507                offset_in,
508                handle_out.inode,
509                handle_out.handle.load(Ordering::Relaxed),
510                offset_out,
511                length,
512                flags,
513            )
514            .await
515    }
516
517    /// get filesystem statistics.
518    async fn statfs(&self, req: Request, inode: Inode) -> Result<ReplyStatFs> {
519        self.do_statvfs(req, inode).await.map_err(|e| e.into())
520    }
521
522    /// release an open file. Release is called when there are no more references to an open file:
523    /// all file descriptors are closed and all memory mappings are unmapped. For every open call
524    /// there will be exactly one release call. The filesystem may reply with an error, but error
525    /// values are not returned to `close()` or `munmap()` which triggered the release. `fh` will
526    /// contain the value set by the open method, or will be undefined if the open method didn't
527    /// set any value. `flags` will contain the same flags as for open. `flush` means flush the
528    /// data or not when closing file.
529    async fn release(
530        &self,
531        req: Request,
532        _inode: Inode,
533        fh: u64,
534        flags: u32,
535        lock_owner: u64,
536        flush: bool,
537    ) -> Result<()> {
538        if self.no_open.load(Ordering::Relaxed) {
539            info!("fuse: release is not supported.");
540            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
541        }
542
543        if let Some(hd) = self.handles.lock().await.get(&fh) {
544            let rh = if let Some(ref h) = hd.real_handle {
545                h
546            } else {
547                return Err(
548                    Error::other(format!("no real handle found for file handle {fh}")).into(),
549                );
550            };
551            let real_handle = rh.handle.load(Ordering::Relaxed);
552            let real_inode = rh.inode;
553            rh.layer
554                .release(req, real_inode, real_handle, flags, lock_owner, flush)
555                .await?;
556        }
557
558        self.handles.lock().await.remove(&fh);
559
560        Ok(())
561    }
562
563    /// synchronize file contents. If the `datasync` is true, then only the user data should be
564    /// flushed, not the metadata.
565    async fn fsync(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
566        self.do_fsync(req, inode, datasync, fh, false)
567            .await
568            .map_err(|e| e.into())
569    }
570
571    /// set an extended attribute.
572    async fn setxattr(
573        &self,
574        req: Request,
575        inode: Inode,
576        name: &OsStr,
577        value: &[u8],
578        flags: u32,
579        position: u32,
580    ) -> Result<()> {
581        let node = self.lookup_node(req, inode, "").await?;
582
583        if node.whiteout.load(Ordering::Relaxed) {
584            return Err(Error::from_raw_os_error(libc::ENOENT).into());
585        }
586
587        if !node.in_upper_layer().await {
588            // Copy node up.
589            self.copy_node_up(req, node.clone()).await?;
590        }
591
592        let (layer, _, real_inode) = node.first_layer_inode().await;
593
594        layer
595            .setxattr(req, real_inode, name, value, flags, position)
596            .await
597    }
598
599    /// Get an extended attribute. If `size` is too small, return `Err<ERANGE>`.
600    /// Otherwise, use [`ReplyXAttr::Data`] to send the attribute data, or
601    /// return an error.
602    async fn getxattr(
603        &self,
604        req: Request,
605        inode: Inode,
606        name: &OsStr,
607        size: u32,
608    ) -> Result<ReplyXAttr> {
609        let node = self.lookup_node(req, inode, "").await?;
610
611        if node.whiteout.load(Ordering::Relaxed) {
612            return Err(Error::from_raw_os_error(libc::ENOENT).into());
613        }
614
615        let (layer, real_inode) = self.find_real_inode(inode).await?;
616
617        layer.getxattr(req, real_inode, name, size).await
618    }
619
620    /// List extended attribute names.
621    ///
622    /// If `size` is too small, return `Err<ERANGE>`.  Otherwise, use
623    /// [`ReplyXAttr::Data`] to send the attribute list, or return an error.
624    async fn listxattr(&self, req: Request, inode: Inode, size: u32) -> Result<ReplyXAttr> {
625        let node = self.lookup_node(req, inode, "").await?;
626        if node.whiteout.load(Ordering::Relaxed) {
627            return Err(Error::from_raw_os_error(libc::ENOENT).into());
628        }
629        let (layer, real_inode) = self.find_real_inode(inode).await?;
630        layer.listxattr(req, real_inode, size).await
631    }
632
633    /// remove an extended attribute.
634    async fn removexattr(&self, req: Request, inode: Inode, name: &OsStr) -> Result<()> {
635        let node = self.lookup_node(req, inode, "").await?;
636
637        if node.whiteout.load(Ordering::Relaxed) {
638            return Err(Error::from_raw_os_error(libc::ENOENT).into());
639        }
640
641        if !node.in_upper_layer().await {
642            // copy node into upper layer
643            self.copy_node_up(req, node.clone()).await?;
644        }
645
646        let (layer, _, ino) = node.first_layer_inode().await;
647        layer.removexattr(req, ino, name).await
648
649        // TODO: recreate the node since removexattr may remove the opaque xattr.
650    }
651
652    /// flush method. This is called on each `close()` of the opened file. Since file descriptors
653    /// can be duplicated (`dup`, `dup2`, `fork`), for one open call there may be many flush calls.
654    /// Filesystems shouldn't assume that flush will always be called after some writes, or that if
655    /// will be called at all. `fh` will contain the value set by the open method, or will be
656    /// undefined if the open method didn't set any value.
657    ///
658    /// # Notes:
659    ///
660    /// the name of the method is misleading, since (unlike fsync) the filesystem is not forced to
661    /// flush pending writes. One reason to flush data, is if the filesystem wants to return write
662    /// errors. If the filesystem supports file locking operations ([`setlk`][Filesystem::setlk],
663    /// [`getlk`][Filesystem::getlk]) it should remove all locks belonging to `lock_owner`.
664    async fn flush(&self, req: Request, inode: Inode, fh: u64, lock_owner: u64) -> Result<()> {
665        if self.no_open.load(Ordering::Relaxed) {
666            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
667        }
668
669        let node = self.lookup_node(req, inode, "").await;
670        match node {
671            Ok(n) => {
672                if n.whiteout.load(Ordering::Relaxed) {
673                    return Err(Error::from_raw_os_error(libc::ENOENT).into());
674                }
675            }
676            Err(e) => {
677                if e.raw_os_error() == Some(libc::ENOENT) {
678                    trace!("flush: inode {inode} is stale");
679                } else {
680                    return Err(e.into());
681                }
682            }
683        }
684
685        let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
686
687        // FIXME: need to test if inode matches corresponding handle?
688        if inode
689            != self
690                .handles
691                .lock()
692                .await
693                .get(&fh)
694                .map(|h| h.node.inode)
695                .unwrap_or(0)
696        {
697            return Err(Error::other("inode does not match handle").into());
698        }
699
700        trace!("flushing, real_inode: {real_inode}, real_handle: {real_handle}");
701        layer.flush(req, real_inode, real_handle, lock_owner).await
702    }
703
704    /// open a directory. Filesystem may store an arbitrary file handle (pointer, index, etc) in
705    /// `fh`, and use this in other all other directory stream operations
706    /// ([`readdir`][Filesystem::readdir], [`releasedir`][Filesystem::releasedir],
707    /// [`fsyncdir`][Filesystem::fsyncdir]). Filesystem may also implement stateless directory
708    /// I/O and not store anything in `fh`.  A file system need not implement this method if it
709    /// sets [`MountOptions::no_open_dir_support`][crate::MountOptions::no_open_dir_support] and
710    /// if the kernel supports `FUSE_NO_OPENDIR_SUPPORT`.
711    async fn opendir(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
712        if self.no_opendir.load(Ordering::Relaxed) {
713            info!("fuse: opendir is not supported.");
714            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
715        }
716
717        // lookup node
718        let node = self.lookup_node(req, inode, ".").await?;
719
720        if node.whiteout.load(Ordering::Relaxed) {
721            return Err(Error::from_raw_os_error(libc::ENOENT).into());
722        }
723
724        let st = node.stat64(req).await?;
725        if !utils::is_dir(&st.attr.kind) {
726            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
727        }
728
729        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
730        // Get the layer information and open directory in the underlying layer
731        let (layer, in_upper_layer, real_inode) = node.first_layer_inode().await;
732        let reply = layer.opendir(req, real_inode, flags).await?;
733
734        self.handles.lock().await.insert(
735            handle,
736            Arc::new(HandleData {
737                node: Arc::clone(&node),
738                real_handle: Some(RealHandle {
739                    layer,
740                    in_upper_layer,
741                    inode: real_inode,
742                    handle: AtomicU64::new(reply.fh),
743                }),
744                dir_snapshot: Mutex::new(None),
745            }),
746        );
747
748        Ok(ReplyOpen { fh: handle, flags })
749    }
750
751    /// read directory. `offset` is used to track the offset of the directory entries. `fh` will
752    /// contain the value set by the [`opendir`][Filesystem::opendir] method, or will be
753    /// undefined if the [`opendir`][Filesystem::opendir] method didn't set any value.
754    async fn readdir<'a>(
755        &'a self,
756        req: Request,
757        parent: Inode,
758        fh: u64,
759        offset: i64,
760    ) -> Result<
761        ReplyDirectory<
762            impl futures_util::stream::Stream<Item = Result<DirectoryEntry>> + Send + 'a,
763        >,
764    > {
765        if self.config.no_readdir {
766            info!("fuse: readdir is not supported.");
767            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
768        }
769        let entries = self
770            .do_readdir(req, parent, fh, offset.try_into().unwrap())
771            .await?;
772        Ok(ReplyDirectory { entries })
773    }
774
775    /// read directory entries, but with their attribute, like [`readdir`][Filesystem::readdir]
776    /// + [`lookup`][Filesystem::lookup] at the same time.
777    async fn readdirplus<'a>(
778        &'a self,
779        req: Request,
780        parent: Inode,
781        fh: u64,
782        offset: u64,
783        _lock_owner: u64,
784    ) -> Result<
785        ReplyDirectoryPlus<
786            impl futures_util::stream::Stream<Item = Result<DirectoryEntryPlus>> + Send + 'a,
787        >,
788    > {
789        if self.config.no_readdir {
790            info!("fuse: readdir is not supported.");
791            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
792        }
793        trace!("readdirplus: parent: {parent}, fh: {fh}, offset: {offset}");
794        let entries = self.do_readdirplus(req, parent, fh, offset).await?;
795        match self.handles.lock().await.get(&fh) {
796            Some(h) => {
797                trace!(
798                    "after readdirplus: found handle, seeing real_handle: {}",
799                    h.real_handle.is_some()
800                );
801            }
802            None => trace!("after readdirplus: no handle found: {fh}"),
803        }
804        Ok(ReplyDirectoryPlus { entries })
805    }
806    /// release an open directory. For every [`opendir`][Filesystem::opendir] call there will
807    /// be exactly one `releasedir` call. `fh` will contain the value set by the
808    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
809    /// [`opendir`][Filesystem::opendir] method didn't set any value.
810    async fn releasedir(&self, req: Request, _inode: Inode, fh: u64, flags: u32) -> Result<()> {
811        if self.no_opendir.load(Ordering::Relaxed) {
812            info!("fuse: releasedir is not supported.");
813            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
814        }
815
816        if let Some(hd) = self.handles.lock().await.get(&fh) {
817            let rh = if let Some(ref h) = hd.real_handle {
818                h
819            } else {
820                return Err(
821                    Error::other(format!("no real handle found for file handle {fh}")).into(),
822                );
823            };
824            let real_handle = rh.handle.load(Ordering::Relaxed);
825            let real_inode = rh.inode;
826            rh.layer
827                .releasedir(req, real_inode, real_handle, flags)
828                .await?;
829        }
830
831        self.handles.lock().await.remove(&fh);
832        Ok(())
833    }
834
835    /// synchronize directory contents. If the `datasync` is true, then only the directory contents
836    /// should be flushed, not the metadata. `fh` will contain the value set by the
837    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
838    /// [`opendir`][Filesystem::opendir] method didn't set any value.
839    async fn fsyncdir(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
840        self.do_fsync(req, inode, datasync, fh, true)
841            .await
842            .map_err(|e| e.into())
843    }
844
845    #[allow(clippy::too_many_arguments)]
846    async fn getlk(
847        &self,
848        req: Request,
849        _inode: Inode,
850        fh: u64,
851        lock_owner: u64,
852        start: u64,
853        end: u64,
854        r#type: u32,
855        pid: u32,
856    ) -> Result<ReplyLock> {
857        if !self.no_open.load(Ordering::Relaxed) {
858            let handles = self.handles.lock().await;
859            if let Some(hd) = handles.get(&fh)
860                && let Some(ref rh) = hd.real_handle
861            {
862                match rh
863                    .layer
864                    .getlk(
865                        req,
866                        rh.inode,
867                        rh.handle.load(Ordering::Relaxed),
868                        lock_owner,
869                        start,
870                        end,
871                        r#type,
872                        pid,
873                    )
874                    .await
875                {
876                    Ok(reply) => return Ok(reply),
877                    Err(e) => {
878                        // If underlying layer doesn't support locking, fall through to fallback
879                        let errno: i32 = e.into();
880                        if errno != libc::ENOSYS {
881                            return Err(errno.into());
882                        }
883                    }
884                }
885            }
886        }
887
888        // Fallback: report no lock conflict
889        Ok(ReplyLock {
890            start: 0,
891            end: 0,
892            r#type: libc::F_UNLCK as u32,
893            pid: 0,
894        })
895    }
896
897    #[allow(clippy::too_many_arguments)]
898    async fn setlk(
899        &self,
900        req: Request,
901        _inode: Inode,
902        fh: u64,
903        lock_owner: u64,
904        start: u64,
905        end: u64,
906        r#type: u32,
907        pid: u32,
908        block: bool,
909    ) -> Result<()> {
910        if !self.no_open.load(Ordering::Relaxed) {
911            let handles = self.handles.lock().await;
912            if let Some(hd) = handles.get(&fh)
913                && let Some(ref rh) = hd.real_handle
914            {
915                match rh
916                    .layer
917                    .setlk(
918                        req,
919                        rh.inode,
920                        rh.handle.load(Ordering::Relaxed),
921                        lock_owner,
922                        start,
923                        end,
924                        r#type,
925                        pid,
926                        block,
927                    )
928                    .await
929                {
930                    Ok(()) => return Ok(()),
931                    Err(e) => {
932                        // If underlying layer doesn't support locking, fall through to fallback
933                        let errno: i32 = e.into();
934                        if errno != libc::ENOSYS {
935                            return Err(errno.into());
936                        }
937                    }
938                }
939            }
940        }
941
942        // Fallback: silently accept the lock request
943        Ok(())
944    }
945    /// check file access permissions. This will be called for the `access()` system call. If the
946    /// `default_permissions` mount option is given, this method is not be called. This method is
947    /// not called under Linux kernel versions 2.4.x.
948    async fn access(&self, req: Request, inode: Inode, mask: u32) -> Result<()> {
949        let node = self.lookup_node(req, inode, "").await?;
950
951        if node.whiteout.load(Ordering::Relaxed) {
952            return Err(Error::from_raw_os_error(libc::ENOENT).into());
953        }
954
955        let (layer, real_inode) = self.find_real_inode(inode).await?;
956        layer.access(req, real_inode, mask).await
957    }
958
959    /// create and open a file. If the file does not exist, first create it with the specified
960    /// mode, and then open it. Open flags (with the exception of `O_NOCTTY`) are available in
961    /// flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in `fh`, and use
962    /// this in other all other file operations ([`read`][Filesystem::read],
963    /// [`write`][Filesystem::write], [`flush`][Filesystem::flush],
964    /// [`release`][Filesystem::release], [`fsync`][Filesystem::fsync]). There are also some flags
965    /// (`direct_io`, `keep_cache`) which the filesystem may set, to change the way the file is
966    /// opened. If this method is not implemented or under Linux kernel versions earlier than
967    /// 2.6.15, the [`mknod`][Filesystem::mknod] and [`open`][Filesystem::open] methods will be
968    /// called instead.
969    ///
970    /// # Notes:
971    ///
972    /// See `fuse_file_info` structure in
973    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
974    /// more details.
975    async fn create(
976        &self,
977        req: Request,
978        parent: Inode,
979        name: &OsStr,
980        mode: u32,
981        flags: u32,
982    ) -> Result<ReplyCreated> {
983        // Parent doesn't exist.
984        let pnode = self.lookup_node(req, parent, "").await?;
985        if pnode.whiteout.load(Ordering::Relaxed) {
986            return Err(Error::from_raw_os_error(libc::ENOENT).into());
987        }
988
989        let mut flags: i32 = flags as i32;
990        flags |= libc::O_NOFOLLOW;
991        #[cfg(target_os = "linux")]
992        {
993            flags &= !libc::O_DIRECT;
994        }
995        if self.config.writeback {
996            if flags & libc::O_ACCMODE == libc::O_WRONLY {
997                flags &= !libc::O_ACCMODE;
998                flags |= libc::O_RDWR;
999            }
1000
1001            if flags & libc::O_APPEND != 0 {
1002                flags &= !libc::O_APPEND;
1003            }
1004        }
1005
1006        let name_str = name
1007            .to_str()
1008            .ok_or_else(|| Error::from_raw_os_error(libc::EINVAL))?;
1009        let final_handle = self
1010            .do_create(req, &pnode, name, mode, flags.try_into().unwrap())
1011            .await?;
1012        let entry = self.do_lookup(req, parent, name_str).await?;
1013        let fh = final_handle
1014            .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "Handle not found"))?;
1015
1016        let mut opts = OpenOptions::empty();
1017        match self.config.cache_policy {
1018            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
1019            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
1020            _ => {}
1021        }
1022
1023        Ok(ReplyCreated {
1024            ttl: entry.ttl,
1025            attr: entry.attr,
1026            generation: entry.generation,
1027            fh,
1028            flags: opts.bits(),
1029        })
1030    }
1031
1032    /// forget more than one inode. This is a batch version [`forget`][Filesystem::forget]
1033    async fn batch_forget(&self, _req: Request, inodes: &[(Inode, u64)]) {
1034        for inode in inodes {
1035            self.forget_one(inode.0, inode.1).await;
1036        }
1037    }
1038
1039    /// allocate space for an open file. This function ensures that required space is allocated for
1040    /// specified file.
1041    ///
1042    /// # Notes:
1043    ///
1044    /// more information about `fallocate`, please see **`man 2 fallocate`**
1045    async fn fallocate(
1046        &self,
1047        req: Request,
1048        inode: Inode,
1049        fh: u64,
1050        offset: u64,
1051        length: u64,
1052        mode: u32,
1053    ) -> Result<()> {
1054        // Use O_RDONLY flags which indicates no copy up.
1055        let data = self
1056            .get_data(req, Some(fh), inode, libc::O_RDONLY as u32)
1057            .await?;
1058
1059        match data.real_handle {
1060            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
1061            Some(ref rhd) => {
1062                if !rhd.in_upper_layer {
1063                    // TODO: in lower layer, error out or just success?
1064                    return Err(Error::from_raw_os_error(libc::EROFS).into());
1065                }
1066                rhd.layer
1067                    .fallocate(
1068                        req,
1069                        rhd.inode,
1070                        rhd.handle.load(Ordering::Relaxed),
1071                        offset,
1072                        length,
1073                        mode,
1074                    )
1075                    .await
1076            }
1077        }
1078    }
1079
1080    /// find next data or hole after the specified offset.
1081    async fn lseek(
1082        &self,
1083        req: Request,
1084        inode: Inode,
1085        fh: u64,
1086        offset: u64,
1087        whence: u32,
1088    ) -> Result<ReplyLSeek> {
1089        let node = self.lookup_node(req, inode, "").await?;
1090
1091        if node.whiteout.load(Ordering::Relaxed) {
1092            return Err(Error::from_raw_os_error(libc::ENOENT).into());
1093        }
1094
1095        let st = node.stat64(req).await?;
1096        if utils::is_dir(&st.attr.kind) {
1097            // Special handling and security restrictions for directory operations.
1098            // Use the common API to obtain the underlying layer and handle info.
1099            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1100
1101            // Verify that the underlying handle refers to a directory.
1102            let handle_stat = match layer.getattr(req, real_inode, Some(real_handle), 0).await {
1103                Ok(s) => s,
1104                Err(_) => return Err(Error::from_raw_os_error(libc::EBADF).into()),
1105            };
1106
1107            if !utils::is_dir(&handle_stat.attr.kind) {
1108                return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
1109            }
1110
1111            // Handle directory lseek operations according to POSIX standard
1112            // This enables seekdir/telldir functionality on directories
1113            match whence {
1114                // SEEK_SET: Set the directory position to an absolute value
1115                x if x == libc::SEEK_SET as u32 => {
1116                    // Validate offset bounds to prevent overflow
1117                    // Directory offsets should not exceed i64::MAX
1118                    if offset > i64::MAX as u64 {
1119                        return Err(Error::from_raw_os_error(libc::EINVAL).into());
1120                    }
1121
1122                    // Perform the seek operation on the underlying layer
1123                    // Delegate to the lower layer implementation
1124                    layer
1125                        .lseek(req, real_inode, real_handle, offset, whence)
1126                        .await
1127                }
1128                // SEEK_CUR: Move relative to the current directory position
1129                x if x == libc::SEEK_CUR as u32 => {
1130                    // Get current position from underlying layer
1131                    // This is needed to calculate the new position
1132                    let current = match layer
1133                        .lseek(req, real_inode, real_handle, 0, libc::SEEK_CUR as u32)
1134                        .await
1135                    {
1136                        Ok(r) => r.offset,
1137                        Err(_) => return Err(Error::from_raw_os_error(libc::EINVAL).into()),
1138                    };
1139
1140                    // Check for potential overflow when adding the provided offset
1141                    // This prevents invalid position calculations
1142                    if let Some(new_offset) = current.checked_add(offset) {
1143                        // Ensure the new offset is within valid bounds
1144                        if new_offset > i64::MAX as u64 {
1145                            return Err(Error::from_raw_os_error(libc::EINVAL).into());
1146                        }
1147
1148                        // Actually set the underlying offset to the new value so behavior
1149                        // matches passthrough which uses libc::lseek64 to set the fd offset.
1150                        match layer
1151                            .lseek(
1152                                req,
1153                                real_inode,
1154                                real_handle,
1155                                new_offset,
1156                                libc::SEEK_SET as u32,
1157                            )
1158                            .await
1159                        {
1160                            Ok(_) => Ok(ReplyLSeek { offset: new_offset }),
1161                            Err(_) => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1162                        }
1163                    } else {
1164                        Err(Error::from_raw_os_error(libc::EINVAL).into())
1165                    }
1166                }
1167                // Any other whence value is invalid for directories
1168                _ => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1169            }
1170        } else {
1171            // Keep the original lseek behavior for regular files
1172            // Delegate directly to the underlying layer
1173            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1174            layer
1175                .lseek(req, real_inode, real_handle, offset, whence)
1176                .await
1177        }
1178    }
1179
1180    async fn interrupt(&self, _req: Request, _unique: u64) -> Result<()> {
1181        Ok(())
1182    }
1183}
#[cfg(test)]
mod tests {
    use std::{env, ffi::OsString, path::PathBuf, sync::Arc};

    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;
    use tracing_subscriber::EnvFilter;

    use crate::{
        overlayfs::{OverlayFs, config::Config},
        passthrough::{PassthroughArgs, new_passthroughfs_layer},
    };
    use rfuse3::raw::logfs::LoggingFileSystem;

    /// Returns the path stored in environment variable `key`, or `default` when it is unset.
    fn path_from_env(key: &str, default: &str) -> PathBuf {
        env::var_os(key).map_or_else(|| PathBuf::from(default), PathBuf::from)
    }

    /// Manual smoke test: mounts an overlay (one passthrough lower layer plus a passthrough
    /// upper layer) behind a `LoggingFileSystem` and serves it until the session ends or
    /// Ctrl-C is received.
    ///
    /// Ignored by default because it needs real directories and a FUSE mount. The directories
    /// can be overridden with `OVLFS_TEST_MOUNTPOINT`, `OVLFS_TEST_LOWERDIR` and
    /// `OVLFS_TEST_UPPERDIR`; the original hard-coded paths remain the defaults.
    #[tokio::test]
    #[ignore]
    async fn test_a_ovlfs() {
        let _ = tracing_subscriber::fmt()
            .with_env_filter(EnvFilter::from_default_env().add_directive("trace".parse().unwrap()))
            .try_init();

        // Set up test environment
        let mountpoint = path_from_env("OVLFS_TEST_MOUNTPOINT", "/home/luxian/megatest/true_temp");
        let lowerdir = vec![path_from_env(
            "OVLFS_TEST_LOWERDIR",
            "/home/luxian/github/buck2-rust-third-party",
        )];
        let upperdir = path_from_env("OVLFS_TEST_UPPERDIR", "/home/luxian/upper");

        // Create lower layers
        let mut lower_layers = Vec::new();
        for lower in &lowerdir {
            let layer = new_passthroughfs_layer(PassthroughArgs {
                root_dir: lower.clone(),
                mapping: None::<&str>,
            })
            .await
            .unwrap();
            lower_layers.push(Arc::new(layer));
        }
        // Create upper layer
        let upper_layer = Arc::new(
            new_passthroughfs_layer(PassthroughArgs {
                root_dir: upperdir,
                mapping: None::<&str>,
            })
            .await
            .unwrap(),
        );
        // Create overlayfs
        let config = Config {
            mountpoint: mountpoint.clone(),
            do_import: true,
            ..Default::default()
        };

        let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1).unwrap();

        let logfs = LoggingFileSystem::new(overlayfs);

        let mount_path: OsString = OsString::from(mountpoint);

        // SAFETY: `getuid` and `getgid` take no arguments and only return the caller's ids.
        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };

        // Flip to `false` to use the regular (privileged) mount path instead.
        let unprivileged = true;

        let mut mount_options = MountOptions::default();
        #[cfg(target_os = "linux")]
        mount_options.force_readdir_plus(true);
        mount_options.uid(uid).gid(gid);

        let mut mount_handle: rfuse3::raw::MountHandle = if unprivileged {
            Session::new(mount_options)
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        } else {
            Session::new(mount_options)
                .mount(logfs, mount_path)
                .await
                .unwrap()
        };

        let handle = &mut mount_handle;

        // Serve until the session finishes on its own, or unmount on Ctrl-C.
        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}