Skip to main content

libfuse_fs/overlayfs/
async_io.rs

1use super::Inode;
2use super::OverlayFs;
3use super::utils;
4use crate::overlayfs::HandleData;
5use crate::overlayfs::RealHandle;
6use crate::overlayfs::{AtomicU64, CachePolicy};
7use crate::util::open_options::OpenOptions;
8use rfuse3::raw::prelude::*;
9use rfuse3::*;
10use std::ffi::OsStr;
11use std::io::Error;
12use std::io::ErrorKind;
13use std::num::NonZeroU32;
14use std::sync::Arc;
15use std::sync::atomic::Ordering;
16use tokio::sync::Mutex;
17use tracing::info;
18use tracing::trace;
19
20impl Filesystem for OverlayFs {
21    /// initialize filesystem. Called before any other filesystem method.
22    async fn init(&self, _req: Request) -> Result<ReplyInit> {
23        if self.config.do_import {
24            self.import().await?;
25        }
26        #[cfg(target_os = "linux")]
27        {
28            for layer in self.lower_layers.iter() {
29                layer.init(_req).await?;
30            }
31            if let Some(upper) = &self.upper_layer {
32                upper.init(_req).await?;
33            }
34        }
35        if !self.config.do_import || self.config.writeback {
36            self.writeback.store(true, Ordering::Relaxed);
37        }
38        if !self.config.do_import || self.config.no_open {
39            self.no_open.store(true, Ordering::Relaxed);
40        }
41        if !self.config.do_import || self.config.no_opendir {
42            self.no_opendir.store(true, Ordering::Relaxed);
43        }
44        if !self.config.do_import || self.config.killpriv_v2 {
45            self.killpriv_v2.store(true, Ordering::Relaxed);
46        }
47        if self.config.perfile_dax {
48            self.perfile_dax.store(true, Ordering::Relaxed);
49        }
50
51        Ok(ReplyInit {
52            max_write: NonZeroU32::new(128 * 1024).unwrap(),
53        })
54    }
55
56    /// clean up filesystem. Called on filesystem exit which is fuseblk, in normal fuse filesystem,
57    /// kernel may call forget for root. There is some discuss for this
58    /// <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
59    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
60    async fn destroy(&self, _req: Request) {}
61
62    /// look up a directory entry by name and get its attributes.
63    async fn lookup(&self, req: Request, parent: Inode, name: &OsStr) -> Result<ReplyEntry> {
64        let tmp = name.to_string_lossy().to_string();
65        let result = self.do_lookup(req, parent, tmp.as_str()).await;
66        match result {
67            Ok(e) => Ok(e),
68            Err(err) => Err(err.into()),
69        }
70    }
71
72    /// forget an inode. The nlookup parameter indicates the number of lookups previously
73    /// performed on this inode. If the filesystem implements inode lifetimes, it is recommended
74    /// that inodes acquire a single reference on each lookup, and lose nlookup references on each
75    /// forget. The filesystem may ignore forget calls, if the inodes don't need to have a limited
76    /// lifetime. On unmount it is not guaranteed, that all referenced inodes will receive a forget
77    /// message. When filesystem is normal(not fuseblk) and unmounting, kernel may send forget
78    /// request for root and this library will stop session after call forget. There is some
79    /// discussion for this <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
80    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
81    async fn forget(&self, _req: Request, inode: Inode, nlookup: u64) {
82        self.forget_one(inode, nlookup).await;
83    }
84
85    /// get file attributes. If `fh` is None, means `fh` is not set.
86    async fn getattr(
87        &self,
88        req: Request,
89        inode: Inode,
90        fh: Option<u64>,
91        flags: u32,
92    ) -> Result<ReplyAttr> {
93        if !self.no_open.load(Ordering::Relaxed)
94            && let Some(h) = fh
95        {
96            let handles = self.handles.lock().await;
97            if let Some(hd) = handles.get(&h)
98                && let Some(ref rh) = hd.real_handle
99            {
100                let mut rep: ReplyAttr = rh
101                    .layer
102                    .getattr(req, rh.inode, Some(rh.handle.load(Ordering::Relaxed)), 0)
103                    .await?;
104                rep.attr.ino = inode;
105                return Ok(rep);
106            }
107        }
108
109        let node: Arc<super::OverlayInode> = self.lookup_node(req, inode, "").await?;
110        let (layer, _, lower_inode) = node.first_layer_inode().await;
111        let mut re = layer.getattr(req, lower_inode, None, flags).await?;
112        re.attr.ino = inode;
113        Ok(re)
114    }
115
116    /// set file attributes. If `fh` is None, means `fh` is not set.
117    async fn setattr(
118        &self,
119        req: Request,
120        inode: Inode,
121        fh: Option<u64>,
122        set_attr: SetAttr,
123    ) -> Result<ReplyAttr> {
124        // Check if upper layer exists.
125        self.upper_layer
126            .as_ref()
127            .cloned()
128            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
129
130        // deal with handle first
131        if !self.no_open.load(Ordering::Relaxed)
132            && let Some(h) = fh
133        {
134            let handles = self.handles.lock().await;
135            if let Some(hd) = handles.get(&h)
136                && let Some(ref rhd) = hd.real_handle
137            {
138                // handle opened in upper layer
139                if rhd.in_upper_layer {
140                    let mut rep = rhd
141                        .layer
142                        .setattr(
143                            req,
144                            rhd.inode,
145                            Some(rhd.handle.load(Ordering::Relaxed)),
146                            set_attr,
147                        )
148                        .await?;
149                    rep.attr.ino = inode;
150                    return Ok(rep);
151                }
152            }
153        }
154
155        let mut node = self.lookup_node(req, inode, "").await?;
156
157        if !node.in_upper_layer().await {
158            node = self.copy_node_up(req, node.clone()).await?
159        }
160
161        let (layer, _, real_inode) = node.first_layer_inode().await;
162        // layer.setattr(req, real_inode, None, set_attr).await
163        let mut rep = layer.setattr(req, real_inode, None, set_attr).await?;
164        rep.attr.ino = inode;
165        Ok(rep)
166    }
167
168    /// read symbolic link.
169    async fn readlink(&self, req: Request, inode: Inode) -> Result<ReplyData> {
170        trace!("READLINK: inode: {inode}\n");
171
172        let node = self.lookup_node(req, inode, "").await?;
173
174        if node.whiteout.load(Ordering::Relaxed) {
175            return Err(Error::from_raw_os_error(libc::ENOENT).into());
176        }
177
178        let (layer, _, inode) = node.first_layer_inode().await;
179        layer.readlink(req, inode).await
180    }
181
182    /// create a symbolic link.
183    async fn symlink(
184        &self,
185        req: Request,
186        parent: Inode,
187        name: &OsStr,
188        link: &OsStr,
189    ) -> Result<ReplyEntry> {
190        // soft link
191        let sname = name.to_string_lossy().into_owned().to_owned();
192        let slinkname = link.to_string_lossy().into_owned().to_owned();
193
194        let pnode = self.lookup_node(req, parent, "").await?;
195        self.do_symlink(req, slinkname.as_str(), &pnode, sname.as_str())
196            .await?;
197
198        self.do_lookup(req, parent, sname.as_str())
199            .await
200            .map_err(|e| e.into())
201    }
202
203    /// create file node. Create a regular file, character device, block device, fifo or socket
204    /// node. When creating file, most cases user only need to implement
205    /// [`create`][Filesystem::create].
206    async fn mknod(
207        &self,
208        req: Request,
209        parent: Inode,
210        name: &OsStr,
211        mode: u32,
212        rdev: u32,
213    ) -> Result<ReplyEntry> {
214        let sname = name.to_string_lossy().to_string();
215
216        // Check if parent exists.
217        let pnode = self.lookup_node(req, parent, "").await?;
218        if pnode.whiteout.load(Ordering::Relaxed) {
219            return Err(Error::from_raw_os_error(libc::ENOENT).into());
220        }
221
222        self.do_mknod(req, &pnode, sname.as_str(), mode, rdev, 0)
223            .await?;
224        self.do_lookup(req, parent, sname.as_str())
225            .await
226            .map_err(|e| e.into())
227    }
228
229    /// create a directory.
230    async fn mkdir(
231        &self,
232        req: Request,
233        parent: Inode,
234        name: &OsStr,
235        mode: u32,
236        umask: u32,
237    ) -> Result<ReplyEntry> {
238        let sname = name.to_string_lossy().to_string();
239
240        // no entry or whiteout
241        let pnode = self.lookup_node(req, parent, "").await?;
242        if pnode.whiteout.load(Ordering::Relaxed) {
243            return Err(Error::from_raw_os_error(libc::ENOENT).into());
244        }
245
246        self.do_mkdir(req, pnode, sname.as_str(), mode, umask)
247            .await?;
248        self.do_lookup(req, parent, sname.as_str())
249            .await
250            .map_err(|e| e.into())
251    }
252
253    /// remove a file.
254    async fn unlink(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
255        self.do_rm(req, parent, name, false)
256            .await
257            .map_err(|e| e.into())
258    }
259
260    /// remove a directory.
261    async fn rmdir(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
262        self.do_rm(req, parent, name, true)
263            .await
264            .map_err(|e| e.into())
265    }
266
267    /// rename a file or directory.
268    async fn rename(
269        &self,
270        req: Request,
271        parent: Inode,
272        name: &OsStr,
273        new_parent: Inode,
274        new_name: &OsStr,
275    ) -> Result<()> {
276        self.do_rename(req, parent, name, new_parent, new_name)
277            .await
278            .map_err(|e| e.into())
279    }
280
281    /// create a hard link.
282    async fn link(
283        &self,
284        req: Request,
285        inode: Inode,
286        new_parent: Inode,
287        new_name: &OsStr,
288    ) -> Result<ReplyEntry> {
289        let node = self.lookup_node(req, inode, "").await?;
290        if node.whiteout.load(Ordering::Relaxed) {
291            return Err(Error::from_raw_os_error(libc::ENOENT).into());
292        }
293
294        let newpnode = self.lookup_node(req, new_parent, "").await?;
295        if newpnode.whiteout.load(Ordering::Relaxed) {
296            return Err(Error::from_raw_os_error(libc::ENOENT).into());
297        }
298        let new_name = new_name.to_str().unwrap();
299        // trace!(
300        //     "LINK: inode: {}, new_parent: {}, trying to do_link: src_inode: {}, newpnode: {}",
301        //     inode, new_parent, node.inode, newpnode.inode
302        // );
303        self.do_link(req, &node, &newpnode, new_name).await?;
304        // trace!("LINK: done, looking up new entry");
305        self.do_lookup(req, new_parent, new_name)
306            .await
307            .map_err(|e| e.into())
308    }
309
310    /// open a file. Open flags (with the exception of `O_CREAT`, `O_EXCL` and `O_NOCTTY`) are
311    /// available in flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in
312    /// fh, and use this in other all other file operations (read, write, flush, release, fsync).
313    /// Filesystem may also implement stateless file I/O and not store anything in fh. There are
314    /// also some flags (`direct_io`, `keep_cache`) which the filesystem may set, to change the way
315    /// the file is opened. A filesystem need not implement this method if it
316    /// sets [`MountOptions::no_open_support`][crate::MountOptions::no_open_support] and if the
317    /// kernel supports `FUSE_NO_OPEN_SUPPORT`.
318    ///
319    /// # Notes:
320    ///
321    /// See `fuse_file_info` structure in
322    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
323    /// more details.
324    async fn open(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
325        if self.no_open.load(Ordering::Relaxed) {
326            info!("fuse: open is not supported.");
327            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
328        }
329
330        let readonly: bool = flags
331            & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
332                as u32
333            == 0;
334        // toggle flags
335        let mut flags: i32 = flags as i32;
336
337        flags |= libc::O_NOFOLLOW;
338
339        if self.config.writeback {
340            if flags & libc::O_ACCMODE == libc::O_WRONLY {
341                flags &= !libc::O_ACCMODE;
342                flags |= libc::O_RDWR;
343            }
344
345            if flags & libc::O_APPEND != 0 {
346                flags &= !libc::O_APPEND;
347            }
348        }
349        // lookup node
350        let node = self.lookup_node(req, inode, "").await?;
351
352        // whiteout node
353        if node.whiteout.load(Ordering::Relaxed) {
354            return Err(Error::from_raw_os_error(libc::ENOENT).into());
355        }
356
357        if !readonly {
358            // copy up to upper layer
359            self.copy_node_up(req, node.clone()).await?;
360        }
361
362        // assign a handle in overlayfs and open it
363        let (_l, h) = node.open(req, flags as u32, 0).await?;
364
365        let hd = self.next_handle.fetch_add(1, Ordering::Relaxed);
366        let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
367        let handle_data = HandleData {
368            node: node.clone(),
369            real_handle: Some(RealHandle {
370                layer,
371                in_upper_layer,
372                inode,
373                handle: AtomicU64::new(h.fh),
374            }),
375            dir_snapshot: Mutex::new(None),
376        };
377
378        self.handles.lock().await.insert(hd, Arc::new(handle_data));
379
380        let mut opts = OpenOptions::empty();
381        match self.config.cache_policy {
382            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
383            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
384            _ => {}
385        }
386
387        // trace!("OPEN: returning handle: {hd}");
388
389        Ok(ReplyOpen {
390            fh: hd,
391            flags: opts.bits(),
392        })
393    }
394
395    /// read data. Read should send exactly the number of bytes requested except on EOF or error,
396    /// otherwise the rest of the data will be substituted with zeroes. An exception to this is
397    /// when the file has been opened in `direct_io` mode, in which case the return value of the
398    /// read system call will reflect the return value of this operation. `fh` will contain the
399    /// value set by the open method, or will be undefined if the open method didn't set any value.
400    async fn read(
401        &self,
402        req: Request,
403        inode: Inode,
404        fh: u64,
405        offset: u64,
406        size: u32,
407    ) -> Result<ReplyData> {
408        let data = self.get_data(req, Some(fh), inode, 0).await?;
409
410        match data.real_handle {
411            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
412            Some(ref hd) => {
413                hd.layer
414                    .read(
415                        req,
416                        hd.inode,
417                        hd.handle.load(Ordering::Relaxed),
418                        offset,
419                        size,
420                    )
421                    .await
422            }
423        }
424    }
425
426    /// write data. Write should return exactly the number of bytes requested except on error. An
427    /// exception to this is when the file has been opened in `direct_io` mode, in which case the
428    /// return value of the write system call will reflect the return value of this operation. `fh`
429    /// will contain the value set by the open method, or will be undefined if the open method
430    /// didn't set any value. When `write_flags` contains
431    /// [`FUSE_WRITE_CACHE`](crate::raw::flags::FUSE_WRITE_CACHE), means the write operation is a
432    /// delay write.
433    #[allow(clippy::too_many_arguments)]
434    async fn write(
435        &self,
436        req: Request,
437        inode: Inode,
438        fh: u64,
439        offset: u64,
440        data: &[u8],
441        write_flags: u32,
442        flags: u32,
443    ) -> Result<ReplyWrite> {
444        let handle_data: Arc<HandleData> = self.get_data(req, Some(fh), inode, flags).await?;
445
446        match handle_data.real_handle {
447            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
448            Some(ref hd) => {
449                hd.layer
450                    .write(
451                        req,
452                        hd.inode,
453                        hd.handle.load(Ordering::Relaxed),
454                        offset,
455                        data,
456                        write_flags,
457                        flags,
458                    )
459                    .await
460            }
461        }
462    }
463
464    /// Copy a range of data from one file to another. This can improve performance because it
465    /// reduces data copying: normally, data will be copied from FUSE server to kernel, then to
466    /// user-space, then to kernel, and finally sent back to FUSE server. By implementing this
467    /// method, data will only be copied internally within the FUSE server.
468    #[allow(clippy::too_many_arguments)]
469    async fn copy_file_range(
470        &self,
471        req: Request,
472        inode_in: Inode,
473        fh_in: u64,
474        offset_in: u64,
475        inode_out: Inode,
476        fh_out: u64,
477        offset_out: u64,
478        length: u64,
479        flags: u64,
480    ) -> Result<ReplyCopyFileRange> {
481        // Get handle data for source file
482        let data_in = self.get_data(req, Some(fh_in), inode_in, 0).await?;
483        let handle_in = match data_in.real_handle {
484            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
485            Some(ref hd) => hd,
486        };
487
488        // Get handle data for destination file
489        let data_out = self.get_data(req, Some(fh_out), inode_out, 0).await?;
490        let handle_out = match data_out.real_handle {
491            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
492            Some(ref hd) => hd,
493        };
494
495        // Both files must be on the same layer for copy_file_range to work
496        if !Arc::ptr_eq(&handle_in.layer, &handle_out.layer) {
497            // Different layers - return EXDEV to trigger fallback to read/write
498            return Err(Error::from_raw_os_error(libc::EXDEV).into());
499        }
500
501        // Delegate to the underlying PassthroughFs layer
502        handle_in
503            .layer
504            .copy_file_range(
505                req,
506                handle_in.inode,
507                handle_in.handle.load(Ordering::Relaxed),
508                offset_in,
509                handle_out.inode,
510                handle_out.handle.load(Ordering::Relaxed),
511                offset_out,
512                length,
513                flags,
514            )
515            .await
516    }
517
518    /// get filesystem statistics.
519    async fn statfs(&self, req: Request, inode: Inode) -> Result<ReplyStatFs> {
520        self.do_statvfs(req, inode).await.map_err(|e| e.into())
521    }
522
523    /// release an open file. Release is called when there are no more references to an open file:
524    /// all file descriptors are closed and all memory mappings are unmapped. For every open call
525    /// there will be exactly one release call. The filesystem may reply with an error, but error
526    /// values are not returned to `close()` or `munmap()` which triggered the release. `fh` will
527    /// contain the value set by the open method, or will be undefined if the open method didn't
528    /// set any value. `flags` will contain the same flags as for open. `flush` means flush the
529    /// data or not when closing file.
530    async fn release(
531        &self,
532        req: Request,
533        _inode: Inode,
534        fh: u64,
535        flags: u32,
536        lock_owner: u64,
537        flush: bool,
538    ) -> Result<()> {
539        if self.no_open.load(Ordering::Relaxed) {
540            info!("fuse: release is not supported.");
541            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
542        }
543
544        if let Some(hd) = self.handles.lock().await.get(&fh) {
545            let rh = if let Some(ref h) = hd.real_handle {
546                h
547            } else {
548                return Err(
549                    Error::other(format!("no real handle found for file handle {fh}")).into(),
550                );
551            };
552            let real_handle = rh.handle.load(Ordering::Relaxed);
553            let real_inode = rh.inode;
554            rh.layer
555                .release(req, real_inode, real_handle, flags, lock_owner, flush)
556                .await?;
557        }
558
559        self.handles.lock().await.remove(&fh);
560
561        Ok(())
562    }
563
564    /// synchronize file contents. If the `datasync` is true, then only the user data should be
565    /// flushed, not the metadata.
566    async fn fsync(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
567        self.do_fsync(req, inode, datasync, fh, false)
568            .await
569            .map_err(|e| e.into())
570    }
571
572    /// set an extended attribute.
573    async fn setxattr(
574        &self,
575        req: Request,
576        inode: Inode,
577        name: &OsStr,
578        value: &[u8],
579        flags: u32,
580        position: u32,
581    ) -> Result<()> {
582        let node = self.lookup_node(req, inode, "").await?;
583
584        if node.whiteout.load(Ordering::Relaxed) {
585            return Err(Error::from_raw_os_error(libc::ENOENT).into());
586        }
587
588        if !node.in_upper_layer().await {
589            // Copy node up.
590            self.copy_node_up(req, node.clone()).await?;
591        }
592
593        let (layer, _, real_inode) = node.first_layer_inode().await;
594
595        layer
596            .setxattr(req, real_inode, name, value, flags, position)
597            .await
598    }
599
600    /// Get an extended attribute. If `size` is too small, return `Err<ERANGE>`.
601    /// Otherwise, use [`ReplyXAttr::Data`] to send the attribute data, or
602    /// return an error.
603    async fn getxattr(
604        &self,
605        req: Request,
606        inode: Inode,
607        name: &OsStr,
608        size: u32,
609    ) -> Result<ReplyXAttr> {
610        let node = self.lookup_node(req, inode, "").await?;
611
612        if node.whiteout.load(Ordering::Relaxed) {
613            return Err(Error::from_raw_os_error(libc::ENOENT).into());
614        }
615
616        let (layer, real_inode) = self.find_real_inode(inode).await?;
617
618        layer.getxattr(req, real_inode, name, size).await
619    }
620
621    /// List extended attribute names.
622    ///
623    /// If `size` is too small, return `Err<ERANGE>`.  Otherwise, use
624    /// [`ReplyXAttr::Data`] to send the attribute list, or return an error.
625    async fn listxattr(&self, req: Request, inode: Inode, size: u32) -> Result<ReplyXAttr> {
626        let node = self.lookup_node(req, inode, "").await?;
627        if node.whiteout.load(Ordering::Relaxed) {
628            return Err(Error::from_raw_os_error(libc::ENOENT).into());
629        }
630        let (layer, real_inode) = self.find_real_inode(inode).await?;
631        layer.listxattr(req, real_inode, size).await
632    }
633
634    /// remove an extended attribute.
635    async fn removexattr(&self, req: Request, inode: Inode, name: &OsStr) -> Result<()> {
636        let node = self.lookup_node(req, inode, "").await?;
637
638        if node.whiteout.load(Ordering::Relaxed) {
639            return Err(Error::from_raw_os_error(libc::ENOENT).into());
640        }
641
642        if !node.in_upper_layer().await {
643            // copy node into upper layer
644            self.copy_node_up(req, node.clone()).await?;
645        }
646
647        let (layer, _, ino) = node.first_layer_inode().await;
648        layer.removexattr(req, ino, name).await
649
650        // TODO: recreate the node since removexattr may remove the opaque xattr.
651    }
652
653    /// flush method. This is called on each `close()` of the opened file. Since file descriptors
654    /// can be duplicated (`dup`, `dup2`, `fork`), for one open call there may be many flush calls.
655    /// Filesystems shouldn't assume that flush will always be called after some writes, or that if
656    /// will be called at all. `fh` will contain the value set by the open method, or will be
657    /// undefined if the open method didn't set any value.
658    ///
659    /// # Notes:
660    ///
661    /// the name of the method is misleading, since (unlike fsync) the filesystem is not forced to
662    /// flush pending writes. One reason to flush data, is if the filesystem wants to return write
663    /// errors. If the filesystem supports file locking operations ([`setlk`][Filesystem::setlk],
664    /// [`getlk`][Filesystem::getlk]) it should remove all locks belonging to `lock_owner`.
665    async fn flush(&self, req: Request, inode: Inode, fh: u64, lock_owner: u64) -> Result<()> {
666        if self.no_open.load(Ordering::Relaxed) {
667            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
668        }
669
670        let node = self.lookup_node(req, inode, "").await;
671        match node {
672            Ok(n) => {
673                if n.whiteout.load(Ordering::Relaxed) {
674                    return Err(Error::from_raw_os_error(libc::ENOENT).into());
675                }
676            }
677            Err(e) => {
678                if e.raw_os_error() == Some(libc::ENOENT) {
679                    trace!("flush: inode {inode} is stale");
680                } else {
681                    return Err(e.into());
682                }
683            }
684        }
685
686        let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
687
688        // FIXME: need to test if inode matches corresponding handle?
689        if inode
690            != self
691                .handles
692                .lock()
693                .await
694                .get(&fh)
695                .map(|h| h.node.inode)
696                .unwrap_or(0)
697        {
698            return Err(Error::other("inode does not match handle").into());
699        }
700
701        trace!("flushing, real_inode: {real_inode}, real_handle: {real_handle}");
702        layer.flush(req, real_inode, real_handle, lock_owner).await
703    }
704
705    /// open a directory. Filesystem may store an arbitrary file handle (pointer, index, etc) in
706    /// `fh`, and use this in other all other directory stream operations
707    /// ([`readdir`][Filesystem::readdir], [`releasedir`][Filesystem::releasedir],
708    /// [`fsyncdir`][Filesystem::fsyncdir]). Filesystem may also implement stateless directory
709    /// I/O and not store anything in `fh`.  A file system need not implement this method if it
710    /// sets [`MountOptions::no_open_dir_support`][crate::MountOptions::no_open_dir_support] and
711    /// if the kernel supports `FUSE_NO_OPENDIR_SUPPORT`.
712    async fn opendir(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
713        if self.no_opendir.load(Ordering::Relaxed) {
714            info!("fuse: opendir is not supported.");
715            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
716        }
717
718        // lookup node
719        let node = self.lookup_node(req, inode, ".").await?;
720
721        if node.whiteout.load(Ordering::Relaxed) {
722            return Err(Error::from_raw_os_error(libc::ENOENT).into());
723        }
724
725        let st = node.stat64(req).await?;
726        if !utils::is_dir(&st.attr.kind) {
727            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
728        }
729
730        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
731        // Get the layer information and open directory in the underlying layer
732        let (layer, in_upper_layer, real_inode) = node.first_layer_inode().await;
733        let reply = layer.opendir(req, real_inode, flags).await?;
734
735        self.handles.lock().await.insert(
736            handle,
737            Arc::new(HandleData {
738                node: Arc::clone(&node),
739                real_handle: Some(RealHandle {
740                    layer,
741                    in_upper_layer,
742                    inode: real_inode,
743                    handle: AtomicU64::new(reply.fh),
744                }),
745                dir_snapshot: Mutex::new(None),
746            }),
747        );
748
749        Ok(ReplyOpen { fh: handle, flags })
750    }
751
752    /// read directory. `offset` is used to track the offset of the directory entries. `fh` will
753    /// contain the value set by the [`opendir`][Filesystem::opendir] method, or will be
754    /// undefined if the [`opendir`][Filesystem::opendir] method didn't set any value.
755    async fn readdir<'a>(
756        &'a self,
757        req: Request,
758        parent: Inode,
759        fh: u64,
760        offset: i64,
761    ) -> Result<
762        ReplyDirectory<
763            impl futures_util::stream::Stream<Item = Result<DirectoryEntry>> + Send + 'a,
764        >,
765    > {
766        if self.config.no_readdir {
767            info!("fuse: readdir is not supported.");
768            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
769        }
770        let entries = self
771            .do_readdir(req, parent, fh, offset.try_into().unwrap())
772            .await?;
773        Ok(ReplyDirectory { entries })
774    }
775
776    /// read directory entries, but with their attribute, like [`readdir`][Filesystem::readdir]
777    /// + [`lookup`][Filesystem::lookup] at the same time.
778    async fn readdirplus<'a>(
779        &'a self,
780        req: Request,
781        parent: Inode,
782        fh: u64,
783        offset: u64,
784        _lock_owner: u64,
785    ) -> Result<
786        ReplyDirectoryPlus<
787            impl futures_util::stream::Stream<Item = Result<DirectoryEntryPlus>> + Send + 'a,
788        >,
789    > {
790        if self.config.no_readdir {
791            info!("fuse: readdir is not supported.");
792            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
793        }
794        trace!("readdirplus: parent: {parent}, fh: {fh}, offset: {offset}");
795        let entries = self.do_readdirplus(req, parent, fh, offset).await?;
796        match self.handles.lock().await.get(&fh) {
797            Some(h) => {
798                trace!(
799                    "after readdirplus: found handle, seeing real_handle: {}",
800                    h.real_handle.is_some()
801                );
802            }
803            None => trace!("after readdirplus: no handle found: {fh}"),
804        }
805        Ok(ReplyDirectoryPlus { entries })
806    }
807    /// release an open directory. For every [`opendir`][Filesystem::opendir] call there will
808    /// be exactly one `releasedir` call. `fh` will contain the value set by the
809    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
810    /// [`opendir`][Filesystem::opendir] method didn't set any value.
811    async fn releasedir(&self, req: Request, _inode: Inode, fh: u64, flags: u32) -> Result<()> {
812        if self.no_opendir.load(Ordering::Relaxed) {
813            info!("fuse: releasedir is not supported.");
814            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
815        }
816
817        if let Some(hd) = self.handles.lock().await.get(&fh) {
818            let rh = if let Some(ref h) = hd.real_handle {
819                h
820            } else {
821                return Err(
822                    Error::other(format!("no real handle found for file handle {fh}")).into(),
823                );
824            };
825            let real_handle = rh.handle.load(Ordering::Relaxed);
826            let real_inode = rh.inode;
827            rh.layer
828                .releasedir(req, real_inode, real_handle, flags)
829                .await?;
830        }
831
832        self.handles.lock().await.remove(&fh);
833        Ok(())
834    }
835
836    /// synchronize directory contents. If the `datasync` is true, then only the directory contents
837    /// should be flushed, not the metadata. `fh` will contain the value set by the
838    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
839    /// [`opendir`][Filesystem::opendir] method didn't set any value.
840    async fn fsyncdir(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
841        self.do_fsync(req, inode, datasync, fh, true)
842            .await
843            .map_err(|e| e.into())
844    }
845
846    #[allow(clippy::too_many_arguments)]
847    async fn getlk(
848        &self,
849        req: Request,
850        _inode: Inode,
851        fh: u64,
852        lock_owner: u64,
853        start: u64,
854        end: u64,
855        r#type: u32,
856        pid: u32,
857    ) -> Result<ReplyLock> {
858        if !self.no_open.load(Ordering::Relaxed) {
859            let handles = self.handles.lock().await;
860            if let Some(hd) = handles.get(&fh)
861                && let Some(ref rh) = hd.real_handle
862            {
863                match rh
864                    .layer
865                    .getlk(
866                        req,
867                        rh.inode,
868                        rh.handle.load(Ordering::Relaxed),
869                        lock_owner,
870                        start,
871                        end,
872                        r#type,
873                        pid,
874                    )
875                    .await
876                {
877                    Ok(reply) => return Ok(reply),
878                    Err(e) => {
879                        // If underlying layer doesn't support locking, fall through to fallback
880                        let errno: i32 = e.into();
881                        if errno != libc::ENOSYS {
882                            return Err(errno.into());
883                        }
884                    }
885                }
886            }
887        }
888
889        // Fallback: report no lock conflict
890        Ok(ReplyLock {
891            start: 0,
892            end: 0,
893            r#type: libc::F_UNLCK as u32,
894            pid: 0,
895        })
896    }
897
898    #[allow(clippy::too_many_arguments)]
899    async fn setlk(
900        &self,
901        req: Request,
902        _inode: Inode,
903        fh: u64,
904        lock_owner: u64,
905        start: u64,
906        end: u64,
907        r#type: u32,
908        pid: u32,
909        block: bool,
910    ) -> Result<()> {
911        if !self.no_open.load(Ordering::Relaxed) {
912            let handles = self.handles.lock().await;
913            if let Some(hd) = handles.get(&fh)
914                && let Some(ref rh) = hd.real_handle
915            {
916                match rh
917                    .layer
918                    .setlk(
919                        req,
920                        rh.inode,
921                        rh.handle.load(Ordering::Relaxed),
922                        lock_owner,
923                        start,
924                        end,
925                        r#type,
926                        pid,
927                        block,
928                    )
929                    .await
930                {
931                    Ok(()) => return Ok(()),
932                    Err(e) => {
933                        // If underlying layer doesn't support locking, fall through to fallback
934                        let errno: i32 = e.into();
935                        if errno != libc::ENOSYS {
936                            return Err(errno.into());
937                        }
938                    }
939                }
940            }
941        }
942
943        // Fallback: silently accept the lock request
944        Ok(())
945    }
946    /// check file access permissions. This will be called for the `access()` system call. If the
947    /// `default_permissions` mount option is given, this method is not be called. This method is
948    /// not called under Linux kernel versions 2.4.x.
949    async fn access(&self, req: Request, inode: Inode, mask: u32) -> Result<()> {
950        let node = self.lookup_node(req, inode, "").await?;
951
952        if node.whiteout.load(Ordering::Relaxed) {
953            return Err(Error::from_raw_os_error(libc::ENOENT).into());
954        }
955
956        let (layer, real_inode) = self.find_real_inode(inode).await?;
957        layer.access(req, real_inode, mask).await
958    }
959
960    /// create and open a file. If the file does not exist, first create it with the specified
961    /// mode, and then open it. Open flags (with the exception of `O_NOCTTY`) are available in
962    /// flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in `fh`, and use
963    /// this in other all other file operations ([`read`][Filesystem::read],
964    /// [`write`][Filesystem::write], [`flush`][Filesystem::flush],
965    /// [`release`][Filesystem::release], [`fsync`][Filesystem::fsync]). There are also some flags
966    /// (`direct_io`, `keep_cache`) which the filesystem may set, to change the way the file is
967    /// opened. If this method is not implemented or under Linux kernel versions earlier than
968    /// 2.6.15, the [`mknod`][Filesystem::mknod] and [`open`][Filesystem::open] methods will be
969    /// called instead.
970    ///
971    /// # Notes:
972    ///
973    /// See `fuse_file_info` structure in
974    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
975    /// more details.
976    async fn create(
977        &self,
978        req: Request,
979        parent: Inode,
980        name: &OsStr,
981        mode: u32,
982        flags: u32,
983    ) -> Result<ReplyCreated> {
984        // Parent doesn't exist.
985        let pnode = self.lookup_node(req, parent, "").await?;
986        if pnode.whiteout.load(Ordering::Relaxed) {
987            return Err(Error::from_raw_os_error(libc::ENOENT).into());
988        }
989
990        let mut flags: i32 = flags as i32;
991        flags |= libc::O_NOFOLLOW;
992        #[cfg(target_os = "linux")]
993        {
994            flags &= !libc::O_DIRECT;
995        }
996        if self.config.writeback {
997            if flags & libc::O_ACCMODE == libc::O_WRONLY {
998                flags &= !libc::O_ACCMODE;
999                flags |= libc::O_RDWR;
1000            }
1001
1002            if flags & libc::O_APPEND != 0 {
1003                flags &= !libc::O_APPEND;
1004            }
1005        }
1006
1007        let final_handle = self
1008            .do_create(req, &pnode, name, mode, flags.try_into().unwrap())
1009            .await?;
1010        let entry = self.do_lookup(req, parent, name.to_str().unwrap()).await?;
1011        let fh = final_handle
1012            .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "Handle not found"))?;
1013
1014        let mut opts = OpenOptions::empty();
1015        match self.config.cache_policy {
1016            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
1017            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
1018            _ => {}
1019        }
1020
1021        Ok(ReplyCreated {
1022            ttl: entry.ttl,
1023            attr: entry.attr,
1024            generation: entry.generation,
1025            fh,
1026            flags: opts.bits(),
1027        })
1028    }
1029
1030    /// forget more than one inode. This is a batch version [`forget`][Filesystem::forget]
1031    async fn batch_forget(&self, _req: Request, inodes: &[(Inode, u64)]) {
1032        for inode in inodes {
1033            self.forget_one(inode.0, inode.1).await;
1034        }
1035    }
1036
1037    /// allocate space for an open file. This function ensures that required space is allocated for
1038    /// specified file.
1039    ///
1040    /// # Notes:
1041    ///
1042    /// more information about `fallocate`, please see **`man 2 fallocate`**
1043    async fn fallocate(
1044        &self,
1045        req: Request,
1046        inode: Inode,
1047        fh: u64,
1048        offset: u64,
1049        length: u64,
1050        mode: u32,
1051    ) -> Result<()> {
1052        // Use O_RDONLY flags which indicates no copy up.
1053        let data = self
1054            .get_data(req, Some(fh), inode, libc::O_RDONLY as u32)
1055            .await?;
1056
1057        match data.real_handle {
1058            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
1059            Some(ref rhd) => {
1060                if !rhd.in_upper_layer {
1061                    // TODO: in lower layer, error out or just success?
1062                    return Err(Error::from_raw_os_error(libc::EROFS).into());
1063                }
1064                rhd.layer
1065                    .fallocate(
1066                        req,
1067                        rhd.inode,
1068                        rhd.handle.load(Ordering::Relaxed),
1069                        offset,
1070                        length,
1071                        mode,
1072                    )
1073                    .await
1074            }
1075        }
1076    }
1077
1078    /// find next data or hole after the specified offset.
1079    async fn lseek(
1080        &self,
1081        req: Request,
1082        inode: Inode,
1083        fh: u64,
1084        offset: u64,
1085        whence: u32,
1086    ) -> Result<ReplyLSeek> {
1087        let node = self.lookup_node(req, inode, "").await?;
1088
1089        if node.whiteout.load(Ordering::Relaxed) {
1090            return Err(Error::from_raw_os_error(libc::ENOENT).into());
1091        }
1092
1093        let st = node.stat64(req).await?;
1094        if utils::is_dir(&st.attr.kind) {
1095            // Special handling and security restrictions for directory operations.
1096            // Use the common API to obtain the underlying layer and handle info.
1097            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1098
1099            // Verify that the underlying handle refers to a directory.
1100            let handle_stat = match layer.getattr(req, real_inode, Some(real_handle), 0).await {
1101                Ok(s) => s,
1102                Err(_) => return Err(Error::from_raw_os_error(libc::EBADF).into()),
1103            };
1104
1105            if !utils::is_dir(&handle_stat.attr.kind) {
1106                return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
1107            }
1108
1109            // Handle directory lseek operations according to POSIX standard
1110            // This enables seekdir/telldir functionality on directories
1111            match whence {
1112                // SEEK_SET: Set the directory position to an absolute value
1113                x if x == libc::SEEK_SET as u32 => {
1114                    // Validate offset bounds to prevent overflow
1115                    // Directory offsets should not exceed i64::MAX
1116                    if offset > i64::MAX as u64 {
1117                        return Err(Error::from_raw_os_error(libc::EINVAL).into());
1118                    }
1119
1120                    // Perform the seek operation on the underlying layer
1121                    // Delegate to the lower layer implementation
1122                    layer
1123                        .lseek(req, real_inode, real_handle, offset, whence)
1124                        .await
1125                }
1126                // SEEK_CUR: Move relative to the current directory position
1127                x if x == libc::SEEK_CUR as u32 => {
1128                    // Get current position from underlying layer
1129                    // This is needed to calculate the new position
1130                    let current = match layer
1131                        .lseek(req, real_inode, real_handle, 0, libc::SEEK_CUR as u32)
1132                        .await
1133                    {
1134                        Ok(r) => r.offset,
1135                        Err(_) => return Err(Error::from_raw_os_error(libc::EINVAL).into()),
1136                    };
1137
1138                    // Check for potential overflow when adding the provided offset
1139                    // This prevents invalid position calculations
1140                    if let Some(new_offset) = current.checked_add(offset) {
1141                        // Ensure the new offset is within valid bounds
1142                        if new_offset > i64::MAX as u64 {
1143                            return Err(Error::from_raw_os_error(libc::EINVAL).into());
1144                        }
1145
1146                        // Actually set the underlying offset to the new value so behavior
1147                        // matches passthrough which uses libc::lseek64 to set the fd offset.
1148                        match layer
1149                            .lseek(
1150                                req,
1151                                real_inode,
1152                                real_handle,
1153                                new_offset,
1154                                libc::SEEK_SET as u32,
1155                            )
1156                            .await
1157                        {
1158                            Ok(_) => Ok(ReplyLSeek { offset: new_offset }),
1159                            Err(_) => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1160                        }
1161                    } else {
1162                        Err(Error::from_raw_os_error(libc::EINVAL).into())
1163                    }
1164                }
1165                // Any other whence value is invalid for directories
1166                _ => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1167            }
1168        } else {
1169            // Keep the original lseek behavior for regular files
1170            // Delegate directly to the underlying layer
1171            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1172            layer
1173                .lseek(req, real_inode, real_handle, offset, whence)
1174                .await
1175        }
1176    }
1177
1178    async fn interrupt(&self, _req: Request, _unique: u64) -> Result<()> {
1179        Ok(())
1180    }
1181}
#[cfg(test)]
mod tests {
    use std::{env, ffi::OsString, path::PathBuf, sync::Arc};

    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;
    use tracing_subscriber::EnvFilter;

    use crate::{
        overlayfs::{OverlayFs, config::Config},
        passthrough::{PassthroughArgs, new_passthroughfs_layer},
    };
    use rfuse3::raw::logfs::LoggingFileSystem;

    /// Read a directory path from the environment variable `var`, falling back to `default`
    /// when the variable is not set. This lets the manual test run on machines other than
    /// the one it was written on without editing the source.
    fn dir_from_env(var: &str, default: &str) -> PathBuf {
        env::var_os(var).map_or_else(|| PathBuf::from(default), PathBuf::from)
    }

    /// Manual smoke test: mounts an overlay (one lower layer, one upper layer) wrapped in a
    /// logging filesystem and serves it until the session ends or Ctrl-C is received.
    ///
    /// Ignored by default because it needs real directories and a FUSE mount. The locations
    /// can be overridden with `OVLFS_TEST_MOUNTPOINT`, `OVLFS_TEST_LOWERDIR` and
    /// `OVLFS_TEST_UPPERDIR`; without them the original hard-coded paths are used.
    #[tokio::test]
    #[ignore]
    async fn test_a_ovlfs() {
        let _ = tracing_subscriber::fmt()
            .with_env_filter(EnvFilter::from_default_env().add_directive("trace".parse().unwrap()))
            .try_init();

        // Set up test environment
        let mountpoint = dir_from_env("OVLFS_TEST_MOUNTPOINT", "/home/luxian/megatest/true_temp");
        let lowerdir = vec![dir_from_env(
            "OVLFS_TEST_LOWERDIR",
            "/home/luxian/github/buck2-rust-third-party",
        )];
        let upperdir = dir_from_env("OVLFS_TEST_UPPERDIR", "/home/luxian/upper");

        // Create lower layers
        let mut lower_layers = Vec::with_capacity(lowerdir.len());
        for lower in &lowerdir {
            let layer = new_passthroughfs_layer(PassthroughArgs {
                root_dir: lower.clone(),
                mapping: None::<&str>,
            })
            .await
            .unwrap();
            lower_layers.push(Arc::new(layer));
        }
        // Create upper layer
        let upper_layer = Arc::new(
            new_passthroughfs_layer(PassthroughArgs {
                root_dir: upperdir,
                mapping: None::<&str>,
            })
            .await
            .unwrap(),
        );
        // Create overlayfs
        let config = Config {
            mountpoint: mountpoint.clone(),
            do_import: true,
            ..Default::default()
        };

        let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1).unwrap();

        let logfs = LoggingFileSystem::new(overlayfs);

        let mount_path: OsString = OsString::from(mountpoint);

        // SAFETY: `getuid` and `getgid` take no arguments and only read
        // process credentials.
        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };

        // Flip to `true` to mount through the privileged path instead.
        let privileged = false;

        let mut mount_options = MountOptions::default();
        // .allow_other(true)
        mount_options.force_readdir_plus(true).uid(uid).gid(gid);

        let mut mount_handle: rfuse3::raw::MountHandle = if privileged {
            Session::new(mount_options)
                .mount(logfs, mount_path)
                .await
                .unwrap()
        } else {
            Session::new(mount_options)
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        };

        let handle = &mut mount_handle;

        // Serve until the session finishes on its own or Ctrl-C asks for an unmount.
        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}