Skip to main content

libfuse_fs/overlayfs/
async_io.rs

1use super::Inode;
2use super::OverlayFs;
3use super::utils;
4use crate::overlayfs::HandleData;
5use crate::overlayfs::RealHandle;
6use crate::overlayfs::{AtomicU64, CachePolicy};
7use crate::util::open_options::OpenOptions;
8use rfuse3::raw::prelude::*;
9use rfuse3::*;
10use std::ffi::OsStr;
11use std::io::Error;
12use std::io::ErrorKind;
13use std::num::NonZeroU32;
14use std::sync::Arc;
15use std::sync::atomic::Ordering;
16use tokio::sync::Mutex;
17use tracing::info;
18use tracing::trace;
19
20impl Filesystem for OverlayFs {
21    /// initialize filesystem. Called before any other filesystem method.
22    async fn init(&self, _req: Request) -> Result<ReplyInit> {
23        if self.config.do_import {
24            self.import().await?;
25        }
26        if !self.config.do_import || self.config.writeback {
27            self.writeback.store(true, Ordering::Relaxed);
28        }
29        if !self.config.do_import || self.config.no_open {
30            self.no_open.store(true, Ordering::Relaxed);
31        }
32        if !self.config.do_import || self.config.no_opendir {
33            self.no_opendir.store(true, Ordering::Relaxed);
34        }
35        if !self.config.do_import || self.config.killpriv_v2 {
36            self.killpriv_v2.store(true, Ordering::Relaxed);
37        }
38        if self.config.perfile_dax {
39            self.perfile_dax.store(true, Ordering::Relaxed);
40        }
41
42        Ok(ReplyInit {
43            max_write: NonZeroU32::new(128 * 1024).unwrap(),
44        })
45    }
46
47    /// clean up filesystem. Called on filesystem exit which is fuseblk, in normal fuse filesystem,
48    /// kernel may call forget for root. There is some discuss for this
49    /// <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
50    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
51    async fn destroy(&self, _req: Request) {}
52
53    /// look up a directory entry by name and get its attributes.
54    async fn lookup(&self, req: Request, parent: Inode, name: &OsStr) -> Result<ReplyEntry> {
55        let tmp = name.to_string_lossy().to_string();
56        let result = self.do_lookup(req, parent, tmp.as_str()).await;
57        match result {
58            Ok(e) => Ok(e),
59            Err(err) => Err(err.into()),
60        }
61    }
62
63    /// forget an inode. The nlookup parameter indicates the number of lookups previously
64    /// performed on this inode. If the filesystem implements inode lifetimes, it is recommended
65    /// that inodes acquire a single reference on each lookup, and lose nlookup references on each
66    /// forget. The filesystem may ignore forget calls, if the inodes don't need to have a limited
67    /// lifetime. On unmount it is not guaranteed, that all referenced inodes will receive a forget
68    /// message. When filesystem is normal(not fuseblk) and unmounting, kernel may send forget
69    /// request for root and this library will stop session after call forget. There is some
70    /// discussion for this <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
71    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
72    async fn forget(&self, _req: Request, inode: Inode, nlookup: u64) {
73        self.forget_one(inode, nlookup).await;
74    }
75
76    /// get file attributes. If `fh` is None, means `fh` is not set.
77    async fn getattr(
78        &self,
79        req: Request,
80        inode: Inode,
81        fh: Option<u64>,
82        flags: u32,
83    ) -> Result<ReplyAttr> {
84        if !self.no_open.load(Ordering::Relaxed)
85            && let Some(h) = fh
86        {
87            let handles = self.handles.lock().await;
88            if let Some(hd) = handles.get(&h)
89                && let Some(ref rh) = hd.real_handle
90            {
91                let mut rep: ReplyAttr = rh
92                    .layer
93                    .getattr(req, rh.inode, Some(rh.handle.load(Ordering::Relaxed)), 0)
94                    .await?;
95                rep.attr.ino = inode;
96                return Ok(rep);
97            }
98        }
99
100        let node: Arc<super::OverlayInode> = self.lookup_node(req, inode, "").await?;
101        let (layer, _, lower_inode) = node.first_layer_inode().await;
102        let mut re = layer.getattr(req, lower_inode, None, flags).await?;
103        re.attr.ino = inode;
104        Ok(re)
105    }
106
107    /// set file attributes. If `fh` is None, means `fh` is not set.
108    async fn setattr(
109        &self,
110        req: Request,
111        inode: Inode,
112        fh: Option<u64>,
113        set_attr: SetAttr,
114    ) -> Result<ReplyAttr> {
115        // Check if upper layer exists.
116        self.upper_layer
117            .as_ref()
118            .cloned()
119            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
120
121        // deal with handle first
122        if !self.no_open.load(Ordering::Relaxed)
123            && let Some(h) = fh
124        {
125            let handles = self.handles.lock().await;
126            if let Some(hd) = handles.get(&h)
127                && let Some(ref rhd) = hd.real_handle
128            {
129                // handle opened in upper layer
130                if rhd.in_upper_layer {
131                    let mut rep = rhd
132                        .layer
133                        .setattr(
134                            req,
135                            rhd.inode,
136                            Some(rhd.handle.load(Ordering::Relaxed)),
137                            set_attr,
138                        )
139                        .await?;
140                    rep.attr.ino = inode;
141                    return Ok(rep);
142                }
143            }
144        }
145
146        let mut node = self.lookup_node(req, inode, "").await?;
147
148        if !node.in_upper_layer().await {
149            node = self.copy_node_up(req, node.clone()).await?
150        }
151
152        let (layer, _, real_inode) = node.first_layer_inode().await;
153        // layer.setattr(req, real_inode, None, set_attr).await
154        let mut rep = layer.setattr(req, real_inode, None, set_attr).await?;
155        rep.attr.ino = inode;
156        Ok(rep)
157    }
158
159    /// read symbolic link.
160    async fn readlink(&self, req: Request, inode: Inode) -> Result<ReplyData> {
161        trace!("READLINK: inode: {inode}\n");
162
163        let node = self.lookup_node(req, inode, "").await?;
164
165        if node.whiteout.load(Ordering::Relaxed) {
166            return Err(Error::from_raw_os_error(libc::ENOENT).into());
167        }
168
169        let (layer, _, inode) = node.first_layer_inode().await;
170        layer.readlink(req, inode).await
171    }
172
173    /// create a symbolic link.
174    async fn symlink(
175        &self,
176        req: Request,
177        parent: Inode,
178        name: &OsStr,
179        link: &OsStr,
180    ) -> Result<ReplyEntry> {
181        // soft link
182        let sname = name.to_string_lossy().into_owned().to_owned();
183        let slinkname = link.to_string_lossy().into_owned().to_owned();
184
185        let pnode = self.lookup_node(req, parent, "").await?;
186        self.do_symlink(req, slinkname.as_str(), &pnode, sname.as_str())
187            .await?;
188
189        self.do_lookup(req, parent, sname.as_str())
190            .await
191            .map_err(|e| e.into())
192    }
193
194    /// create file node. Create a regular file, character device, block device, fifo or socket
195    /// node. When creating file, most cases user only need to implement
196    /// [`create`][Filesystem::create].
197    async fn mknod(
198        &self,
199        req: Request,
200        parent: Inode,
201        name: &OsStr,
202        mode: u32,
203        rdev: u32,
204    ) -> Result<ReplyEntry> {
205        let sname = name.to_string_lossy().to_string();
206
207        // Check if parent exists.
208        let pnode = self.lookup_node(req, parent, "").await?;
209        if pnode.whiteout.load(Ordering::Relaxed) {
210            return Err(Error::from_raw_os_error(libc::ENOENT).into());
211        }
212
213        self.do_mknod(req, &pnode, sname.as_str(), mode, rdev, 0)
214            .await?;
215        self.do_lookup(req, parent, sname.as_str())
216            .await
217            .map_err(|e| e.into())
218    }
219
220    /// create a directory.
221    async fn mkdir(
222        &self,
223        req: Request,
224        parent: Inode,
225        name: &OsStr,
226        mode: u32,
227        umask: u32,
228    ) -> Result<ReplyEntry> {
229        let sname = name.to_string_lossy().to_string();
230
231        // no entry or whiteout
232        let pnode = self.lookup_node(req, parent, "").await?;
233        if pnode.whiteout.load(Ordering::Relaxed) {
234            return Err(Error::from_raw_os_error(libc::ENOENT).into());
235        }
236
237        self.do_mkdir(req, pnode, sname.as_str(), mode, umask)
238            .await?;
239        self.do_lookup(req, parent, sname.as_str())
240            .await
241            .map_err(|e| e.into())
242    }
243
244    /// remove a file.
245    async fn unlink(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
246        self.do_rm(req, parent, name, false)
247            .await
248            .map_err(|e| e.into())
249    }
250
251    /// remove a directory.
252    async fn rmdir(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
253        self.do_rm(req, parent, name, true)
254            .await
255            .map_err(|e| e.into())
256    }
257
258    /// rename a file or directory.
259    async fn rename(
260        &self,
261        req: Request,
262        parent: Inode,
263        name: &OsStr,
264        new_parent: Inode,
265        new_name: &OsStr,
266    ) -> Result<()> {
267        self.do_rename(req, parent, name, new_parent, new_name)
268            .await
269            .map_err(|e| e.into())
270    }
271
272    /// create a hard link.
273    async fn link(
274        &self,
275        req: Request,
276        inode: Inode,
277        new_parent: Inode,
278        new_name: &OsStr,
279    ) -> Result<ReplyEntry> {
280        let node = self.lookup_node(req, inode, "").await?;
281        if node.whiteout.load(Ordering::Relaxed) {
282            return Err(Error::from_raw_os_error(libc::ENOENT).into());
283        }
284
285        let newpnode = self.lookup_node(req, new_parent, "").await?;
286        if newpnode.whiteout.load(Ordering::Relaxed) {
287            return Err(Error::from_raw_os_error(libc::ENOENT).into());
288        }
289        let new_name = new_name.to_str().unwrap();
290        // trace!(
291        //     "LINK: inode: {}, new_parent: {}, trying to do_link: src_inode: {}, newpnode: {}",
292        //     inode, new_parent, node.inode, newpnode.inode
293        // );
294        self.do_link(req, &node, &newpnode, new_name).await?;
295        // trace!("LINK: done, looking up new entry");
296        self.do_lookup(req, new_parent, new_name)
297            .await
298            .map_err(|e| e.into())
299    }
300
301    /// open a file. Open flags (with the exception of `O_CREAT`, `O_EXCL` and `O_NOCTTY`) are
302    /// available in flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in
303    /// fh, and use this in other all other file operations (read, write, flush, release, fsync).
304    /// Filesystem may also implement stateless file I/O and not store anything in fh. There are
305    /// also some flags (`direct_io`, `keep_cache`) which the filesystem may set, to change the way
306    /// the file is opened. A filesystem need not implement this method if it
307    /// sets [`MountOptions::no_open_support`][crate::MountOptions::no_open_support] and if the
308    /// kernel supports `FUSE_NO_OPEN_SUPPORT`.
309    ///
310    /// # Notes:
311    ///
312    /// See `fuse_file_info` structure in
313    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
314    /// more details.
315    async fn open(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
316        if self.no_open.load(Ordering::Relaxed) {
317            info!("fuse: open is not supported.");
318            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
319        }
320
321        let readonly: bool = flags
322            & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
323                as u32
324            == 0;
325        // toggle flags
326        let mut flags: i32 = flags as i32;
327
328        flags |= libc::O_NOFOLLOW;
329
330        if self.config.writeback {
331            if flags & libc::O_ACCMODE == libc::O_WRONLY {
332                flags &= !libc::O_ACCMODE;
333                flags |= libc::O_RDWR;
334            }
335
336            if flags & libc::O_APPEND != 0 {
337                flags &= !libc::O_APPEND;
338            }
339        }
340        // lookup node
341        let node = self.lookup_node(req, inode, "").await?;
342
343        // whiteout node
344        if node.whiteout.load(Ordering::Relaxed) {
345            return Err(Error::from_raw_os_error(libc::ENOENT).into());
346        }
347
348        if !readonly {
349            // copy up to upper layer
350            self.copy_node_up(req, node.clone()).await?;
351        }
352
353        // assign a handle in overlayfs and open it
354        let (_l, h) = node.open(req, flags as u32, 0).await?;
355
356        let hd = self.next_handle.fetch_add(1, Ordering::Relaxed);
357        let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
358        let handle_data = HandleData {
359            node: node.clone(),
360            real_handle: Some(RealHandle {
361                layer,
362                in_upper_layer,
363                inode,
364                handle: AtomicU64::new(h.fh),
365            }),
366            dir_snapshot: Mutex::new(None),
367        };
368
369        self.handles.lock().await.insert(hd, Arc::new(handle_data));
370
371        let mut opts = OpenOptions::empty();
372        match self.config.cache_policy {
373            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
374            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
375            _ => {}
376        }
377
378        // trace!("OPEN: returning handle: {hd}");
379
380        Ok(ReplyOpen {
381            fh: hd,
382            flags: opts.bits(),
383        })
384    }
385
386    /// read data. Read should send exactly the number of bytes requested except on EOF or error,
387    /// otherwise the rest of the data will be substituted with zeroes. An exception to this is
388    /// when the file has been opened in `direct_io` mode, in which case the return value of the
389    /// read system call will reflect the return value of this operation. `fh` will contain the
390    /// value set by the open method, or will be undefined if the open method didn't set any value.
391    async fn read(
392        &self,
393        req: Request,
394        inode: Inode,
395        fh: u64,
396        offset: u64,
397        size: u32,
398    ) -> Result<ReplyData> {
399        let data = self.get_data(req, Some(fh), inode, 0).await?;
400
401        match data.real_handle {
402            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
403            Some(ref hd) => {
404                hd.layer
405                    .read(
406                        req,
407                        hd.inode,
408                        hd.handle.load(Ordering::Relaxed),
409                        offset,
410                        size,
411                    )
412                    .await
413            }
414        }
415    }
416
417    /// write data. Write should return exactly the number of bytes requested except on error. An
418    /// exception to this is when the file has been opened in `direct_io` mode, in which case the
419    /// return value of the write system call will reflect the return value of this operation. `fh`
420    /// will contain the value set by the open method, or will be undefined if the open method
421    /// didn't set any value. When `write_flags` contains
422    /// [`FUSE_WRITE_CACHE`](crate::raw::flags::FUSE_WRITE_CACHE), means the write operation is a
423    /// delay write.
424    #[allow(clippy::too_many_arguments)]
425    async fn write(
426        &self,
427        req: Request,
428        inode: Inode,
429        fh: u64,
430        offset: u64,
431        data: &[u8],
432        write_flags: u32,
433        flags: u32,
434    ) -> Result<ReplyWrite> {
435        let handle_data: Arc<HandleData> = self.get_data(req, Some(fh), inode, flags).await?;
436
437        match handle_data.real_handle {
438            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
439            Some(ref hd) => {
440                hd.layer
441                    .write(
442                        req,
443                        hd.inode,
444                        hd.handle.load(Ordering::Relaxed),
445                        offset,
446                        data,
447                        write_flags,
448                        flags,
449                    )
450                    .await
451            }
452        }
453    }
454
455    /// Copy a range of data from one file to another. This can improve performance because it
456    /// reduces data copying: normally, data will be copied from FUSE server to kernel, then to
457    /// user-space, then to kernel, and finally sent back to FUSE server. By implementing this
458    /// method, data will only be copied internally within the FUSE server.
459    #[allow(clippy::too_many_arguments)]
460    async fn copy_file_range(
461        &self,
462        req: Request,
463        inode_in: Inode,
464        fh_in: u64,
465        offset_in: u64,
466        inode_out: Inode,
467        fh_out: u64,
468        offset_out: u64,
469        length: u64,
470        flags: u64,
471    ) -> Result<ReplyCopyFileRange> {
472        // Get handle data for source file
473        let data_in = self.get_data(req, Some(fh_in), inode_in, 0).await?;
474        let handle_in = match data_in.real_handle {
475            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
476            Some(ref hd) => hd,
477        };
478
479        // Get handle data for destination file
480        let data_out = self.get_data(req, Some(fh_out), inode_out, 0).await?;
481        let handle_out = match data_out.real_handle {
482            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
483            Some(ref hd) => hd,
484        };
485
486        // Both files must be on the same layer for copy_file_range to work
487        if !Arc::ptr_eq(&handle_in.layer, &handle_out.layer) {
488            // Different layers - return EXDEV to trigger fallback to read/write
489            return Err(Error::from_raw_os_error(libc::EXDEV).into());
490        }
491
492        // Delegate to the underlying PassthroughFs layer
493        handle_in
494            .layer
495            .copy_file_range(
496                req,
497                handle_in.inode,
498                handle_in.handle.load(Ordering::Relaxed),
499                offset_in,
500                handle_out.inode,
501                handle_out.handle.load(Ordering::Relaxed),
502                offset_out,
503                length,
504                flags,
505            )
506            .await
507    }
508
509    /// get filesystem statistics.
510    async fn statfs(&self, req: Request, inode: Inode) -> Result<ReplyStatFs> {
511        self.do_statvfs(req, inode).await.map_err(|e| e.into())
512    }
513
514    /// release an open file. Release is called when there are no more references to an open file:
515    /// all file descriptors are closed and all memory mappings are unmapped. For every open call
516    /// there will be exactly one release call. The filesystem may reply with an error, but error
517    /// values are not returned to `close()` or `munmap()` which triggered the release. `fh` will
518    /// contain the value set by the open method, or will be undefined if the open method didn't
519    /// set any value. `flags` will contain the same flags as for open. `flush` means flush the
520    /// data or not when closing file.
521    async fn release(
522        &self,
523        req: Request,
524        _inode: Inode,
525        fh: u64,
526        flags: u32,
527        lock_owner: u64,
528        flush: bool,
529    ) -> Result<()> {
530        if self.no_open.load(Ordering::Relaxed) {
531            info!("fuse: release is not supported.");
532            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
533        }
534
535        if let Some(hd) = self.handles.lock().await.get(&fh) {
536            let rh = if let Some(ref h) = hd.real_handle {
537                h
538            } else {
539                return Err(
540                    Error::other(format!("no real handle found for file handle {fh}")).into(),
541                );
542            };
543            let real_handle = rh.handle.load(Ordering::Relaxed);
544            let real_inode = rh.inode;
545            rh.layer
546                .release(req, real_inode, real_handle, flags, lock_owner, flush)
547                .await?;
548        }
549
550        self.handles.lock().await.remove(&fh);
551
552        Ok(())
553    }
554
555    /// synchronize file contents. If the `datasync` is true, then only the user data should be
556    /// flushed, not the metadata.
557    async fn fsync(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
558        self.do_fsync(req, inode, datasync, fh, false)
559            .await
560            .map_err(|e| e.into())
561    }
562
563    /// set an extended attribute.
564    async fn setxattr(
565        &self,
566        req: Request,
567        inode: Inode,
568        name: &OsStr,
569        value: &[u8],
570        flags: u32,
571        position: u32,
572    ) -> Result<()> {
573        let node = self.lookup_node(req, inode, "").await?;
574
575        if node.whiteout.load(Ordering::Relaxed) {
576            return Err(Error::from_raw_os_error(libc::ENOENT).into());
577        }
578
579        if !node.in_upper_layer().await {
580            // Copy node up.
581            self.copy_node_up(req, node.clone()).await?;
582        }
583
584        let (layer, _, real_inode) = node.first_layer_inode().await;
585
586        layer
587            .setxattr(req, real_inode, name, value, flags, position)
588            .await
589    }
590
591    /// Get an extended attribute. If `size` is too small, return `Err<ERANGE>`.
592    /// Otherwise, use [`ReplyXAttr::Data`] to send the attribute data, or
593    /// return an error.
594    async fn getxattr(
595        &self,
596        req: Request,
597        inode: Inode,
598        name: &OsStr,
599        size: u32,
600    ) -> Result<ReplyXAttr> {
601        let node = self.lookup_node(req, inode, "").await?;
602
603        if node.whiteout.load(Ordering::Relaxed) {
604            return Err(Error::from_raw_os_error(libc::ENOENT).into());
605        }
606
607        let (layer, real_inode) = self.find_real_inode(inode).await?;
608
609        layer.getxattr(req, real_inode, name, size).await
610    }
611
612    /// List extended attribute names.
613    ///
614    /// If `size` is too small, return `Err<ERANGE>`.  Otherwise, use
615    /// [`ReplyXAttr::Data`] to send the attribute list, or return an error.
616    async fn listxattr(&self, req: Request, inode: Inode, size: u32) -> Result<ReplyXAttr> {
617        let node = self.lookup_node(req, inode, "").await?;
618        if node.whiteout.load(Ordering::Relaxed) {
619            return Err(Error::from_raw_os_error(libc::ENOENT).into());
620        }
621        let (layer, real_inode) = self.find_real_inode(inode).await?;
622        layer.listxattr(req, real_inode, size).await
623    }
624
625    /// remove an extended attribute.
626    async fn removexattr(&self, req: Request, inode: Inode, name: &OsStr) -> Result<()> {
627        let node = self.lookup_node(req, inode, "").await?;
628
629        if node.whiteout.load(Ordering::Relaxed) {
630            return Err(Error::from_raw_os_error(libc::ENOENT).into());
631        }
632
633        if !node.in_upper_layer().await {
634            // copy node into upper layer
635            self.copy_node_up(req, node.clone()).await?;
636        }
637
638        let (layer, _, ino) = node.first_layer_inode().await;
639        layer.removexattr(req, ino, name).await
640
641        // TODO: recreate the node since removexattr may remove the opaque xattr.
642    }
643
644    /// flush method. This is called on each `close()` of the opened file. Since file descriptors
645    /// can be duplicated (`dup`, `dup2`, `fork`), for one open call there may be many flush calls.
646    /// Filesystems shouldn't assume that flush will always be called after some writes, or that if
647    /// will be called at all. `fh` will contain the value set by the open method, or will be
648    /// undefined if the open method didn't set any value.
649    ///
650    /// # Notes:
651    ///
652    /// the name of the method is misleading, since (unlike fsync) the filesystem is not forced to
653    /// flush pending writes. One reason to flush data, is if the filesystem wants to return write
654    /// errors. If the filesystem supports file locking operations ([`setlk`][Filesystem::setlk],
655    /// [`getlk`][Filesystem::getlk]) it should remove all locks belonging to `lock_owner`.
656    async fn flush(&self, req: Request, inode: Inode, fh: u64, lock_owner: u64) -> Result<()> {
657        if self.no_open.load(Ordering::Relaxed) {
658            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
659        }
660
661        let node = self.lookup_node(req, inode, "").await;
662        match node {
663            Ok(n) => {
664                if n.whiteout.load(Ordering::Relaxed) {
665                    return Err(Error::from_raw_os_error(libc::ENOENT).into());
666                }
667            }
668            Err(e) => {
669                if e.raw_os_error() == Some(libc::ENOENT) {
670                    trace!("flush: inode {inode} is stale");
671                } else {
672                    return Err(e.into());
673                }
674            }
675        }
676
677        let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
678
679        // FIXME: need to test if inode matches corresponding handle?
680        if inode
681            != self
682                .handles
683                .lock()
684                .await
685                .get(&fh)
686                .map(|h| h.node.inode)
687                .unwrap_or(0)
688        {
689            return Err(Error::other("inode does not match handle").into());
690        }
691
692        trace!("flushing, real_inode: {real_inode}, real_handle: {real_handle}");
693        layer.flush(req, real_inode, real_handle, lock_owner).await
694    }
695
696    /// open a directory. Filesystem may store an arbitrary file handle (pointer, index, etc) in
697    /// `fh`, and use this in other all other directory stream operations
698    /// ([`readdir`][Filesystem::readdir], [`releasedir`][Filesystem::releasedir],
699    /// [`fsyncdir`][Filesystem::fsyncdir]). Filesystem may also implement stateless directory
700    /// I/O and not store anything in `fh`.  A file system need not implement this method if it
701    /// sets [`MountOptions::no_open_dir_support`][crate::MountOptions::no_open_dir_support] and
702    /// if the kernel supports `FUSE_NO_OPENDIR_SUPPORT`.
703    async fn opendir(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
704        if self.no_opendir.load(Ordering::Relaxed) {
705            info!("fuse: opendir is not supported.");
706            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
707        }
708
709        // lookup node
710        let node = self.lookup_node(req, inode, ".").await?;
711
712        if node.whiteout.load(Ordering::Relaxed) {
713            return Err(Error::from_raw_os_error(libc::ENOENT).into());
714        }
715
716        let st = node.stat64(req).await?;
717        if !utils::is_dir(&st.attr.kind) {
718            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
719        }
720
721        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
722        // Get the layer information and open directory in the underlying layer
723        let (layer, in_upper_layer, real_inode) = node.first_layer_inode().await;
724        let reply = layer.opendir(req, real_inode, flags).await?;
725
726        self.handles.lock().await.insert(
727            handle,
728            Arc::new(HandleData {
729                node: Arc::clone(&node),
730                real_handle: Some(RealHandle {
731                    layer,
732                    in_upper_layer,
733                    inode: real_inode,
734                    handle: AtomicU64::new(reply.fh),
735                }),
736                dir_snapshot: Mutex::new(None),
737            }),
738        );
739
740        Ok(ReplyOpen { fh: handle, flags })
741    }
742
743    /// read directory. `offset` is used to track the offset of the directory entries. `fh` will
744    /// contain the value set by the [`opendir`][Filesystem::opendir] method, or will be
745    /// undefined if the [`opendir`][Filesystem::opendir] method didn't set any value.
746    async fn readdir<'a>(
747        &'a self,
748        req: Request,
749        parent: Inode,
750        fh: u64,
751        offset: i64,
752    ) -> Result<
753        ReplyDirectory<
754            impl futures_util::stream::Stream<Item = Result<DirectoryEntry>> + Send + 'a,
755        >,
756    > {
757        if self.config.no_readdir {
758            info!("fuse: readdir is not supported.");
759            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
760        }
761        let entries = self
762            .do_readdir(req, parent, fh, offset.try_into().unwrap())
763            .await?;
764        Ok(ReplyDirectory { entries })
765    }
766
767    /// read directory entries, but with their attribute, like [`readdir`][Filesystem::readdir]
768    /// + [`lookup`][Filesystem::lookup] at the same time.
769    async fn readdirplus<'a>(
770        &'a self,
771        req: Request,
772        parent: Inode,
773        fh: u64,
774        offset: u64,
775        _lock_owner: u64,
776    ) -> Result<
777        ReplyDirectoryPlus<
778            impl futures_util::stream::Stream<Item = Result<DirectoryEntryPlus>> + Send + 'a,
779        >,
780    > {
781        if self.config.no_readdir {
782            info!("fuse: readdir is not supported.");
783            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
784        }
785        trace!("readdirplus: parent: {parent}, fh: {fh}, offset: {offset}");
786        let entries = self.do_readdirplus(req, parent, fh, offset).await?;
787        match self.handles.lock().await.get(&fh) {
788            Some(h) => {
789                trace!(
790                    "after readdirplus: found handle, seeing real_handle: {}",
791                    h.real_handle.is_some()
792                );
793            }
794            None => trace!("after readdirplus: no handle found: {fh}"),
795        }
796        Ok(ReplyDirectoryPlus { entries })
797    }
798    /// release an open directory. For every [`opendir`][Filesystem::opendir] call there will
799    /// be exactly one `releasedir` call. `fh` will contain the value set by the
800    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
801    /// [`opendir`][Filesystem::opendir] method didn't set any value.
802    async fn releasedir(&self, req: Request, _inode: Inode, fh: u64, flags: u32) -> Result<()> {
803        if self.no_opendir.load(Ordering::Relaxed) {
804            info!("fuse: releasedir is not supported.");
805            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
806        }
807
808        if let Some(hd) = self.handles.lock().await.get(&fh) {
809            let rh = if let Some(ref h) = hd.real_handle {
810                h
811            } else {
812                return Err(
813                    Error::other(format!("no real handle found for file handle {fh}")).into(),
814                );
815            };
816            let real_handle = rh.handle.load(Ordering::Relaxed);
817            let real_inode = rh.inode;
818            rh.layer
819                .releasedir(req, real_inode, real_handle, flags)
820                .await?;
821        }
822
823        self.handles.lock().await.remove(&fh);
824        Ok(())
825    }
826
827    /// synchronize directory contents. If the `datasync` is true, then only the directory contents
828    /// should be flushed, not the metadata. `fh` will contain the value set by the
829    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
830    /// [`opendir`][Filesystem::opendir] method didn't set any value.
831    async fn fsyncdir(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
832        self.do_fsync(req, inode, datasync, fh, true)
833            .await
834            .map_err(|e| e.into())
835    }
836    /// check file access permissions. This will be called for the `access()` system call. If the
837    /// `default_permissions` mount option is given, this method is not be called. This method is
838    /// not called under Linux kernel versions 2.4.x.
839    async fn access(&self, req: Request, inode: Inode, mask: u32) -> Result<()> {
840        let node = self.lookup_node(req, inode, "").await?;
841
842        if node.whiteout.load(Ordering::Relaxed) {
843            return Err(Error::from_raw_os_error(libc::ENOENT).into());
844        }
845
846        let (layer, real_inode) = self.find_real_inode(inode).await?;
847        layer.access(req, real_inode, mask).await
848    }
849
850    /// create and open a file. If the file does not exist, first create it with the specified
851    /// mode, and then open it. Open flags (with the exception of `O_NOCTTY`) are available in
852    /// flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in `fh`, and use
853    /// this in other all other file operations ([`read`][Filesystem::read],
854    /// [`write`][Filesystem::write], [`flush`][Filesystem::flush],
855    /// [`release`][Filesystem::release], [`fsync`][Filesystem::fsync]). There are also some flags
856    /// (`direct_io`, `keep_cache`) which the filesystem may set, to change the way the file is
857    /// opened. If this method is not implemented or under Linux kernel versions earlier than
858    /// 2.6.15, the [`mknod`][Filesystem::mknod] and [`open`][Filesystem::open] methods will be
859    /// called instead.
860    ///
861    /// # Notes:
862    ///
863    /// See `fuse_file_info` structure in
864    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
865    /// more details.
866    async fn create(
867        &self,
868        req: Request,
869        parent: Inode,
870        name: &OsStr,
871        mode: u32,
872        flags: u32,
873    ) -> Result<ReplyCreated> {
874        // Parent doesn't exist.
875        let pnode = self.lookup_node(req, parent, "").await?;
876        if pnode.whiteout.load(Ordering::Relaxed) {
877            return Err(Error::from_raw_os_error(libc::ENOENT).into());
878        }
879
880        let mut flags: i32 = flags as i32;
881        flags |= libc::O_NOFOLLOW;
882        flags &= !libc::O_DIRECT;
883        if self.config.writeback {
884            if flags & libc::O_ACCMODE == libc::O_WRONLY {
885                flags &= !libc::O_ACCMODE;
886                flags |= libc::O_RDWR;
887            }
888
889            if flags & libc::O_APPEND != 0 {
890                flags &= !libc::O_APPEND;
891            }
892        }
893
894        let final_handle = self
895            .do_create(req, &pnode, name, mode, flags.try_into().unwrap())
896            .await?;
897        let entry = self.do_lookup(req, parent, name.to_str().unwrap()).await?;
898        let fh = final_handle
899            .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "Handle not found"))?;
900
901        let mut opts = OpenOptions::empty();
902        match self.config.cache_policy {
903            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
904            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
905            _ => {}
906        }
907
908        Ok(ReplyCreated {
909            ttl: entry.ttl,
910            attr: entry.attr,
911            generation: entry.generation,
912            fh,
913            flags: opts.bits(),
914        })
915    }
916
917    /// forget more than one inode. This is a batch version [`forget`][Filesystem::forget]
918    async fn batch_forget(&self, _req: Request, inodes: &[(Inode, u64)]) {
919        for inode in inodes {
920            self.forget_one(inode.0, inode.1).await;
921        }
922    }
923
924    /// allocate space for an open file. This function ensures that required space is allocated for
925    /// specified file.
926    ///
927    /// # Notes:
928    ///
929    /// more information about `fallocate`, please see **`man 2 fallocate`**
930    async fn fallocate(
931        &self,
932        req: Request,
933        inode: Inode,
934        fh: u64,
935        offset: u64,
936        length: u64,
937        mode: u32,
938    ) -> Result<()> {
939        // Use O_RDONLY flags which indicates no copy up.
940        let data = self
941            .get_data(req, Some(fh), inode, libc::O_RDONLY as u32)
942            .await?;
943
944        match data.real_handle {
945            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
946            Some(ref rhd) => {
947                if !rhd.in_upper_layer {
948                    // TODO: in lower layer, error out or just success?
949                    return Err(Error::from_raw_os_error(libc::EROFS).into());
950                }
951                rhd.layer
952                    .fallocate(
953                        req,
954                        rhd.inode,
955                        rhd.handle.load(Ordering::Relaxed),
956                        offset,
957                        length,
958                        mode,
959                    )
960                    .await
961            }
962        }
963    }
964
965    /// find next data or hole after the specified offset.
966    async fn lseek(
967        &self,
968        req: Request,
969        inode: Inode,
970        fh: u64,
971        offset: u64,
972        whence: u32,
973    ) -> Result<ReplyLSeek> {
974        let node = self.lookup_node(req, inode, "").await?;
975
976        if node.whiteout.load(Ordering::Relaxed) {
977            return Err(Error::from_raw_os_error(libc::ENOENT).into());
978        }
979
980        let st = node.stat64(req).await?;
981        if utils::is_dir(&st.attr.kind) {
982            // Special handling and security restrictions for directory operations.
983            // Use the common API to obtain the underlying layer and handle info.
984            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
985
986            // Verify that the underlying handle refers to a directory.
987            let handle_stat = match layer.getattr(req, real_inode, Some(real_handle), 0).await {
988                Ok(s) => s,
989                Err(_) => return Err(Error::from_raw_os_error(libc::EBADF).into()),
990            };
991
992            if !utils::is_dir(&handle_stat.attr.kind) {
993                return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
994            }
995
996            // Handle directory lseek operations according to POSIX standard
997            // This enables seekdir/telldir functionality on directories
998            match whence {
999                // SEEK_SET: Set the directory position to an absolute value
1000                x if x == libc::SEEK_SET as u32 => {
1001                    // Validate offset bounds to prevent overflow
1002                    // Directory offsets should not exceed i64::MAX
1003                    if offset > i64::MAX as u64 {
1004                        return Err(Error::from_raw_os_error(libc::EINVAL).into());
1005                    }
1006
1007                    // Perform the seek operation on the underlying layer
1008                    // Delegate to the lower layer implementation
1009                    layer
1010                        .lseek(req, real_inode, real_handle, offset, whence)
1011                        .await
1012                }
1013                // SEEK_CUR: Move relative to the current directory position
1014                x if x == libc::SEEK_CUR as u32 => {
1015                    // Get current position from underlying layer
1016                    // This is needed to calculate the new position
1017                    let current = match layer
1018                        .lseek(req, real_inode, real_handle, 0, libc::SEEK_CUR as u32)
1019                        .await
1020                    {
1021                        Ok(r) => r.offset,
1022                        Err(_) => return Err(Error::from_raw_os_error(libc::EINVAL).into()),
1023                    };
1024
1025                    // Check for potential overflow when adding the provided offset
1026                    // This prevents invalid position calculations
1027                    if let Some(new_offset) = current.checked_add(offset) {
1028                        // Ensure the new offset is within valid bounds
1029                        if new_offset > i64::MAX as u64 {
1030                            return Err(Error::from_raw_os_error(libc::EINVAL).into());
1031                        }
1032
1033                        // Actually set the underlying offset to the new value so behavior
1034                        // matches passthrough which uses libc::lseek64 to set the fd offset.
1035                        match layer
1036                            .lseek(
1037                                req,
1038                                real_inode,
1039                                real_handle,
1040                                new_offset,
1041                                libc::SEEK_SET as u32,
1042                            )
1043                            .await
1044                        {
1045                            Ok(_) => Ok(ReplyLSeek { offset: new_offset }),
1046                            Err(_) => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1047                        }
1048                    } else {
1049                        Err(Error::from_raw_os_error(libc::EINVAL).into())
1050                    }
1051                }
1052                // Any other whence value is invalid for directories
1053                _ => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1054            }
1055        } else {
1056            // Keep the original lseek behavior for regular files
1057            // Delegate directly to the underlying layer
1058            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1059            layer
1060                .lseek(req, real_inode, real_handle, offset, whence)
1061                .await
1062        }
1063    }
1064
1065    async fn interrupt(&self, _req: Request, _unique: u64) -> Result<()> {
1066        Ok(())
1067    }
1068}
#[cfg(test)]
mod tests {
    use std::{ffi::OsString, path::PathBuf, sync::Arc};

    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;
    use tracing_subscriber::EnvFilter;

    use crate::{
        overlayfs::{OverlayFs, config::Config},
        passthrough::{PassthroughArgs, new_passthroughfs_layer, newlogfs::LoggingFileSystem},
    };

    /// Manual smoke test (`#[ignore]`d): mounts an overlay of one lower directory and one
    /// upper directory at a fixed mountpoint and serves it until the session ends or
    /// Ctrl-C is received. The paths are machine-specific.
    #[tokio::test]
    #[ignore]
    async fn test_a_ovlfs() {
        // Trace-level logging; an already installed subscriber is not an error.
        let trace_filter =
            EnvFilter::from_default_env().add_directive("trace".parse().unwrap());
        let _ = tracing_subscriber::fmt()
            .with_env_filter(trace_filter)
            .try_init();

        // Test environment.
        let mountpoint = PathBuf::from("/home/luxian/megatest/true_temp");
        let lower_roots = [PathBuf::from("/home/luxian/github/buck2-rust-third-party")];
        let upper_root = PathBuf::from("/home/luxian/upper");

        // One passthrough layer per lower directory.
        let mut lower_layers = Vec::with_capacity(lower_roots.len());
        for root in lower_roots {
            let args = PassthroughArgs {
                root_dir: root,
                mapping: None::<&str>,
            };
            let layer = new_passthroughfs_layer(args).await.unwrap();
            lower_layers.push(Arc::new(layer));
        }

        // Writable upper layer.
        let upper_args = PassthroughArgs {
            root_dir: upper_root,
            mapping: None::<&str>,
        };
        let upper_layer = Arc::new(new_passthroughfs_layer(upper_args).await.unwrap());

        // Overlay filesystem wrapped in the logging adapter.
        let config = Config {
            mountpoint: mountpoint.clone(),
            do_import: true,
            ..Default::default()
        };
        let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1).unwrap();
        let logfs = LoggingFileSystem::new(overlayfs);

        let mount_path: OsString = OsString::from(mountpoint);

        // SAFETY: `getuid`/`getgid` take no arguments and only read process credentials.
        let (uid, gid) = unsafe { (libc::getuid(), libc::getgid()) };

        let mut mount_options = MountOptions::default();
        mount_options.force_readdir_plus(true).uid(uid).gid(gid);

        // The test always takes the unprivileged mount path; flip the flag to exercise
        // the privileged one.
        let use_privileged_mount = false;
        let session = Session::new(mount_options);
        let mut mount_handle: rfuse3::raw::MountHandle = if use_privileged_mount {
            session.mount(logfs, mount_path).await.unwrap()
        } else {
            session
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        };

        let handle = &mut mount_handle;

        // Serve until the session finishes on its own, or unmount on Ctrl-C.
        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}