Skip to main content

libfuse_fs/unionfs/
async_io.rs

1use super::utils;
2use super::{CachePolicy, HandleData, Inode, OverlayFs, RealHandle};
3use crate::util::open_options::OpenOptions;
4use rfuse3::raw::prelude::*;
5use rfuse3::*;
6use std::ffi::OsStr;
7use std::io::Error;
8use std::io::ErrorKind;
9use std::num::NonZeroU32;
10use std::sync::Arc;
11use std::sync::atomic::{AtomicU64, Ordering};
12use tokio::sync::Mutex;
13use tracing::info;
14use tracing::trace;
15
16impl Filesystem for OverlayFs {
17    /// initialize filesystem. Called before any other filesystem method.
18    async fn init(&self, _req: Request) -> Result<ReplyInit> {
19        if self.config.do_import {
20            self.import().await?;
21        }
22        #[cfg(target_os = "linux")]
23        {
24            for layer in self.lower_layers.iter() {
25                layer.init(_req).await?;
26            }
27            if let Some(upper) = &self.upper_layer {
28                upper.init(_req).await?;
29            }
30        }
31        if !self.config.do_import || self.config.writeback {
32            self.writeback.store(true, Ordering::Relaxed);
33        }
34        if !self.config.do_import || self.config.no_open {
35            self.no_open.store(true, Ordering::Relaxed);
36        }
37        if !self.config.do_import || self.config.no_opendir {
38            self.no_opendir.store(true, Ordering::Relaxed);
39        }
40        if !self.config.do_import || self.config.killpriv_v2 {
41            self.killpriv_v2.store(true, Ordering::Relaxed);
42        }
43        if self.config.perfile_dax {
44            self.perfile_dax.store(true, Ordering::Relaxed);
45        }
46
47        Ok(ReplyInit {
48            max_write: NonZeroU32::new(128 * 1024).unwrap(),
49        })
50    }
51
52    /// clean up filesystem. Called on filesystem exit which is fuseblk, in normal fuse filesystem,
53    /// kernel may call forget for root. There is some discuss for this
54    /// <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
55    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
56    async fn destroy(&self, _req: Request) {}
57
58    /// look up a directory entry by name and get its attributes.
59    async fn lookup(&self, req: Request, parent: Inode, name: &OsStr) -> Result<ReplyEntry> {
60        let tmp = name.to_string_lossy().to_string();
61        let result = self.do_lookup(req, parent, tmp.as_str()).await;
62        match result {
63            Ok(e) => Ok(e),
64            Err(err) => Err(err.into()),
65        }
66    }
67
68    /// forget an inode. The nlookup parameter indicates the number of lookups previously
69    /// performed on this inode. If the filesystem implements inode lifetimes, it is recommended
70    /// that inodes acquire a single reference on each lookup, and lose nlookup references on each
71    /// forget. The filesystem may ignore forget calls, if the inodes don't need to have a limited
72    /// lifetime. On unmount it is not guaranteed, that all referenced inodes will receive a forget
73    /// message. When filesystem is normal(not fuseblk) and unmounting, kernel may send forget
74    /// request for root and this library will stop session after call forget. There is some
75    /// discussion for this <https://github.com/bazil/fuse/issues/82#issuecomment-88126886>,
76    /// <https://sourceforge.net/p/fuse/mailman/message/31995737/>
77    async fn forget(&self, _req: Request, inode: Inode, nlookup: u64) {
78        self.forget_one(inode, nlookup).await;
79    }
80
81    /// get file attributes. If `fh` is None, means `fh` is not set.
82    async fn getattr(
83        &self,
84        req: Request,
85        inode: Inode,
86        fh: Option<u64>,
87        flags: u32,
88    ) -> Result<ReplyAttr> {
89        if !self.no_open.load(Ordering::Relaxed)
90            && let Some(h) = fh
91        {
92            let handles = self.handles.lock().await;
93            if let Some(hd) = handles.get(&h)
94                && let Some(ref rh) = hd.real_handle
95            {
96                let mut rep: ReplyAttr = rh
97                    .layer
98                    .getattr(req, rh.inode, Some(rh.handle.load(Ordering::Relaxed)), 0)
99                    .await?;
100                rep.attr.ino = inode;
101                return Ok(rep);
102            }
103        }
104
105        let node: Arc<super::OverlayInode> = self.lookup_node(req, inode, "").await?;
106        let (layer, _, lower_inode) = node.first_layer_inode().await;
107        let mut re = layer.getattr(req, lower_inode, None, flags).await?;
108        re.attr.ino = inode;
109        Ok(re)
110    }
111
112    /// set file attributes. If `fh` is None, means `fh` is not set.
113    async fn setattr(
114        &self,
115        req: Request,
116        inode: Inode,
117        fh: Option<u64>,
118        set_attr: SetAttr,
119    ) -> Result<ReplyAttr> {
120        // Check if upper layer exists.
121        self.upper_layer
122            .as_ref()
123            .cloned()
124            .ok_or_else(|| Error::from_raw_os_error(libc::EROFS))?;
125
126        // deal with handle first
127        if !self.no_open.load(Ordering::Relaxed)
128            && let Some(h) = fh
129        {
130            let handles = self.handles.lock().await;
131            if let Some(hd) = handles.get(&h)
132                && let Some(ref rhd) = hd.real_handle
133            {
134                // handle opened in upper layer
135                if rhd.in_upper_layer {
136                    let mut rep = rhd
137                        .layer
138                        .setattr(
139                            req,
140                            rhd.inode,
141                            Some(rhd.handle.load(Ordering::Relaxed)),
142                            set_attr,
143                        )
144                        .await?;
145                    rep.attr.ino = inode;
146                    return Ok(rep);
147                }
148            }
149        }
150
151        let mut node = self.lookup_node(req, inode, "").await?;
152
153        if !node.in_upper_layer().await {
154            node = self.copy_node_up(req, node.clone()).await?
155        }
156
157        let (layer, _, real_inode) = node.first_layer_inode().await;
158        // layer.setattr(req, real_inode, None, set_attr).await
159        let mut rep = layer.setattr(req, real_inode, None, set_attr).await?;
160        rep.attr.ino = inode;
161        Ok(rep)
162    }
163
164    /// read symbolic link.
165    async fn readlink(&self, req: Request, inode: Inode) -> Result<ReplyData> {
166        trace!("READLINK: inode: {inode}\n");
167
168        let node = self.lookup_node(req, inode, "").await?;
169
170        if node.whiteout.load(Ordering::Relaxed) {
171            return Err(Error::from_raw_os_error(libc::ENOENT).into());
172        }
173
174        let (layer, _, inode) = node.first_layer_inode().await;
175        layer.readlink(req, inode).await
176    }
177
178    /// create a symbolic link.
179    async fn symlink(
180        &self,
181        req: Request,
182        parent: Inode,
183        name: &OsStr,
184        link: &OsStr,
185    ) -> Result<ReplyEntry> {
186        // soft link
187        let sname = name.to_string_lossy().into_owned().to_owned();
188        let slinkname = link.to_string_lossy().into_owned().to_owned();
189
190        let pnode = self.lookup_node(req, parent, "").await?;
191        self.do_symlink(req, slinkname.as_str(), &pnode, sname.as_str())
192            .await?;
193
194        self.do_lookup(req, parent, sname.as_str())
195            .await
196            .map_err(|e| e.into())
197    }
198
199    /// create file node. Create a regular file, character device, block device, fifo or socket
200    /// node. When creating file, most cases user only need to implement
201    /// [`create`][Filesystem::create].
202    async fn mknod(
203        &self,
204        req: Request,
205        parent: Inode,
206        name: &OsStr,
207        mode: u32,
208        rdev: u32,
209    ) -> Result<ReplyEntry> {
210        let sname = name.to_string_lossy().to_string();
211
212        // Check if parent exists.
213        let pnode = self.lookup_node(req, parent, "").await?;
214        if pnode.whiteout.load(Ordering::Relaxed) {
215            return Err(Error::from_raw_os_error(libc::ENOENT).into());
216        }
217
218        self.do_mknod(req, &pnode, sname.as_str(), mode, rdev, 0)
219            .await?;
220        self.do_lookup(req, parent, sname.as_str())
221            .await
222            .map_err(|e| e.into())
223    }
224
225    /// create a directory.
226    async fn mkdir(
227        &self,
228        req: Request,
229        parent: Inode,
230        name: &OsStr,
231        mode: u32,
232        umask: u32,
233    ) -> Result<ReplyEntry> {
234        let sname = name.to_string_lossy().to_string();
235
236        // no entry or whiteout
237        let pnode = self.lookup_node(req, parent, "").await?;
238        if pnode.whiteout.load(Ordering::Relaxed) {
239            return Err(Error::from_raw_os_error(libc::ENOENT).into());
240        }
241
242        self.do_mkdir(req, pnode, sname.as_str(), mode, umask)
243            .await?;
244        self.do_lookup(req, parent, sname.as_str())
245            .await
246            .map_err(|e| e.into())
247    }
248
249    /// remove a file.
250    async fn unlink(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
251        self.do_rm(req, parent, name, false)
252            .await
253            .map_err(|e| e.into())
254    }
255
256    /// remove a directory.
257    async fn rmdir(&self, req: Request, parent: Inode, name: &OsStr) -> Result<()> {
258        self.do_rm(req, parent, name, true)
259            .await
260            .map_err(|e| e.into())
261    }
262
263    /// rename a file or directory.
264    async fn rename(
265        &self,
266        req: Request,
267        parent: Inode,
268        name: &OsStr,
269        new_parent: Inode,
270        new_name: &OsStr,
271    ) -> Result<()> {
272        self.do_rename(req, parent, name, new_parent, new_name)
273            .await
274            .map_err(|e| e.into())
275    }
276
277    /// create a hard link.
278    async fn link(
279        &self,
280        req: Request,
281        inode: Inode,
282        new_parent: Inode,
283        new_name: &OsStr,
284    ) -> Result<ReplyEntry> {
285        let node = self.lookup_node(req, inode, "").await?;
286        if node.whiteout.load(Ordering::Relaxed) {
287            return Err(Error::from_raw_os_error(libc::ENOENT).into());
288        }
289
290        let newpnode = self.lookup_node(req, new_parent, "").await?;
291        if newpnode.whiteout.load(Ordering::Relaxed) {
292            return Err(Error::from_raw_os_error(libc::ENOENT).into());
293        }
294        let new_name = new_name.to_str().unwrap();
295        // trace!(
296        //     "LINK: inode: {}, new_parent: {}, trying to do_link: src_inode: {}, newpnode: {}",
297        //     inode, new_parent, node.inode, newpnode.inode
298        // );
299        self.do_link(req, &node, &newpnode, new_name).await?;
300        // trace!("LINK: done, looking up new entry");
301        self.do_lookup(req, new_parent, new_name)
302            .await
303            .map_err(|e| e.into())
304    }
305
306    /// open a file. Open flags (with the exception of `O_CREAT`, `O_EXCL` and `O_NOCTTY`) are
307    /// available in flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in
308    /// fh, and use this in other all other file operations (read, write, flush, release, fsync).
309    /// Filesystem may also implement stateless file I/O and not store anything in fh. There are
310    /// also some flags (`direct_io`, `keep_cache`) which the filesystem may set, to change the way
311    /// the file is opened. A filesystem need not implement this method if it
312    /// sets [`MountOptions::no_open_support`][crate::MountOptions::no_open_support] and if the
313    /// kernel supports `FUSE_NO_OPEN_SUPPORT`.
314    ///
315    /// # Notes:
316    ///
317    /// See `fuse_file_info` structure in
318    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
319    /// more details.
320    async fn open(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
321        if self.no_open.load(Ordering::Relaxed) {
322            info!("fuse: open is not supported.");
323            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
324        }
325
326        let readonly: bool = flags
327            & (libc::O_APPEND | libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR | libc::O_WRONLY)
328                as u32
329            == 0;
330        // toggle flags
331        let mut flags: i32 = flags as i32;
332
333        flags |= libc::O_NOFOLLOW;
334
335        if self.config.writeback {
336            if flags & libc::O_ACCMODE == libc::O_WRONLY {
337                flags &= !libc::O_ACCMODE;
338                flags |= libc::O_RDWR;
339            }
340
341            if flags & libc::O_APPEND != 0 {
342                flags &= !libc::O_APPEND;
343            }
344        }
345        // lookup node
346        let node = self.lookup_node(req, inode, "").await?;
347
348        // whiteout node
349        if node.whiteout.load(Ordering::Relaxed) {
350            return Err(Error::from_raw_os_error(libc::ENOENT).into());
351        }
352
353        if !readonly {
354            // copy up to upper layer
355            self.copy_node_up(req, node.clone()).await?;
356        }
357
358        // assign a handle in overlayfs and open it
359        let (_l, h) = node.open(req, flags as u32, 0).await?;
360
361        let hd = self.next_handle.fetch_add(1, Ordering::Relaxed);
362        let (layer, in_upper_layer, inode) = node.first_layer_inode().await;
363        let handle_data = HandleData {
364            node: node.clone(),
365            real_handle: Some(RealHandle {
366                layer,
367                in_upper_layer,
368                inode,
369                handle: AtomicU64::new(h.fh),
370            }),
371            dir_snapshot: Mutex::new(None),
372        };
373
374        self.handles.lock().await.insert(hd, Arc::new(handle_data));
375
376        let mut opts = OpenOptions::empty();
377        match self.config.cache_policy {
378            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
379            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
380            _ => {}
381        }
382        // trace!("OPEN: returning handle: {hd}");
383
384        Ok(ReplyOpen {
385            fh: hd,
386            flags: opts.bits(),
387        })
388    }
389
390    /// read data. Read should send exactly the number of bytes requested except on EOF or error,
391    /// otherwise the rest of the data will be substituted with zeroes. An exception to this is
392    /// when the file has been opened in `direct_io` mode, in which case the return value of the
393    /// read system call will reflect the return value of this operation. `fh` will contain the
394    /// value set by the open method, or will be undefined if the open method didn't set any value.
395    async fn read(
396        &self,
397        req: Request,
398        inode: Inode,
399        fh: u64,
400        offset: u64,
401        size: u32,
402    ) -> Result<ReplyData> {
403        let data = self.get_data(req, Some(fh), inode, 0).await?;
404
405        match data.real_handle {
406            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
407            Some(ref hd) => {
408                hd.layer
409                    .read(
410                        req,
411                        hd.inode,
412                        hd.handle.load(Ordering::Relaxed),
413                        offset,
414                        size,
415                    )
416                    .await
417            }
418        }
419    }
420
421    /// write data. Write should return exactly the number of bytes requested except on error. An
422    /// exception to this is when the file has been opened in `direct_io` mode, in which case the
423    /// return value of the write system call will reflect the return value of this operation. `fh`
424    /// will contain the value set by the open method, or will be undefined if the open method
425    /// didn't set any value. When `write_flags` contains
426    /// [`FUSE_WRITE_CACHE`](crate::raw::flags::FUSE_WRITE_CACHE), means the write operation is a
427    /// delay write.
428    #[allow(clippy::too_many_arguments)]
429    async fn write(
430        &self,
431        req: Request,
432        inode: Inode,
433        fh: u64,
434        offset: u64,
435        data: &[u8],
436        write_flags: u32,
437        flags: u32,
438    ) -> Result<ReplyWrite> {
439        let handle_data: Arc<HandleData> = self.get_data(req, Some(fh), inode, flags).await?;
440
441        match handle_data.real_handle {
442            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
443            Some(ref hd) => {
444                hd.layer
445                    .write(
446                        req,
447                        hd.inode,
448                        hd.handle.load(Ordering::Relaxed),
449                        offset,
450                        data,
451                        write_flags,
452                        flags,
453                    )
454                    .await
455            }
456        }
457    }
458
459    /// Copy a range of data from one file to another. This can improve performance because it
460    /// reduces data copying: normally, data will be copied from FUSE server to kernel, then to
461    /// user-space, then to kernel, and finally sent back to FUSE server. By implementing this
462    /// method, data will only be copied internally within the FUSE server.
463    #[allow(clippy::too_many_arguments)]
464    async fn copy_file_range(
465        &self,
466        req: Request,
467        inode_in: Inode,
468        fh_in: u64,
469        offset_in: u64,
470        inode_out: Inode,
471        fh_out: u64,
472        offset_out: u64,
473        length: u64,
474        flags: u64,
475    ) -> Result<ReplyCopyFileRange> {
476        // Get handle data for source file
477        let data_in = self.get_data(req, Some(fh_in), inode_in, 0).await?;
478        let handle_in = match data_in.real_handle {
479            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
480            Some(ref hd) => hd,
481        };
482
483        // Get handle data for destination file
484        let data_out = self.get_data(req, Some(fh_out), inode_out, 0).await?;
485        let handle_out = match data_out.real_handle {
486            None => return Err(Error::from_raw_os_error(libc::ENOENT).into()),
487            Some(ref hd) => hd,
488        };
489
490        // Both files must be on the same layer for copy_file_range to work
491        if !Arc::ptr_eq(&handle_in.layer, &handle_out.layer) {
492            // Different layers - return EXDEV to trigger fallback to read/write
493            return Err(Error::from_raw_os_error(libc::EXDEV).into());
494        }
495
496        // Delegate to the underlying PassthroughFs layer
497        handle_in
498            .layer
499            .copy_file_range(
500                req,
501                handle_in.inode,
502                handle_in.handle.load(Ordering::Relaxed),
503                offset_in,
504                handle_out.inode,
505                handle_out.handle.load(Ordering::Relaxed),
506                offset_out,
507                length,
508                flags,
509            )
510            .await
511    }
512
513    /// get filesystem statistics.
514    async fn statfs(&self, req: Request, inode: Inode) -> Result<ReplyStatFs> {
515        self.do_statvfs(req, inode).await.map_err(|e| e.into())
516    }
517
518    /// release an open file. Release is called when there are no more references to an open file:
519    /// all file descriptors are closed and all memory mappings are unmapped. For every open call
520    /// there will be exactly one release call. The filesystem may reply with an error, but error
521    /// values are not returned to `close()` or `munmap()` which triggered the release. `fh` will
522    /// contain the value set by the open method, or will be undefined if the open method didn't
523    /// set any value. `flags` will contain the same flags as for open. `flush` means flush the
524    /// data or not when closing file.
525    async fn release(
526        &self,
527        req: Request,
528        _inode: Inode,
529        fh: u64,
530        flags: u32,
531        lock_owner: u64,
532        flush: bool,
533    ) -> Result<()> {
534        if self.no_open.load(Ordering::Relaxed) {
535            info!("fuse: release is not supported.");
536            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
537        }
538
539        if let Some(hd) = self.handles.lock().await.get(&fh) {
540            let rh = if let Some(ref h) = hd.real_handle {
541                h
542            } else {
543                return Err(
544                    Error::other(format!("no real handle found for file handle {fh}")).into(),
545                );
546            };
547            let real_handle = rh.handle.load(Ordering::Relaxed);
548            let real_inode = rh.inode;
549            rh.layer
550                .release(req, real_inode, real_handle, flags, lock_owner, flush)
551                .await?;
552        }
553
554        self.handles.lock().await.remove(&fh);
555
556        Ok(())
557    }
558
559    /// synchronize file contents. If the `datasync` is true, then only the user data should be
560    /// flushed, not the metadata.
561    async fn fsync(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
562        self.do_fsync(req, inode, datasync, fh, false)
563            .await
564            .map_err(|e| e.into())
565    }
566
567    /// set an extended attribute.
568    async fn setxattr(
569        &self,
570        req: Request,
571        inode: Inode,
572        name: &OsStr,
573        value: &[u8],
574        flags: u32,
575        position: u32,
576    ) -> Result<()> {
577        let node = self.lookup_node(req, inode, "").await?;
578
579        if node.whiteout.load(Ordering::Relaxed) {
580            return Err(Error::from_raw_os_error(libc::ENOENT).into());
581        }
582
583        if !node.in_upper_layer().await {
584            // Copy node up.
585            self.copy_node_up(req, node.clone()).await?;
586        }
587
588        let (layer, _, real_inode) = node.first_layer_inode().await;
589
590        layer
591            .setxattr(req, real_inode, name, value, flags, position)
592            .await
593    }
594
595    /// Get an extended attribute. If `size` is too small, return `Err<ERANGE>`.
596    /// Otherwise, use [`ReplyXAttr::Data`] to send the attribute data, or
597    /// return an error.
598    async fn getxattr(
599        &self,
600        req: Request,
601        inode: Inode,
602        name: &OsStr,
603        size: u32,
604    ) -> Result<ReplyXAttr> {
605        let node = self.lookup_node(req, inode, "").await?;
606
607        if node.whiteout.load(Ordering::Relaxed) {
608            return Err(Error::from_raw_os_error(libc::ENOENT).into());
609        }
610
611        let (layer, real_inode) = self.find_real_inode(inode).await?;
612
613        layer.getxattr(req, real_inode, name, size).await
614    }
615
616    /// List extended attribute names.
617    ///
618    /// If `size` is too small, return `Err<ERANGE>`.  Otherwise, use
619    /// [`ReplyXAttr::Data`] to send the attribute list, or return an error.
620    async fn listxattr(&self, req: Request, inode: Inode, size: u32) -> Result<ReplyXAttr> {
621        let node = self.lookup_node(req, inode, "").await?;
622        if node.whiteout.load(Ordering::Relaxed) {
623            return Err(Error::from_raw_os_error(libc::ENOENT).into());
624        }
625        let (layer, real_inode) = self.find_real_inode(inode).await?;
626        layer.listxattr(req, real_inode, size).await
627    }
628
629    /// remove an extended attribute.
630    async fn removexattr(&self, req: Request, inode: Inode, name: &OsStr) -> Result<()> {
631        let node = self.lookup_node(req, inode, "").await?;
632
633        if node.whiteout.load(Ordering::Relaxed) {
634            return Err(Error::from_raw_os_error(libc::ENOENT).into());
635        }
636
637        if !node.in_upper_layer().await {
638            // copy node into upper layer
639            self.copy_node_up(req, node.clone()).await?;
640        }
641
642        let (layer, _, ino) = node.first_layer_inode().await;
643        layer.removexattr(req, ino, name).await
644
645        // TODO: recreate the node since removexattr may remove the opaque xattr.
646    }
647
648    /// flush method. This is called on each `close()` of the opened file. Since file descriptors
649    /// can be duplicated (`dup`, `dup2`, `fork`), for one open call there may be many flush calls.
650    /// Filesystems shouldn't assume that flush will always be called after some writes, or that if
651    /// will be called at all. `fh` will contain the value set by the open method, or will be
652    /// undefined if the open method didn't set any value.
653    ///
654    /// # Notes:
655    ///
656    /// the name of the method is misleading, since (unlike fsync) the filesystem is not forced to
657    /// flush pending writes. One reason to flush data, is if the filesystem wants to return write
658    /// errors. If the filesystem supports file locking operations ([`setlk`][Filesystem::setlk],
659    /// [`getlk`][Filesystem::getlk]) it should remove all locks belonging to `lock_owner`.
660    async fn flush(&self, req: Request, inode: Inode, fh: u64, lock_owner: u64) -> Result<()> {
661        if self.no_open.load(Ordering::Relaxed) {
662            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
663        }
664
665        let node = self.lookup_node(req, inode, "").await;
666        match node {
667            Ok(n) => {
668                if n.whiteout.load(Ordering::Relaxed) {
669                    return Err(Error::from_raw_os_error(libc::ENOENT).into());
670                }
671            }
672            Err(e) => {
673                if e.raw_os_error() == Some(libc::ENOENT) {
674                    trace!("flush: inode {inode} is stale");
675                } else {
676                    return Err(e.into());
677                }
678            }
679        }
680
681        let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
682
683        // FIXME: need to test if inode matches corresponding handle?
684        if inode
685            != self
686                .handles
687                .lock()
688                .await
689                .get(&fh)
690                .map(|h| h.node.inode)
691                .unwrap_or(0)
692        {
693            return Err(Error::other("inode does not match handle").into());
694        }
695
696        trace!("flushing, real_inode: {real_inode}, real_handle: {real_handle}");
697        layer.flush(req, real_inode, real_handle, lock_owner).await
698    }
699
700    /// open a directory. Filesystem may store an arbitrary file handle (pointer, index, etc) in
701    /// `fh`, and use this in other all other directory stream operations
702    /// ([`readdir`][Filesystem::readdir], [`releasedir`][Filesystem::releasedir],
703    /// [`fsyncdir`][Filesystem::fsyncdir]). Filesystem may also implement stateless directory
704    /// I/O and not store anything in `fh`.  A file system need not implement this method if it
705    /// sets [`MountOptions::no_open_dir_support`][crate::MountOptions::no_open_dir_support] and
706    /// if the kernel supports `FUSE_NO_OPENDIR_SUPPORT`.
707    async fn opendir(&self, req: Request, inode: Inode, flags: u32) -> Result<ReplyOpen> {
708        if self.no_opendir.load(Ordering::Relaxed) {
709            info!("fuse: opendir is not supported.");
710            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
711        }
712
713        // lookup node
714        let node = self.lookup_node(req, inode, ".").await?;
715
716        if node.whiteout.load(Ordering::Relaxed) {
717            return Err(Error::from_raw_os_error(libc::ENOENT).into());
718        }
719
720        let st = node.stat64(req).await?;
721        if !utils::is_dir(&st.attr.kind) {
722            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
723        }
724
725        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
726        // Get the layer information and open directory in the underlying layer
727        let (layer, in_upper_layer, real_inode) = node.first_layer_inode().await;
728        let reply = layer.opendir(req, real_inode, flags).await?;
729
730        self.handles.lock().await.insert(
731            handle,
732            Arc::new(HandleData {
733                node: Arc::clone(&node),
734                real_handle: Some(RealHandle {
735                    layer,
736                    in_upper_layer,
737                    inode: real_inode,
738                    handle: AtomicU64::new(reply.fh),
739                }),
740                dir_snapshot: Mutex::new(None),
741            }),
742        );
743
744        Ok(ReplyOpen { fh: handle, flags })
745    }
746
747    /// read directory. `offset` is used to track the offset of the directory entries. `fh` will
748    /// contain the value set by the [`opendir`][Filesystem::opendir] method, or will be
749    /// undefined if the [`opendir`][Filesystem::opendir] method didn't set any value.
750    async fn readdir<'a>(
751        &'a self,
752        req: Request,
753        parent: Inode,
754        fh: u64,
755        offset: i64,
756    ) -> Result<
757        ReplyDirectory<
758            impl futures_util::stream::Stream<Item = Result<DirectoryEntry>> + Send + 'a,
759        >,
760    > {
761        if self.config.no_readdir {
762            info!("fuse: readdir is not supported.");
763            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
764        }
765        let entries = self
766            .do_readdir(req, parent, fh, offset.try_into().unwrap())
767            .await?;
768        Ok(ReplyDirectory { entries })
769    }
770
771    /// read directory entries, but with their attribute, like [`readdir`][Filesystem::readdir]
772    /// + [`lookup`][Filesystem::lookup] at the same time.
773    async fn readdirplus<'a>(
774        &'a self,
775        req: Request,
776        parent: Inode,
777        fh: u64,
778        offset: u64,
779        _lock_owner: u64,
780    ) -> Result<
781        ReplyDirectoryPlus<
782            impl futures_util::stream::Stream<Item = Result<DirectoryEntryPlus>> + Send + 'a,
783        >,
784    > {
785        if self.config.no_readdir {
786            info!("fuse: readdir is not supported.");
787            return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
788        }
789        trace!("readdirplus: parent: {parent}, fh: {fh}, offset: {offset}");
790        let entries = self.do_readdirplus(req, parent, fh, offset).await?;
791        match self.handles.lock().await.get(&fh) {
792            Some(h) => {
793                trace!(
794                    "after readdirplus: found handle, seeing real_handle: {}",
795                    h.real_handle.is_some()
796                );
797            }
798            None => trace!("after readdirplus: no handle found: {fh}"),
799        }
800        Ok(ReplyDirectoryPlus { entries })
801    }
802    /// release an open directory. For every [`opendir`][Filesystem::opendir] call there will
803    /// be exactly one `releasedir` call. `fh` will contain the value set by the
804    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
805    /// [`opendir`][Filesystem::opendir] method didn't set any value.
806    async fn releasedir(&self, req: Request, _inode: Inode, fh: u64, flags: u32) -> Result<()> {
807        if self.no_opendir.load(Ordering::Relaxed) {
808            info!("fuse: releasedir is not supported.");
809            return Err(Error::from_raw_os_error(libc::ENOSYS).into());
810        }
811
812        if let Some(hd) = self.handles.lock().await.get(&fh) {
813            let rh = if let Some(ref h) = hd.real_handle {
814                h
815            } else {
816                return Err(
817                    Error::other(format!("no real handle found for file handle {fh}")).into(),
818                );
819            };
820            let real_handle = rh.handle.load(Ordering::Relaxed);
821            let real_inode = rh.inode;
822            rh.layer
823                .releasedir(req, real_inode, real_handle, flags)
824                .await?;
825        }
826
827        self.handles.lock().await.remove(&fh);
828        Ok(())
829    }
830
831    /// synchronize directory contents. If the `datasync` is true, then only the directory contents
832    /// should be flushed, not the metadata. `fh` will contain the value set by the
833    /// [`opendir`][Filesystem::opendir] method, or will be undefined if the
834    /// [`opendir`][Filesystem::opendir] method didn't set any value.
835    async fn fsyncdir(&self, req: Request, inode: Inode, fh: u64, datasync: bool) -> Result<()> {
836        self.do_fsync(req, inode, datasync, fh, true)
837            .await
838            .map_err(|e| e.into())
839    }
840
841    #[allow(clippy::too_many_arguments)]
842    async fn getlk(
843        &self,
844        req: Request,
845        _inode: Inode,
846        fh: u64,
847        lock_owner: u64,
848        start: u64,
849        end: u64,
850        r#type: u32,
851        pid: u32,
852    ) -> Result<ReplyLock> {
853        if !self.no_open.load(Ordering::Relaxed) {
854            let handles = self.handles.lock().await;
855            if let Some(hd) = handles.get(&fh)
856                && let Some(ref rh) = hd.real_handle
857            {
858                match rh
859                    .layer
860                    .getlk(
861                        req,
862                        rh.inode,
863                        rh.handle.load(Ordering::Relaxed),
864                        lock_owner,
865                        start,
866                        end,
867                        r#type,
868                        pid,
869                    )
870                    .await
871                {
872                    Ok(reply) => return Ok(reply),
873                    Err(e) => {
874                        // If underlying layer doesn't support locking, fall through to fallback
875                        let errno: i32 = e.into();
876                        if errno != libc::ENOSYS {
877                            return Err(errno.into());
878                        }
879                    }
880                }
881            }
882        }
883
884        // Fallback: report no lock conflict
885        Ok(ReplyLock {
886            start: 0,
887            end: 0,
888            r#type: libc::F_UNLCK as u32,
889            pid: 0,
890        })
891    }
892
893    #[allow(clippy::too_many_arguments)]
894    async fn setlk(
895        &self,
896        req: Request,
897        _inode: Inode,
898        fh: u64,
899        lock_owner: u64,
900        start: u64,
901        end: u64,
902        r#type: u32,
903        pid: u32,
904        block: bool,
905    ) -> Result<()> {
906        if !self.no_open.load(Ordering::Relaxed) {
907            let handles = self.handles.lock().await;
908            if let Some(hd) = handles.get(&fh)
909                && let Some(ref rh) = hd.real_handle
910            {
911                match rh
912                    .layer
913                    .setlk(
914                        req,
915                        rh.inode,
916                        rh.handle.load(Ordering::Relaxed),
917                        lock_owner,
918                        start,
919                        end,
920                        r#type,
921                        pid,
922                        block,
923                    )
924                    .await
925                {
926                    Ok(()) => return Ok(()),
927                    Err(e) => {
928                        // If underlying layer doesn't support locking, fall through to fallback
929                        let errno: i32 = e.into();
930                        if errno != libc::ENOSYS {
931                            return Err(errno.into());
932                        }
933                    }
934                }
935            }
936        }
937
938        // Fallback: silently accept the lock request
939        Ok(())
940    }
941    /// check file access permissions. This will be called for the `access()` system call. If the
942    /// `default_permissions` mount option is given, this method is not be called. This method is
943    /// not called under Linux kernel versions 2.4.x.
944    async fn access(&self, req: Request, inode: Inode, mask: u32) -> Result<()> {
945        let node = self.lookup_node(req, inode, "").await?;
946
947        if node.whiteout.load(Ordering::Relaxed) {
948            return Err(Error::from_raw_os_error(libc::ENOENT).into());
949        }
950
951        let (layer, real_inode) = self.find_real_inode(inode).await?;
952        layer.access(req, real_inode, mask).await
953    }
954
955    /// create and open a file. If the file does not exist, first create it with the specified
956    /// mode, and then open it. Open flags (with the exception of `O_NOCTTY`) are available in
957    /// flags. Filesystem may store an arbitrary file handle (pointer, index, etc) in `fh`, and use
958    /// this in other all other file operations ([`read`][Filesystem::read],
959    /// [`write`][Filesystem::write], [`flush`][Filesystem::flush],
960    /// [`release`][Filesystem::release], [`fsync`][Filesystem::fsync]). There are also some flags
961    /// (`direct_io`, `keep_cache`) which the filesystem may set, to change the way the file is
962    /// opened. If this method is not implemented or under Linux kernel versions earlier than
963    /// 2.6.15, the [`mknod`][Filesystem::mknod] and [`open`][Filesystem::open] methods will be
964    /// called instead.
965    ///
966    /// # Notes:
967    ///
968    /// See `fuse_file_info` structure in
969    /// [fuse_common.h](https://libfuse.github.io/doxygen/include_2fuse__common_8h_source.html) for
970    /// more details.
971    async fn create(
972        &self,
973        req: Request,
974        parent: Inode,
975        name: &OsStr,
976        mode: u32,
977        flags: u32,
978    ) -> Result<ReplyCreated> {
979        // Parent doesn't exist.
980        let pnode = self.lookup_node(req, parent, "").await?;
981        if pnode.whiteout.load(Ordering::Relaxed) {
982            return Err(Error::from_raw_os_error(libc::ENOENT).into());
983        }
984
985        let mut flags: i32 = flags as i32;
986        flags |= libc::O_NOFOLLOW;
987        #[cfg(target_os = "linux")]
988        {
989            flags &= !libc::O_DIRECT;
990        }
991        if self.config.writeback {
992            if flags & libc::O_ACCMODE == libc::O_WRONLY {
993                flags &= !libc::O_ACCMODE;
994                flags |= libc::O_RDWR;
995            }
996
997            if flags & libc::O_APPEND != 0 {
998                flags &= !libc::O_APPEND;
999            }
1000        }
1001
1002        let final_handle = self
1003            .do_create(req, &pnode, name, mode, flags.try_into().unwrap())
1004            .await?;
1005        let entry = self.do_lookup(req, parent, name.to_str().unwrap()).await?;
1006        let fh = final_handle
1007            .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "Handle not found"))?;
1008
1009        let mut opts = OpenOptions::empty();
1010        match self.config.cache_policy {
1011            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
1012            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
1013            _ => {}
1014        }
1015
1016        Ok(ReplyCreated {
1017            ttl: entry.ttl,
1018            attr: entry.attr,
1019            generation: entry.generation,
1020            fh,
1021            flags: opts.bits(),
1022        })
1023    }
1024
1025    /// forget more than one inode. This is a batch version [`forget`][Filesystem::forget]
1026    async fn batch_forget(&self, _req: Request, inodes: &[(Inode, u64)]) {
1027        for inode in inodes {
1028            self.forget_one(inode.0, inode.1).await;
1029        }
1030    }
1031
1032    /// allocate space for an open file. This function ensures that required space is allocated for
1033    /// specified file.
1034    ///
1035    /// # Notes:
1036    ///
1037    /// more information about `fallocate`, please see **`man 2 fallocate`**
1038    async fn fallocate(
1039        &self,
1040        req: Request,
1041        inode: Inode,
1042        fh: u64,
1043        offset: u64,
1044        length: u64,
1045        mode: u32,
1046    ) -> Result<()> {
1047        // Use O_RDONLY flags which indicates no copy up.
1048        let data = self
1049            .get_data(req, Some(fh), inode, libc::O_RDONLY as u32)
1050            .await?;
1051
1052        match data.real_handle {
1053            None => Err(Error::from_raw_os_error(libc::ENOENT).into()),
1054            Some(ref rhd) => {
1055                if !rhd.in_upper_layer {
1056                    // TODO: in lower layer, error out or just success?
1057                    return Err(Error::from_raw_os_error(libc::EROFS).into());
1058                }
1059                rhd.layer
1060                    .fallocate(
1061                        req,
1062                        rhd.inode,
1063                        rhd.handle.load(Ordering::Relaxed),
1064                        offset,
1065                        length,
1066                        mode,
1067                    )
1068                    .await
1069            }
1070        }
1071    }
1072
1073    /// find next data or hole after the specified offset.
1074    async fn lseek(
1075        &self,
1076        req: Request,
1077        inode: Inode,
1078        fh: u64,
1079        offset: u64,
1080        whence: u32,
1081    ) -> Result<ReplyLSeek> {
1082        let node = self.lookup_node(req, inode, "").await?;
1083
1084        if node.whiteout.load(Ordering::Relaxed) {
1085            return Err(Error::from_raw_os_error(libc::ENOENT).into());
1086        }
1087
1088        let st = node.stat64(req).await?;
1089        if utils::is_dir(&st.attr.kind) {
1090            // Special handling and security restrictions for directory operations.
1091            // Use the common API to obtain the underlying layer and handle info.
1092            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1093
1094            // Verify that the underlying handle refers to a directory.
1095            let handle_stat = match layer.getattr(req, real_inode, Some(real_handle), 0).await {
1096                Ok(s) => s,
1097                Err(_) => return Err(Error::from_raw_os_error(libc::EBADF).into()),
1098            };
1099
1100            if !utils::is_dir(&handle_stat.attr.kind) {
1101                return Err(Error::from_raw_os_error(libc::ENOTDIR).into());
1102            }
1103
1104            // Handle directory lseek operations according to POSIX standard
1105            // This enables seekdir/telldir functionality on directories
1106            match whence {
1107                // SEEK_SET: Set the directory position to an absolute value
1108                x if x == libc::SEEK_SET as u32 => {
1109                    // Validate offset bounds to prevent overflow
1110                    // Directory offsets should not exceed i64::MAX
1111                    if offset > i64::MAX as u64 {
1112                        return Err(Error::from_raw_os_error(libc::EINVAL).into());
1113                    }
1114
1115                    // Perform the seek operation on the underlying layer
1116                    // Delegate to the lower layer implementation
1117                    layer
1118                        .lseek(req, real_inode, real_handle, offset, whence)
1119                        .await
1120                }
1121                // SEEK_CUR: Move relative to the current directory position
1122                x if x == libc::SEEK_CUR as u32 => {
1123                    // Get current position from underlying layer
1124                    // This is needed to calculate the new position
1125                    let current = match layer
1126                        .lseek(req, real_inode, real_handle, 0, libc::SEEK_CUR as u32)
1127                        .await
1128                    {
1129                        Ok(r) => r.offset,
1130                        Err(_) => return Err(Error::from_raw_os_error(libc::EINVAL).into()),
1131                    };
1132
1133                    // Check for potential overflow when adding the provided offset
1134                    // This prevents invalid position calculations
1135                    if let Some(new_offset) = current.checked_add(offset) {
1136                        // Ensure the new offset is within valid bounds
1137                        if new_offset > i64::MAX as u64 {
1138                            return Err(Error::from_raw_os_error(libc::EINVAL).into());
1139                        }
1140
1141                        // Actually set the underlying offset to the new value so behavior
1142                        // matches passthrough which uses libc::lseek64 to set the fd offset.
1143                        match layer
1144                            .lseek(
1145                                req,
1146                                real_inode,
1147                                real_handle,
1148                                new_offset,
1149                                libc::SEEK_SET as u32,
1150                            )
1151                            .await
1152                        {
1153                            Ok(_) => Ok(ReplyLSeek { offset: new_offset }),
1154                            Err(_) => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1155                        }
1156                    } else {
1157                        Err(Error::from_raw_os_error(libc::EINVAL).into())
1158                    }
1159                }
1160                // Any other whence value is invalid for directories
1161                _ => Err(Error::from_raw_os_error(libc::EINVAL).into()),
1162            }
1163        } else {
1164            // Keep the original lseek behavior for regular files
1165            // Delegate directly to the underlying layer
1166            let (layer, real_inode, real_handle) = self.find_real_info_from_handle(fh).await?;
1167            layer
1168                .lseek(req, real_inode, real_handle, offset, whence)
1169                .await
1170        }
1171    }
1172
1173    async fn interrupt(&self, _req: Request, _unique: u64) -> Result<()> {
1174        Ok(())
1175    }
1176}
1177
#[cfg(test)]
mod tests {
    use std::{ffi::OsString, path::PathBuf, sync::Arc};

    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;
    use tracing_subscriber::EnvFilter;

    use crate::unionfs::BoxedLayer;
    use crate::{
        passthrough::{PassthroughArgs, new_passthroughfs_layer},
        unionfs::{OverlayFs, config::Config},
    };
    use rfuse3::raw::logfs::LoggingFileSystem;

    /// Manual smoke test: mounts an overlay built from hard-coded local directories and keeps
    /// it mounted until the session ends or Ctrl-C is received. Marked `#[ignore]`, so it only
    /// runs when requested explicitly.
    #[tokio::test]
    #[ignore]
    async fn test_a_ovlfs() {
        let _ = tracing_subscriber::fmt()
            .with_env_filter(EnvFilter::from_default_env().add_directive("trace".parse().unwrap()))
            .try_init();

        // Directories used by this run.
        let mountpoint = PathBuf::from("/home/luxian/megatest/true_temp");
        let lowerdir = vec![PathBuf::from("/home/luxian/github/buck2-rust-third-party")];
        let upperdir = PathBuf::from("/home/luxian/upper");

        // One passthrough layer per lower directory.
        let mut lower_layers: Vec<Arc<BoxedLayer>> = Vec::new();
        for lower_root in &lowerdir {
            let lower_args = PassthroughArgs {
                root_dir: lower_root.clone(),
                mapping: None::<&str>,
            };
            let lower_layer = new_passthroughfs_layer(lower_args).await.unwrap();
            lower_layers.push(Arc::new(lower_layer) as Arc<BoxedLayer>);
        }

        // The single upper layer.
        let upper_args = PassthroughArgs {
            root_dir: upperdir,
            mapping: None::<&str>,
        };
        let upper_layer: Arc<BoxedLayer> =
            Arc::new(new_passthroughfs_layer(upper_args).await.unwrap());

        // Assemble the overlay and wrap it in the logging filesystem.
        let config = Config {
            mountpoint: mountpoint.clone(),
            do_import: true,
            ..Default::default()
        };
        let overlayfs = OverlayFs::new(Some(upper_layer), lower_layers, config, 1).unwrap();
        let logfs = LoggingFileSystem::new(overlayfs);

        let mount_path: OsString = OsString::from(mountpoint);

        // SAFETY: getuid/getgid take no arguments and only return the caller's ids.
        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };

        // `false` selects the unprivileged mount path below.
        let not_unprivileged = false;

        // `allow_other` is left disabled.
        let mut mount_options = MountOptions::default();
        mount_options.force_readdir_plus(true).uid(uid).gid(gid);

        let session = Session::new(mount_options);
        let mut mount_handle: rfuse3::raw::MountHandle = if not_unprivileged {
            session.mount(logfs, mount_path).await.unwrap()
        } else {
            session
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        };

        let handle = &mut mount_handle;

        // Run until the session finishes on its own, or unmount on Ctrl-C.
        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}