Skip to main content

libfuse_fs/passthrough/
util.rs

1#![allow(clippy::unnecessary_cast)]
2#![allow(clippy::useless_conversion)]
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE-BSD-3-Clause file.
5// Copyright (C) 2023 Alibaba Cloud. All rights reserved.
6
7use std::collections::{BTreeMap, btree_map};
8use std::ffi::{CStr, CString, OsStr};
9use std::fs::File;
10use std::io;
11use std::mem::MaybeUninit;
12use std::os::unix::ffi::OsStrExt;
13use std::os::unix::io::{AsRawFd, FromRawFd};
14use std::sync::Mutex;
15use std::sync::atomic::{AtomicU8, AtomicU64, Ordering};
16
17use rfuse3::{FileType, Timestamp, raw::reply::FileAttr};
18use tracing::error;
19
/// macOS shim: the rest of this module refers to `stat64`, which on macOS is provided here as an
/// alias for `libc::stat`.
#[cfg(target_os = "macos")]
#[allow(non_camel_case_types)]
pub type stat64 = libc::stat;

/// macOS shim: `AT_EMPTY_PATH` is defined as 0 here, so OR-ing it into a flag set leaves the
/// flags unchanged.
#[cfg(target_os = "macos")]
pub const AT_EMPTY_PATH: libc::c_int = 0;
26
27#[cfg(target_os = "linux")]
28pub use libc::{AT_EMPTY_PATH, stat64};
29
30use super::inode_store::InodeId;
31use super::{CURRENT_DIR_CSTR, EMPTY_CSTR, MAX_HOST_INO, PARENT_DIR_CSTR};
32
/// Flag marking a generated inode number as a virtual inode: bit 55 counting from zero, i.e. the
/// 56th bit counting from one.
const VIRTUAL_INODE_FLAG: u64 = 1u64 << 55;
35
36/// Used to form a pair of dev and mntid as the key of the map
37#[derive(Clone, Copy, Default, PartialOrd, Ord, PartialEq, Eq, Debug)]
38struct DevMntIDPair(libc::dev_t, u64);
39
40// Used to generate a unique inode with a maximum of 56 bits. the format is
41// |1bit|8bit|47bit
42// when the highest bit is equal to 0, it means the host inode format, and the lower 47 bits normally store no more than 47-bit inode
43// When the highest bit is equal to 1, it indicates the virtual inode format,
44// which is used to store more than 47 bits of inodes
45// the middle 8bit is used to store the unique ID produced by the combination of dev+mntid
46pub struct UniqueInodeGenerator {
47    // Mapping (dev, mnt_id) pair to another small unique id
48    dev_mntid_map: Mutex<BTreeMap<DevMntIDPair, u8>>,
49    next_unique_id: AtomicU8,
50    next_virtual_inode: AtomicU64,
51}
52
53impl Default for UniqueInodeGenerator {
54    fn default() -> Self {
55        Self::new()
56    }
57}
58
59impl UniqueInodeGenerator {
60    pub fn new() -> Self {
61        UniqueInodeGenerator {
62            dev_mntid_map: Mutex::new(Default::default()),
63            next_unique_id: AtomicU8::new(1),
64            next_virtual_inode: AtomicU64::new(1),
65        }
66    }
67
68    #[cfg(target_os = "linux")]
69    pub fn get_unique_inode(&self, id: &InodeId) -> io::Result<libc::ino64_t> {
70        self.get_unique_inode_impl(id)
71    }
72    #[cfg(target_os = "macos")]
73    pub fn get_unique_inode(&self, id: &InodeId) -> io::Result<libc::ino_t> {
74        self.get_unique_inode_impl(id)
75    }
76    fn get_unique_inode_impl(&self, id: &InodeId) -> io::Result<u64> {
77        let unique_id = {
78            let id: DevMntIDPair = DevMntIDPair(id.dev, id.mnt);
79            let mut id_map_guard = self.dev_mntid_map.lock().unwrap();
80            match id_map_guard.entry(id) {
81                btree_map::Entry::Occupied(v) => *v.get(),
82                btree_map::Entry::Vacant(v) => {
83                    if self.next_unique_id.load(Ordering::Relaxed) == u8::MAX {
84                        return Err(io::Error::other(
85                            "the number of combinations of dev and mntid exceeds 255",
86                        ));
87                    }
88                    let next_id = self.next_unique_id.fetch_add(1, Ordering::Relaxed);
89                    v.insert(next_id);
90                    next_id
91                }
92            }
93        };
94
95        let inode = if id.ino <= MAX_HOST_INO {
96            id.ino
97        } else {
98            if self.next_virtual_inode.load(Ordering::Relaxed) > MAX_HOST_INO {
99                return Err(io::Error::other(format!(
100                    "the virtual inode excess {MAX_HOST_INO}"
101                )));
102            }
103            self.next_virtual_inode.fetch_add(1, Ordering::Relaxed) | VIRTUAL_INODE_FLAG
104        };
105
106        Ok(((unique_id as u64) << 47) | inode)
107    }
108
109    #[cfg(test)]
110    fn decode_unique_inode(&self, inode: u64) -> io::Result<InodeId> {
111        use super::VFS_MAX_INO;
112
113        if inode > VFS_MAX_INO {
114            return Err(io::Error::new(
115                io::ErrorKind::InvalidInput,
116                format!("the inode {inode} excess {VFS_MAX_INO}"),
117            ));
118        }
119
120        let dev_mntid = (inode >> 47) as u8;
121        if dev_mntid == u8::MAX {
122            return Err(io::Error::new(
123                io::ErrorKind::InvalidInput,
124                format!("invalid dev and mntid {dev_mntid} excess 255"),
125            ));
126        }
127
128        let mut dev: libc::dev_t = 0;
129        let mut mnt: u64 = 0;
130
131        let mut found = false;
132        let id_map_guard = self.dev_mntid_map.lock().unwrap();
133        for (k, v) in id_map_guard.iter() {
134            if *v == dev_mntid {
135                found = true;
136                dev = k.0;
137                mnt = k.1;
138                break;
139            }
140        }
141
142        if !found {
143            return Err(io::Error::new(
144                io::ErrorKind::InvalidInput,
145                format!("invalid dev and mntid {dev_mntid},there is no record in memory "),
146            ));
147        }
148        Ok(InodeId {
149            ino: inode & MAX_HOST_INO,
150            dev,
151            mnt,
152        })
153    }
154}
155
156/// Safe wrapper around libc::openat().
157pub fn openat(
158    dir_fd: &impl AsRawFd,
159    path: &CStr,
160    flags: libc::c_int,
161    mode: u32,
162) -> io::Result<File> {
163    // Safe because:
164    // - CString::new() has returned success and thus guarantees `path_cstr` is a valid
165    //   NUL-terminated string
166    // - this does not modify any memory
167    // - we check the return value
168    // We do not check `flags` because if the kernel cannot handle poorly specified flags then we
169    // have much bigger problems.
170    let fd = if flags & libc::O_CREAT == libc::O_CREAT {
171        // The mode argument is used only when O_CREAT is specified
172        unsafe { libc::openat(dir_fd.as_raw_fd(), path.as_ptr(), flags, mode) }
173    } else {
174        unsafe { libc::openat(dir_fd.as_raw_fd(), path.as_ptr(), flags) }
175    };
176    if fd >= 0 {
177        // Safe because we just opened this fd
178        Ok(unsafe { File::from_raw_fd(fd) })
179    } else {
180        Err(io::Error::last_os_error())
181    }
182}
183
184/// Open `/proc/self/fd/{fd}` with the given flags to effectively duplicate the given `fd` with new
185/// flags (e.g. to turn an `O_PATH` file descriptor into one that can be used for I/O).
186pub fn reopen_fd_through_proc(
187    fd: &impl AsRawFd,
188    flags: libc::c_int,
189    proc_self_fd: &impl AsRawFd,
190) -> io::Result<File> {
191    // Clear the `O_NOFOLLOW` flag if it is set since we need to follow the `/proc/self/fd` symlink
192    // to get the file.
193    #[cfg(target_os = "macos")]
194    {
195        let mut buf = [0u8; libc::MAXPATHLEN as usize];
196        let res = unsafe { libc::fcntl(fd.as_raw_fd(), libc::F_GETPATH, buf.as_mut_ptr()) };
197        if res < 0 {
198            return Err(io::Error::last_os_error());
199        }
200        let path = unsafe { CStr::from_ptr(buf.as_ptr() as *const libc::c_char) };
201        let flags = flags & !libc::O_NOFOLLOW & !libc::O_CREAT & !libc::O_DIRECTORY;
202        // On macOS, F_GETPATH returns the absolute path, so openat will ignore the dir_fd.
203        // We use proc_self_fd as a valid FD placeholder.
204        openat(proc_self_fd, path, flags, 0)
205    }
206    #[cfg(target_os = "linux")]
207    {
208        let name = CString::new(format!("{}", fd.as_raw_fd()).as_str())?;
209        let flags = flags & !libc::O_NOFOLLOW & !libc::O_CREAT;
210        openat(proc_self_fd, &name, flags, 0)
211    }
212}
213
214pub fn stat_fd(dir: &impl AsRawFd, path: Option<&CStr>) -> io::Result<stat64> {
215    // Safe because this is a constant value and a valid C string.
216    let pathname =
217        path.unwrap_or_else(|| unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) });
218    let mut stat = MaybeUninit::<stat64>::zeroed();
219    let dir_fd = dir.as_raw_fd();
220    // Safe because the kernel will only write data in `stat` and we check the return value.
221    let res = match () {
222        #[cfg(target_os = "linux")]
223        () => unsafe {
224            libc::fstatat64(
225                dir_fd,
226                pathname.as_ptr(),
227                stat.as_mut_ptr(),
228                libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
229            )
230        },
231        #[cfg(target_os = "macos")]
232        () => unsafe {
233            if pathname.to_bytes().is_empty() {
234                libc::fstat(dir_fd, stat.as_mut_ptr())
235            } else {
236                libc::fstatat(
237                    dir_fd,
238                    pathname.as_ptr(),
239                    stat.as_mut_ptr(),
240                    AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
241                )
242            }
243        },
244    };
245    if res >= 0 {
246        // Safe because the kernel guarantees that the struct is now fully initialized.
247        Ok(unsafe { stat.assume_init() })
248    } else {
249        Err(io::Error::last_os_error())
250    }
251}
252
253/// Returns true if it's safe to open this inode without O_PATH.
254pub fn is_safe_inode(mode: u32) -> bool {
255    // Only regular files and directories are considered safe to be opened from the file
256    // server without O_PATH.
257    let kind = mode & (libc::S_IFMT as u32);
258    kind == (libc::S_IFREG as u32) || kind == (libc::S_IFDIR as u32)
259}
260
261/// Returns true if the mode is for a directory.
262pub fn is_dir(mode: u32) -> bool {
263    (mode & (libc::S_IFMT as u32)) == (libc::S_IFDIR as u32)
264}
265
266pub fn ebadf() -> io::Error {
267    io::Error::from_raw_os_error(libc::EBADF)
268}
269
270pub fn einval() -> io::Error {
271    io::Error::from_raw_os_error(libc::EINVAL)
272}
273
274pub fn enosys() -> io::Error {
275    io::Error::from_raw_os_error(libc::ENOSYS)
276}
277#[allow(unused)]
278pub fn eperm() -> io::Error {
279    io::Error::from_raw_os_error(libc::EPERM)
280}
281#[allow(unused)]
282pub fn convert_stat64_to_file_attr(stat: stat64) -> FileAttr {
283    FileAttr {
284        ino: stat.st_ino,
285        size: stat.st_size as u64,
286        blocks: stat.st_blocks as u64,
287        atime: Timestamp::new(stat.st_atime, stat.st_atime_nsec.try_into().unwrap()),
288        mtime: Timestamp::new(stat.st_mtime, stat.st_mtime_nsec.try_into().unwrap()),
289        ctime: Timestamp::new(stat.st_ctime, stat.st_ctime_nsec.try_into().unwrap()),
290        #[cfg(target_os = "macos")]
291        crtime: Timestamp::new(0, 0), // Set crtime to 0 for non-macOS platforms
292        kind: filetype_from_mode(stat.st_mode.into()),
293        perm: (stat.st_mode & 0o7777) as u16,
294        nlink: stat.st_nlink as u32,
295        uid: stat.st_uid,
296        gid: stat.st_gid,
297        rdev: stat.st_rdev as u32,
298        #[cfg(target_os = "macos")]
299        flags: 0, // Set flags to 0 for non-macOS platforms
300        blksize: stat.st_blksize as u32,
301    }
302}
303
304pub fn filetype_from_mode(st_mode: u32) -> FileType {
305    let st_mode = st_mode & (libc::S_IFMT as u32);
306    if st_mode == (libc::S_IFIFO as u32) {
307        return FileType::NamedPipe;
308    }
309    if st_mode == (libc::S_IFCHR as u32) {
310        return FileType::CharDevice;
311    }
312    if st_mode == (libc::S_IFBLK as u32) {
313        return FileType::BlockDevice;
314    }
315    if st_mode == (libc::S_IFDIR as u32) {
316        return FileType::Directory;
317    }
318    if st_mode == (libc::S_IFREG as u32) {
319        return FileType::RegularFile;
320    }
321    if st_mode == (libc::S_IFLNK as u32) {
322        return FileType::Symlink;
323    }
324    if st_mode == (libc::S_IFSOCK as u32) {
325        return FileType::Socket;
326    }
327    error!("wrong st mode : {st_mode}");
328    unreachable!();
329}
330
331/// Validate a path component. A well behaved FUSE client should never send dot, dotdot and path
332/// components containing slash ('/'). The only exception is that LOOKUP might contain dot and
333/// dotdot to support NFS export.
334#[inline]
335pub fn validate_path_component(name: &CStr) -> io::Result<()> {
336    match is_safe_path_component(name) {
337        true => Ok(()),
338        false => Err(io::Error::from_raw_os_error(libc::EINVAL)),
339    }
340}
/// ASCII code of the slash character ('/').
pub const SLASH_ASCII: u8 = b'/';
343// Is `path` a single path component that is not "." or ".."?
344fn is_safe_path_component(name: &CStr) -> bool {
345    let bytes = name.to_bytes_with_nul();
346
347    if bytes.contains(&SLASH_ASCII) {
348        return false;
349    }
350    !is_dot_or_dotdot(name)
351}
352#[inline]
353fn is_dot_or_dotdot(name: &CStr) -> bool {
354    let bytes = name.to_bytes_with_nul();
355    bytes.starts_with(CURRENT_DIR_CSTR) || bytes.starts_with(PARENT_DIR_CSTR)
356}
357
/// Copies the bytes of `os_str` into a new NUL-terminated `CString`.
///
/// # Errors
///
/// Returns `NulError` when `os_str` contains an interior NUL byte.
pub fn osstr_to_cstr(os_str: &OsStr) -> Result<CString, std::ffi::NulError> {
    CString::new(os_str.as_bytes())
}
363
364#[cfg(target_os = "linux")]
365macro_rules! scoped_cred {
366    ($name:ident, $ty:ty, $syscall_nr:expr) => {
367        #[derive(Debug)]
368        pub struct $name;
369
370        impl $name {
371            // Changes the effective uid/gid of the current thread to `val`.  Changes
372            // the thread's credentials back to root when the returned struct is dropped.
373            fn new(val: $ty) -> io::Result<Option<$name>> {
374                if val == 0 {
375                    // Nothing to do since we are already uid 0.
376                    return Ok(None);
377                }
378
379                // We want credential changes to be per-thread because otherwise
380                // we might interfere with operations being carried out on other
381                // threads with different uids/gids.  However, posix requires that
382                // all threads in a process share the same credentials.  To do this
383                // libc uses signals to ensure that when one thread changes its
384                // credentials the other threads do the same thing.
385                //
386                // So instead we invoke the syscall directly in order to get around
387                // this limitation.  Another option is to use the setfsuid and
388                // setfsgid systems calls.   However since those calls have no way to
389                // return an error, it's preferable to do this instead.
390
391                // This call is safe because it doesn't modify any memory and we
392                // check the return value.
393                let res = unsafe { libc::syscall($syscall_nr, -1, val, -1) };
394                if res == 0 {
395                    Ok(Some($name))
396                } else {
397                    Err(io::Error::last_os_error())
398                }
399            }
400        }
401
402        impl Drop for $name {
403            fn drop(&mut self) {
404                let res = unsafe { libc::syscall($syscall_nr, -1, 0, -1) };
405                if res < 0 {
406                    error!(
407                        "fuse: failed to change credentials back to root: {}",
408                        io::Error::last_os_error(),
409                    );
410                }
411            }
412        }
413    };
414}
415#[cfg(target_os = "linux")]
416scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid);
417#[cfg(target_os = "linux")]
418scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid);
419
// Stub implementations for macOS: no credentials are changed, `new` always returns `Ok(None)`.
// (setreuid/setregid could be used if real switching is ever needed; for now this only has to
// compile.)
//
// Both stubs derive `Debug` like their Linux counterparts, so code that formats the guards
// returned by `set_creds` builds on either platform.

/// macOS placeholder for the uid guard; never constructed.
#[cfg(target_os = "macos")]
#[derive(Debug)]
pub struct ScopedUid;

#[cfg(target_os = "macos")]
impl ScopedUid {
    /// Does nothing and reports that no guard is needed.
    fn new(_: libc::uid_t) -> io::Result<Option<Self>> {
        Ok(None)
    }
}

/// macOS placeholder for the gid guard; never constructed.
#[cfg(target_os = "macos")]
#[derive(Debug)]
pub struct ScopedGid;

#[cfg(target_os = "macos")]
impl ScopedGid {
    /// Does nothing and reports that no guard is needed.
    fn new(_: libc::gid_t) -> io::Result<Option<Self>> {
        Ok(None)
    }
}
437
438pub fn set_creds(
439    uid: libc::uid_t,
440    gid: libc::gid_t,
441) -> io::Result<(Option<ScopedUid>, Option<ScopedGid>)> {
442    // We have to change the gid before we change the uid because if we change the uid first then we
443    // lose the capability to change the gid.  However changing back can happen in any order.
444    ScopedGid::new(gid).and_then(|gid| Ok((ScopedUid::new(uid)?, gid)))
445}
446
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_safe_inode() {
        // Directories and regular files may be opened without O_PATH.
        for kind in [libc::S_IFDIR, libc::S_IFREG] {
            assert!(is_safe_inode((kind as u32) | 0o755));
        }

        // Every other file type must be rejected.
        for kind in [
            libc::S_IFLNK,
            libc::S_IFCHR,
            libc::S_IFBLK,
            libc::S_IFIFO,
            libc::S_IFSOCK,
        ] {
            assert!(!is_safe_inode((kind as u32) | 0o755));
        }

        // Each file-type constant maps to its `FileType` counterpart.
        let expected_types = [
            (libc::S_IFIFO, FileType::NamedPipe),
            (libc::S_IFCHR, FileType::CharDevice),
            (libc::S_IFBLK, FileType::BlockDevice),
            (libc::S_IFDIR, FileType::Directory),
            (libc::S_IFREG, FileType::RegularFile),
            (libc::S_IFLNK, FileType::Symlink),
            (libc::S_IFSOCK, FileType::Socket),
        ];
        for (kind, expected) in expected_types {
            assert_eq!(filetype_from_mode((kind as u32) | 0o755), expected);
        }
    }

    #[test]
    fn test_is_dir() {
        assert!(!is_dir(libc::S_IFREG as u32));
        assert!(is_dir(libc::S_IFDIR as u32));
    }

    #[test]
    fn test_generate_unique_inode() {
        fn key(ino: u64, dev: libc::dev_t, mnt: u64) -> InodeId {
            InodeId { ino, dev, mnt }
        }

        // Host inode format: flag bit clear, 8-bit pair id, host inode in the low 47 bits.
        {
            let generator = UniqueInodeGenerator::new();
            // (ino, mnt, expected unique inode); dev is always 0.
            let cases = [
                // first pair (dev 0, mnt 0) -> id 1, inode 1
                (1, 0, 0x00800000000001u64),
                // second pair (dev 0, mnt 1) -> id 2, inode 1
                (1, 1, 0x01000000000001),
                // same pair again -> id 2, inode 2
                (2, 1, 0x01000000000002),
                // largest inode that still fits the host format
                (MAX_HOST_INO, 1, 0x017fffffffffff),
            ];
            for (ino, mnt, expected) in cases {
                let inode_alt_key = key(ino, 0, mnt);
                let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap();
                assert_eq!(unique_inode, expected);

                // Host-format inodes must decode back to the original key.
                let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap();
                assert_eq!(expect_inode_alt_key, inode_alt_key);
            }
        }

        // Virtual inode format: flag bit set, 8-bit pair id, and a counter that starts at 1
        // and is shared by all pairs.
        {
            let generator = UniqueInodeGenerator::new();
            let max_dev = u64::MAX as libc::dev_t;
            // (ino, mnt, expected unique inode); dev is always `max_dev`.
            let cases = [
                // first pair -> id 1, virtual inode 1
                (MAX_HOST_INO + 1, u64::MAX, 0x80800000000001u64),
                // same pair -> id 1, virtual inode 2
                (MAX_HOST_INO + 2, u64::MAX, 0x80800000000002),
                // new pair (mnt 0) -> id 2, virtual inode 3
                (MAX_HOST_INO + 3, 0, 0x81000000000003),
                // first pair again -> id 1, virtual inode 4
                (u64::MAX, u64::MAX, 0x80800000000004),
            ];
            for (ino, mnt, expected) in cases {
                let inode_alt_key = key(ino, max_dev, mnt);
                let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap();
                assert_eq!(unique_inode, expected);
            }
        }
    }

    #[test]
    fn test_stat_fd() {
        // Expects the test to run from a directory that contains a `Cargo.toml`.
        let cwd = std::env::current_dir().unwrap();
        let dir = File::open(&cwd).unwrap();
        let filename = CString::new("Cargo.toml").unwrap();

        let dir_stat = stat_fd(&dir, None).unwrap();
        let file_stat = stat_fd(&dir, Some(&filename)).unwrap();

        // Same device, but the directory and the file inside it are different inodes.
        assert_eq!(dir_stat.st_dev, file_stat.st_dev);
        assert_ne!(dir_stat.st_ino, file_stat.st_ino);
    }
}