Skip to main content

libfuse_fs/passthrough/
util.rs

1#![allow(clippy::unnecessary_cast)]
2#![allow(clippy::useless_conversion)]
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE-BSD-3-Clause file.
5// Copyright (C) 2023 Alibaba Cloud. All rights reserved.
6
7use std::collections::{BTreeMap, btree_map};
8use std::ffi::{CStr, CString, OsStr};
9use std::fs::File;
10use std::io;
11use std::mem::MaybeUninit;
12use std::os::unix::ffi::OsStrExt;
13use std::os::unix::io::{AsRawFd, FromRawFd};
14use std::sync::Mutex;
15use std::sync::atomic::{AtomicU8, AtomicU64, Ordering};
16
17use rfuse3::{FileType, Timestamp, raw::reply::FileAttr};
18use tracing::error;
19
/// On macOS the name `stat64` is provided as an alias for `libc::stat`, so the rest of this
/// module can use a single type name on both platforms.
#[cfg(target_os = "macos")]
#[allow(non_camel_case_types)]
pub type stat64 = libc::stat;

/// On macOS `AT_EMPTY_PATH` is defined here as 0, so OR-ing it into a flag set has no effect.
#[cfg(target_os = "macos")]
pub const AT_EMPTY_PATH: i32 = 0;

// On Linux both names are re-exported straight from `libc`.
#[cfg(target_os = "linux")]
pub use libc::{AT_EMPTY_PATH, stat64};
29
30use super::inode_store::InodeId;
31use super::{CURRENT_DIR_CSTR, EMPTY_CSTR, MAX_HOST_INO, PARENT_DIR_CSTR};
32
/// Flag marking a generated inode number as a virtual inode. It is the 56th bit counting
/// from 1 (bit index 55 counting from 0).
const VIRTUAL_INODE_FLAG: u64 = 1 << 55;

/// A `(dev, mnt_id)` pair, used as the key of the device/mount-id map.
#[derive(Clone, Copy, Default, PartialOrd, Ord, PartialEq, Eq, Debug)]
struct DevMntIDPair(libc::dev_t, u64);
39
/// Generates unique inode numbers with a maximum of 56 bits.
///
/// The format, from the most to the least significant bit, is `|1 bit|8 bits|47 bits|`:
///
/// * When the highest bit is 0 the number is in host inode format and the lower 47 bits
///   store the host inode, which must itself fit in 47 bits.
/// * When the highest bit is 1 the number is in virtual inode format, which is used for
///   host inodes that need more than 47 bits.
/// * The middle 8 bits store the small unique id assigned to the `(dev, mnt_id)`
///   combination.
pub struct UniqueInodeGenerator {
    /// Maps each `(dev, mnt_id)` pair to a small unique id.
    dev_mntid_map: Mutex<BTreeMap<DevMntIDPair, u8>>,
    /// Next id to hand out to a `(dev, mnt_id)` pair that has not been seen before.
    next_unique_id: AtomicU8,
    /// Next number to hand out in the virtual inode format.
    next_virtual_inode: AtomicU64,
}

impl Default for UniqueInodeGenerator {
    /// Equivalent to `UniqueInodeGenerator::new()`.
    fn default() -> Self {
        Self::new()
    }
}
58
59impl UniqueInodeGenerator {
60    pub fn new() -> Self {
61        UniqueInodeGenerator {
62            dev_mntid_map: Mutex::new(Default::default()),
63            next_unique_id: AtomicU8::new(1),
64            next_virtual_inode: AtomicU64::new(1),
65        }
66    }
67
68    #[cfg(target_os = "linux")]
69    pub fn get_unique_inode(&self, id: &InodeId) -> io::Result<libc::ino64_t> {
70        self.get_unique_inode_impl(id)
71    }
72    #[cfg(target_os = "macos")]
73    pub fn get_unique_inode(&self, id: &InodeId) -> io::Result<libc::ino_t> {
74        self.get_unique_inode_impl(id)
75    }
76    fn get_unique_inode_impl(&self, id: &InodeId) -> io::Result<u64> {
77        let unique_id = {
78            let id: DevMntIDPair = DevMntIDPair(id.dev, id.mnt);
79            let mut id_map_guard = self.dev_mntid_map.lock().unwrap();
80            match id_map_guard.entry(id) {
81                btree_map::Entry::Occupied(v) => *v.get(),
82                btree_map::Entry::Vacant(v) => {
83                    if self.next_unique_id.load(Ordering::Relaxed) == u8::MAX {
84                        return Err(io::Error::other(
85                            "the number of combinations of dev and mntid exceeds 255",
86                        ));
87                    }
88                    let next_id = self.next_unique_id.fetch_add(1, Ordering::Relaxed);
89                    v.insert(next_id);
90                    next_id
91                }
92            }
93        };
94
95        let inode = if id.ino <= MAX_HOST_INO {
96            id.ino
97        } else {
98            if self.next_virtual_inode.load(Ordering::Relaxed) > MAX_HOST_INO {
99                return Err(io::Error::other(format!(
100                    "the virtual inode excess {MAX_HOST_INO}"
101                )));
102            }
103            self.next_virtual_inode.fetch_add(1, Ordering::Relaxed) | VIRTUAL_INODE_FLAG
104        };
105
106        Ok(((unique_id as u64) << 47) | inode)
107    }
108
109    #[cfg(test)]
110    fn decode_unique_inode(&self, inode: u64) -> io::Result<InodeId> {
111        use super::VFS_MAX_INO;
112
113        if inode > VFS_MAX_INO {
114            return Err(io::Error::new(
115                io::ErrorKind::InvalidInput,
116                format!("the inode {inode} excess {VFS_MAX_INO}"),
117            ));
118        }
119
120        let dev_mntid = (inode >> 47) as u8;
121        if dev_mntid == u8::MAX {
122            return Err(io::Error::new(
123                io::ErrorKind::InvalidInput,
124                format!("invalid dev and mntid {dev_mntid} excess 255"),
125            ));
126        }
127
128        let mut dev: libc::dev_t = 0;
129        let mut mnt: u64 = 0;
130
131        let mut found = false;
132        let id_map_guard = self.dev_mntid_map.lock().unwrap();
133        for (k, v) in id_map_guard.iter() {
134            if *v == dev_mntid {
135                found = true;
136                dev = k.0;
137                mnt = k.1;
138                break;
139            }
140        }
141
142        if !found {
143            return Err(io::Error::new(
144                io::ErrorKind::InvalidInput,
145                format!("invalid dev and mntid {dev_mntid},there is no record in memory "),
146            ));
147        }
148        Ok(InodeId {
149            ino: inode & MAX_HOST_INO,
150            dev,
151            mnt,
152        })
153    }
154}
155
156/// Safe wrapper around libc::openat().
157pub fn openat(
158    dir_fd: &impl AsRawFd,
159    path: &CStr,
160    flags: libc::c_int,
161    mode: u32,
162) -> io::Result<File> {
163    // Safe because:
164    // - CString::new() has returned success and thus guarantees `path_cstr` is a valid
165    //   NUL-terminated string
166    // - this does not modify any memory
167    // - we check the return value
168    // We do not check `flags` because if the kernel cannot handle poorly specified flags then we
169    // have much bigger problems.
170    let fd = if flags & libc::O_CREAT == libc::O_CREAT {
171        // The mode argument is used only when O_CREAT is specified
172        unsafe { libc::openat(dir_fd.as_raw_fd(), path.as_ptr(), flags, mode) }
173    } else {
174        unsafe { libc::openat(dir_fd.as_raw_fd(), path.as_ptr(), flags) }
175    };
176    if fd >= 0 {
177        // Safe because we just opened this fd
178        Ok(unsafe { File::from_raw_fd(fd) })
179    } else {
180        Err(io::Error::last_os_error())
181    }
182}
183
/// Build a C-string path that refers to the same underlying file as `fd` when opened.
///
/// * Linux: the path is `/proc/self/fd/{fd}`, which works for any fd including `O_PATH` ones.
/// * macOS: the absolute path is resolved with `fcntl(F_GETPATH)`.
///
/// The result is meant for path-based syscalls such as `setxattr`/`getxattr`, which do not
/// have well-behaved `f*` variants on every platform.
///
/// # Errors
///
/// On macOS, returns the OS error if `fcntl` fails. On Linux, an `InvalidData` error is
/// returned if the formatted path cannot be turned into a `CString`.
pub fn fd_path_cstr(fd: std::os::unix::io::RawFd) -> io::Result<CString> {
    #[cfg(target_os = "linux")]
    {
        let proc_path = format!("/proc/self/fd/{fd}");
        match CString::new(proc_path) {
            Ok(c_path) => Ok(c_path),
            Err(nul_err) => Err(io::Error::new(io::ErrorKind::InvalidData, nul_err)),
        }
    }
    #[cfg(target_os = "macos")]
    {
        let mut path_buf = [0u8; libc::MAXPATHLEN as usize];
        // SAFETY: `path_buf` is MAXPATHLEN bytes long, which is the size F_GETPATH expects.
        if unsafe { libc::fcntl(fd, libc::F_GETPATH, path_buf.as_mut_ptr()) } < 0 {
            return Err(io::Error::last_os_error());
        }
        // SAFETY: the buffer was zero-initialised and F_GETPATH succeeded, so it holds a
        // NUL-terminated string.
        let resolved = unsafe { CStr::from_ptr(path_buf.as_ptr() as *const libc::c_char) };
        Ok(resolved.to_owned())
    }
}
207
/// Join a directory C-string path and a single component C-string into one NUL-terminated
/// path, inserting a `/` between them unless `dir` already ends with one.
///
/// Used by macOS callers (e.g. `renamex_np`) that don't have a dirfd-relative variant.
///
/// # Errors
///
/// Returns an `InvalidData` error if the joined bytes cannot be turned into a `CString`.
pub fn join_dir_and_name(dir: &CStr, name: &CStr) -> io::Result<CString> {
    let parent = dir.to_bytes();
    let leaf = name.to_bytes();
    // An empty separator keeps an existing trailing slash from being doubled.
    let separator: &[u8] = if parent.ends_with(b"/") { b"" } else { b"/" };
    let joined = [parent, separator, leaf].concat();
    CString::new(joined).map_err(|nul_err| io::Error::new(io::ErrorKind::InvalidData, nul_err))
}
222
223/// Open `/proc/self/fd/{fd}` with the given flags to effectively duplicate the given `fd` with new
224/// flags (e.g. to turn an `O_PATH` file descriptor into one that can be used for I/O).
225pub fn reopen_fd_through_proc(
226    fd: &impl AsRawFd,
227    flags: libc::c_int,
228    proc_self_fd: &impl AsRawFd,
229) -> io::Result<File> {
230    // Clear the `O_NOFOLLOW` flag if it is set since we need to follow the `/proc/self/fd` symlink
231    // to get the file.
232    #[cfg(target_os = "macos")]
233    {
234        let mut buf = [0u8; libc::MAXPATHLEN as usize];
235        let res = unsafe { libc::fcntl(fd.as_raw_fd(), libc::F_GETPATH, buf.as_mut_ptr()) };
236        if res < 0 {
237            return Err(io::Error::last_os_error());
238        }
239        let path = unsafe { CStr::from_ptr(buf.as_ptr() as *const libc::c_char) };
240        let flags = flags & !libc::O_NOFOLLOW & !libc::O_CREAT & !libc::O_DIRECTORY;
241        // On macOS, F_GETPATH returns the absolute path, so openat will ignore the dir_fd.
242        // We use proc_self_fd as a valid FD placeholder.
243        openat(proc_self_fd, path, flags, 0)
244    }
245    #[cfg(target_os = "linux")]
246    {
247        let name = CString::new(format!("{}", fd.as_raw_fd()).as_str())?;
248        let flags = flags & !libc::O_NOFOLLOW & !libc::O_CREAT;
249        openat(proc_self_fd, &name, flags, 0)
250    }
251}
252
253pub fn stat_fd(dir: &impl AsRawFd, path: Option<&CStr>) -> io::Result<stat64> {
254    // Safe because this is a constant value and a valid C string.
255    let pathname =
256        path.unwrap_or_else(|| unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) });
257    let mut stat = MaybeUninit::<stat64>::zeroed();
258    let dir_fd = dir.as_raw_fd();
259    // Safe because the kernel will only write data in `stat` and we check the return value.
260    let res = match () {
261        #[cfg(target_os = "linux")]
262        () => unsafe {
263            libc::fstatat64(
264                dir_fd,
265                pathname.as_ptr(),
266                stat.as_mut_ptr(),
267                libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
268            )
269        },
270        #[cfg(target_os = "macos")]
271        () => unsafe {
272            if pathname.to_bytes().is_empty() {
273                libc::fstat(dir_fd, stat.as_mut_ptr())
274            } else {
275                libc::fstatat(
276                    dir_fd,
277                    pathname.as_ptr(),
278                    stat.as_mut_ptr(),
279                    AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
280                )
281            }
282        },
283    };
284    if res >= 0 {
285        // Safe because the kernel guarantees that the struct is now fully initialized.
286        Ok(unsafe { stat.assume_init() })
287    } else {
288        Err(io::Error::last_os_error())
289    }
290}
291
292/// Returns true if it's safe to open this inode without O_PATH.
293pub fn is_safe_inode(mode: u32) -> bool {
294    // Only regular files and directories are considered safe to be opened from the file
295    // server without O_PATH.
296    let kind = mode & (libc::S_IFMT as u32);
297    kind == (libc::S_IFREG as u32) || kind == (libc::S_IFDIR as u32)
298}
299
300/// Returns true if the mode is for a directory.
301pub fn is_dir(mode: u32) -> bool {
302    (mode & (libc::S_IFMT as u32)) == (libc::S_IFDIR as u32)
303}
304
/// Builds an `io::Error` carrying the `EBADF` OS error code.
pub fn ebadf() -> io::Error {
    io::Error::from_raw_os_error(libc::EBADF)
}

/// Builds an `io::Error` carrying the `EINVAL` OS error code.
pub fn einval() -> io::Error {
    io::Error::from_raw_os_error(libc::EINVAL)
}

/// Builds an `io::Error` carrying the `ENOSYS` OS error code.
pub fn enosys() -> io::Error {
    io::Error::from_raw_os_error(libc::ENOSYS)
}
316
/// True if this xattr name belongs to a Linux-only namespace (`security.`, `trusted.` or
/// `system.`) that the macOS kernel will refuse anyway.
///
/// Callers can use this to reject such names early, so they report ENOTSUP themselves
/// instead of surfacing a confusing kernel error after the syscall fails.
#[cfg(target_os = "macos")]
pub fn is_linux_only_xattr(name: &CStr) -> bool {
    const LINUX_ONLY_PREFIXES: [&[u8]; 3] = [b"security.", b"trusted.", b"system."];

    let raw = name.to_bytes();
    LINUX_ONLY_PREFIXES
        .iter()
        .any(|prefix| raw.starts_with(prefix))
}
/// Builds an `io::Error` carrying the `EPERM` OS error code.
#[allow(unused)]
pub fn eperm() -> io::Error {
    io::Error::from_raw_os_error(libc::EPERM)
}
331#[allow(unused)]
332pub fn convert_stat64_to_file_attr(stat: stat64) -> FileAttr {
333    FileAttr {
334        ino: stat.st_ino,
335        size: stat.st_size as u64,
336        blocks: stat.st_blocks as u64,
337        atime: Timestamp::new(stat.st_atime, stat.st_atime_nsec.try_into().unwrap()),
338        mtime: Timestamp::new(stat.st_mtime, stat.st_mtime_nsec.try_into().unwrap()),
339        ctime: Timestamp::new(stat.st_ctime, stat.st_ctime_nsec.try_into().unwrap()),
340        #[cfg(target_os = "macos")]
341        crtime: Timestamp::new(0, 0), // Set crtime to 0 for non-macOS platforms
342        kind: filetype_from_mode(stat.st_mode.into()),
343        perm: (stat.st_mode & 0o7777) as u16,
344        nlink: stat.st_nlink as u32,
345        uid: stat.st_uid,
346        gid: stat.st_gid,
347        rdev: stat.st_rdev as u32,
348        #[cfg(target_os = "macos")]
349        flags: 0, // Set flags to 0 for non-macOS platforms
350        blksize: stat.st_blksize as u32,
351    }
352}
353
354pub fn filetype_from_mode(st_mode: u32) -> FileType {
355    let st_mode = st_mode & (libc::S_IFMT as u32);
356    if st_mode == (libc::S_IFIFO as u32) {
357        return FileType::NamedPipe;
358    }
359    if st_mode == (libc::S_IFCHR as u32) {
360        return FileType::CharDevice;
361    }
362    if st_mode == (libc::S_IFBLK as u32) {
363        return FileType::BlockDevice;
364    }
365    if st_mode == (libc::S_IFDIR as u32) {
366        return FileType::Directory;
367    }
368    if st_mode == (libc::S_IFREG as u32) {
369        return FileType::RegularFile;
370    }
371    if st_mode == (libc::S_IFLNK as u32) {
372        return FileType::Symlink;
373    }
374    if st_mode == (libc::S_IFSOCK as u32) {
375        return FileType::Socket;
376    }
377    error!("wrong st mode : {st_mode}");
378    unreachable!();
379}
380
381/// Validate a path component. A well behaved FUSE client should never send dot, dotdot and path
382/// components containing slash ('/'). The only exception is that LOOKUP might contain dot and
383/// dotdot to support NFS export.
384#[inline]
385pub fn validate_path_component(name: &CStr) -> io::Result<()> {
386    match is_safe_path_component(name) {
387        true => Ok(()),
388        false => Err(io::Error::from_raw_os_error(libc::EINVAL)),
389    }
390}
391/// ASCII for slash('/')
392pub const SLASH_ASCII: u8 = 47;
393// Is `path` a single path component that is not "." or ".."?
394fn is_safe_path_component(name: &CStr) -> bool {
395    let bytes = name.to_bytes_with_nul();
396
397    if bytes.contains(&SLASH_ASCII) {
398        return false;
399    }
400    !is_dot_or_dotdot(name)
401}
402#[inline]
403fn is_dot_or_dotdot(name: &CStr) -> bool {
404    let bytes = name.to_bytes_with_nul();
405    bytes.starts_with(CURRENT_DIR_CSTR) || bytes.starts_with(PARENT_DIR_CSTR)
406}
407
/// Copies the bytes of `os_str` into a freshly allocated, NUL-terminated `CString`.
///
/// # Errors
///
/// Returns a `NulError` if `os_str` contains an interior NUL byte.
pub fn osstr_to_cstr(os_str: &OsStr) -> Result<CString, std::ffi::NulError> {
    CString::new(os_str.as_bytes())
}
413
414#[cfg(target_os = "linux")]
415macro_rules! scoped_cred {
416    ($name:ident, $ty:ty, $syscall_nr:expr) => {
417        #[derive(Debug)]
418        pub struct $name;
419
420        impl $name {
421            // Changes the effective uid/gid of the current thread to `val`.  Changes
422            // the thread's credentials back to root when the returned struct is dropped.
423            fn new(val: $ty) -> io::Result<Option<$name>> {
424                if val == 0 {
425                    // Nothing to do since we are already uid 0.
426                    return Ok(None);
427                }
428
429                // We want credential changes to be per-thread because otherwise
430                // we might interfere with operations being carried out on other
431                // threads with different uids/gids.  However, posix requires that
432                // all threads in a process share the same credentials.  To do this
433                // libc uses signals to ensure that when one thread changes its
434                // credentials the other threads do the same thing.
435                //
436                // So instead we invoke the syscall directly in order to get around
437                // this limitation.  Another option is to use the setfsuid and
438                // setfsgid systems calls.   However since those calls have no way to
439                // return an error, it's preferable to do this instead.
440
441                // This call is safe because it doesn't modify any memory and we
442                // check the return value.
443                let res = unsafe { libc::syscall($syscall_nr, -1, val, -1) };
444                if res == 0 {
445                    Ok(Some($name))
446                } else {
447                    Err(io::Error::last_os_error())
448                }
449            }
450        }
451
452        impl Drop for $name {
453            fn drop(&mut self) {
454                let res = unsafe { libc::syscall($syscall_nr, -1, 0, -1) };
455                if res < 0 {
456                    error!(
457                        "fuse: failed to change credentials back to root: {}",
458                        io::Error::last_os_error(),
459                    );
460                }
461            }
462        }
463    };
464}
465#[cfg(target_os = "linux")]
466scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid);
467#[cfg(target_os = "linux")]
468scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid);
469
// Dummy implementation for macOS (or use setreuid/setregid if needed, but for now stub to compile)

/// No-op stand-in for the Linux uid guard: `new` never changes credentials and always
/// returns `Ok(None)`.
///
/// Derives `Debug` so that code formatting the result of `set_creds` with `{:?}` builds on
/// macOS just as it does with the Linux guard types.
#[cfg(target_os = "macos")]
#[derive(Debug)]
pub struct ScopedUid;
#[cfg(target_os = "macos")]
impl ScopedUid {
    fn new(_: libc::uid_t) -> io::Result<Option<Self>> {
        Ok(None)
    }
}
/// No-op stand-in for the Linux gid guard: `new` never changes credentials and always
/// returns `Ok(None)`.
#[cfg(target_os = "macos")]
#[derive(Debug)]
pub struct ScopedGid;
#[cfg(target_os = "macos")]
impl ScopedGid {
    fn new(_: libc::gid_t) -> io::Result<Option<Self>> {
        Ok(None)
    }
}
487
488pub fn set_creds(
489    uid: libc::uid_t,
490    gid: libc::gid_t,
491) -> io::Result<(Option<ScopedUid>, Option<ScopedGid>)> {
492    // We have to change the gid before we change the uid because if we change the uid first then we
493    // lose the capability to change the gid.  However changing back can happen in any order.
494    ScopedGid::new(gid).and_then(|gid| Ok((ScopedUid::new(uid)?, gid)))
495}
496
/// macOS-only: attempt to APFS-clone `src` into `dst` via `clonefile(2)`.
///
/// Returns:
/// * `Ok(true)` — clone succeeded; on APFS this is an O(1) copy-on-write.
/// * `Ok(false)` — the call failed with `ENOTSUP` or `EXDEV`, i.e. the underlying
///   filesystem rejects clones (cross-volume, non-APFS, etc.). Caller falls back to
///   `read`/`write`.
/// * `Err(_)` — any other failure (destination exists, source unreadable, ...), which the
///   caller must surface.
///
/// `dst` must NOT exist; `clonefile` rejects an existing path with EEXIST.
/// The caller is expected to ensure exclusive creation. Mode/uid/gid/xattrs
/// are preserved by the clone (see `clonefile(2)`), so no chmod/chown is
/// required after a successful return.
#[cfg(target_os = "macos")]
pub fn try_apfs_clonefile(src: &CStr, dst: &CStr) -> io::Result<bool> {
    // 0 = no flags. CLONE_NOFOLLOW is also acceptable but the existing
    // overlayfs flow only triggers copy-up for regular files, not symlinks.
    // SAFETY: both pointers come from `CStr`s and are valid NUL-terminated strings.
    let rc = unsafe { libc::clonefile(src.as_ptr(), dst.as_ptr(), 0) };
    if rc == 0 {
        return Ok(true);
    }

    let err = io::Error::last_os_error();
    // ENOTSUP: filesystem doesn't support cloning (e.g. ExFAT, NFS).
    // EXDEV: cross-volume — clone requires the same APFS volume.
    let clone_unsupported = matches!(
        err.raw_os_error(),
        Some(libc::ENOTSUP) | Some(libc::EXDEV)
    );
    if clone_unsupported { Ok(false) } else { Err(err) }
}
526
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `is_safe_inode` and `filetype_from_mode` against every file type.
    #[test]
    fn test_is_safe_inode() {
        let perm = 0o755;

        for safe_kind in [libc::S_IFDIR as u32, libc::S_IFREG as u32] {
            assert!(is_safe_inode(safe_kind | perm));
        }

        let unsafe_kinds = [
            libc::S_IFLNK as u32,
            libc::S_IFCHR as u32,
            libc::S_IFBLK as u32,
            libc::S_IFIFO as u32,
            libc::S_IFSOCK as u32,
        ];
        for other_kind in unsafe_kinds {
            assert!(!is_safe_inode(other_kind | perm));
        }

        let expected_types = [
            (libc::S_IFIFO as u32, FileType::NamedPipe),
            (libc::S_IFCHR as u32, FileType::CharDevice),
            (libc::S_IFBLK as u32, FileType::BlockDevice),
            (libc::S_IFDIR as u32, FileType::Directory),
            (libc::S_IFREG as u32, FileType::RegularFile),
            (libc::S_IFLNK as u32, FileType::Symlink),
            (libc::S_IFSOCK as u32, FileType::Socket),
        ];
        for (kind, file_type) in expected_types {
            assert_eq!(filetype_from_mode(kind | perm), file_type);
        }
    }

    #[test]
    fn test_is_dir() {
        assert!(!is_dir(libc::S_IFREG as u32));
        assert!(is_dir(libc::S_IFDIR as u32));
    }

    /// Bit positions in the comments below are counted from 1:
    /// bit 56 is the virtual flag, bits 55~48 the dev/mnt id, bits 47~1 the inode.
    #[test]
    fn test_generate_unique_inode() {
        // use normal inode format: bit 56 = 0, and the result can be decoded again
        {
            let generator = UniqueInodeGenerator::new();
            let cases = [
                // (ino, mnt, expected unique inode)
                // id bits = 0000 0001, inode bits = 1
                (1, 0, 0x00800000000001),
                // new mnt => id bits = 0000 0010, inode bits = 1
                (1, 1, 0x01000000000001),
                // same pair => id bits = 0000 0010, inode bits = 2
                (2, 1, 0x01000000000002),
                // id bits = 0000 0010, inode bits = 0x7fffffffffff
                (MAX_HOST_INO, 1, 0x017fffffffffff),
            ];
            for (ino, mnt, expected) in cases {
                let inode_alt_key = InodeId { ino, dev: 0, mnt };
                let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap();
                assert_eq!(unique_inode, expected);
                let decoded = generator.decode_unique_inode(unique_inode).unwrap();
                assert_eq!(decoded, inode_alt_key);
            }
        }

        // use virtual inode format: bit 56 = 1, virtual inode numbers are handed out
        // sequentially starting from 1
        {
            let generator = UniqueInodeGenerator::new();
            let dev = u64::MAX as libc::dev_t;
            let cases = [
                // (ino, mnt, expected unique inode)
                // id bits = 0000 0001, virtual inode = 1
                (MAX_HOST_INO + 1, u64::MAX, 0x80800000000001),
                // id bits = 0000 0001, virtual inode = 2
                (MAX_HOST_INO + 2, u64::MAX, 0x80800000000002),
                // new mnt => id bits = 0000 0010, virtual inode = 3
                (MAX_HOST_INO + 3, 0, 0x81000000000003),
                // id bits = 0000 0001, virtual inode = 4
                (u64::MAX, u64::MAX, 0x80800000000004),
            ];
            for (ino, mnt, expected) in cases {
                let inode_alt_key = InodeId { ino, dev, mnt };
                let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap();
                assert_eq!(unique_inode, expected);
            }
        }
    }

    /// Relies on the working directory containing a `Cargo.toml`.
    #[test]
    fn test_stat_fd() {
        let topdir = std::env::current_dir().unwrap();
        let dir = File::open(&topdir).unwrap();
        let filename = CString::new("Cargo.toml").unwrap();

        let dir_stat = stat_fd(&dir, None).unwrap();
        let file_stat = stat_fd(&dir, Some(&filename)).unwrap();

        assert_eq!(dir_stat.st_dev, file_stat.st_dev);
        assert_ne!(dir_stat.st_ino, file_stat.st_ino);
    }

    /// `clonefile(2)` round-trip on macOS APFS. macOS tempdirs default to
    /// the APFS root volume, so the call should succeed and produce a
    /// byte-identical copy.
    #[cfg(target_os = "macos")]
    #[test]
    fn macos_apfs_clone_roundtrip() {
        let dir = tempfile::tempdir().unwrap();
        let src = dir.path().join("src.bin");
        let dst = dir.path().join("dst.bin");
        // 4 MiB payload with deterministic content. Big enough that a
        // byte-by-byte copy would take measurable time; small enough not
        // to stress CI tmpfs quotas.
        let payload: Vec<u8> = (0..(4 * 1024 * 1024)).map(|i| (i % 251) as u8).collect();
        std::fs::write(&src, &payload).unwrap();
        let src_c = CString::new(src.as_os_str().as_bytes()).unwrap();
        let dst_c = CString::new(dst.as_os_str().as_bytes()).unwrap();

        let cloned = try_apfs_clonefile(&src_c, &dst_c).expect("clone failed");
        assert!(
            cloned,
            "macOS tempdir defaults to APFS — clone should succeed"
        );

        let read_back = std::fs::read(&dst).unwrap();
        assert_eq!(
            read_back, payload,
            "clone produced different bytes than source"
        );

        // Cloning into an existing path must surface EEXIST as Err — the
        // helper is not allowed to silently overwrite.
        let again = try_apfs_clonefile(&src_c, &dst_c);
        assert!(
            matches!(&again, Err(e) if e.raw_os_error() == Some(libc::EEXIST)),
            "second clone should EEXIST, got {again:?}",
        );
    }
}