// dirwalk 1.1.1
//
// Platform-optimized recursive directory walker with metadata.
// Documentation
use crate::entry::Entry;
use crate::error::Error;
use crate::walk::StorageHint;
use std::os::fd::AsRawFd;
use std::path::Path;

// From <sys/attr.h>
const ATTR_BIT_MAP_COUNT: u16 = 5;
const ATTR_CMN_NAME: u32 = 0x0000_0001;
const ATTR_CMN_OBJTYPE: u32 = 0x0000_0008;
const ATTR_CMN_MODTIME: u32 = 0x0000_0400;
// Mandatory for getattrlistbulk(2) and a prerequisite of FSOPT_PACK_INVAL_ATTRS; without it
// the kernel rejects the request with EINVAL.
const ATTR_CMN_RETURNED_ATTRS: u32 = 0x8000_0000;
// File-attribute group. Note that 0x0000_0001 in this group is ATTR_FILE_LINKCOUNT (a u32),
// not the data length.
const ATTR_FILE_DATALENGTH: u32 = 0x0000_0200;

// vnode object types from <sys/vnode.h>
const VDIR: u32 = 2;
const VLNK: u32 = 5;

// getattrlist-family option flags from <sys/attr.h>
const FSOPT_NOFOLLOW: u64 = 0x0000_0001;
const FSOPT_PACK_INVAL_ATTRS: u64 = 0x0000_0008;

const BUF_SIZE: usize = 256 * 1024;

// Byte offsets inside one getattrlistbulk record for the attribute set requested below
// (64-bit macOS, FSOPT_PACK_INVAL_ATTRS set so every requested common attribute is packed).
const RETURNED_FILEATTR_OFFSET: usize = 16; // attribute_set_t.fileattr (set starts at 4)
const NAME_REF_OFFSET: usize = 24; // attrreference_t for ATTR_CMN_NAME
const OBJTYPE_OFFSET: usize = 32; // u32 ATTR_CMN_OBJTYPE
const MODTIME_OFFSET: usize = 36; // struct timespec ATTR_CMN_MODTIME
const DATALENGTH_OFFSET: usize = 52; // off_t ATTR_FILE_DATALENGTH (only when returned)
// Length word + attribute_set_t + name reference + object type + timespec.
const MIN_RECORD_LEN: usize = 52;

/// Lists the entries of `path` using `getattrlistbulk(2)`.
///
/// Any failure of the bulk path (unsupported file system, network mount, unexpected record
/// layout, ...) is deliberately swallowed and the generic Unix implementation is used
/// instead, so callers only see an error if that fallback fails as well.
pub fn scan_dir_platform(
    path: &Path,
    prefix: &str,
    _hint: StorageHint,
) -> Result<Vec<Entry>, Error> {
    match scan_getattrlistbulk(path, prefix) {
        Ok(entries) => Ok(entries),
        // Fall back to the generic Unix path on any failure (unsupported FS, network mount, etc.)
        Err(_) => super::unix::scan_dir_platform(path, prefix, _hint),
    }
}

/// Reads every entry of the directory at `path` with repeated `getattrlistbulk(2)` calls.
///
/// Returns the parsed entries, or an `io::Error` when the directory cannot be opened, the
/// system call fails (EINTR is retried), or the kernel hands back a record that does not
/// match the expected layout. The caller treats every error as "use the fallback".
fn scan_getattrlistbulk(path: &Path, prefix: &str) -> Result<Vec<Entry>, std::io::Error> {
    let dir = std::fs::File::open(path)?;
    let fd = dir.as_raw_fd();

    let mut alist = libc::attrlist {
        bitmapcount: ATTR_BIT_MAP_COUNT,
        reserved: 0,
        commonattr: ATTR_CMN_RETURNED_ATTRS
            | ATTR_CMN_NAME
            | ATTR_CMN_OBJTYPE
            | ATTR_CMN_MODTIME,
        volattr: 0,
        dirattr: 0,
        fileattr: ATTR_FILE_DATALENGTH,
        forkattr: 0,
    };

    let mut buf = vec![0u8; BUF_SIZE];
    let mut entries = Vec::with_capacity(32);
    let options = FSOPT_NOFOLLOW | FSOPT_PACK_INVAL_ATTRS;

    loop {
        let count = loop {
            // SAFETY: `fd` belongs to `dir`, which outlives the call; `alist` and `buf` are
            // live, exclusively borrowed, and the length passed is exactly `buf.len()`.
            let ret = unsafe {
                libc::getattrlistbulk(
                    fd,
                    &mut alist as *mut libc::attrlist as *mut libc::c_void,
                    buf.as_mut_ptr() as *mut libc::c_void,
                    buf.len() as libc::size_t,
                    options,
                )
            };
            if ret >= 0 {
                break ret;
            }
            let e = std::io::Error::last_os_error();
            if e.raw_os_error() != Some(libc::EINTR) {
                return Err(e);
            }
        };
        // Zero records means the directory has been fully enumerated.
        if count == 0 {
            break;
        }

        let mut remaining: &[u8] = &buf;
        for _ in 0..count {
            // A malformed record is reported instead of skipped: a silently truncated
            // listing would be worse than letting the caller fall back.
            let (record, rest) = split_record(remaining).ok_or_else(malformed_record)?;
            remaining = rest;
            entries.push(parse_entry(record, prefix).ok_or_else(malformed_record)?);
        }
    }

    Ok(entries)
}

/// Error used when the kernel buffer does not match the layout this module expects.
fn malformed_record() -> std::io::Error {
    std::io::Error::new(
        std::io::ErrorKind::InvalidData,
        "unexpected getattrlistbulk record layout",
    )
}

/// Copies `N` bytes starting at `at`, or returns `None` if they are not all inside `bytes`.
///
/// Going through a byte array keeps every read bounds-checked and alignment-agnostic
/// (records are not guaranteed to start on 8-byte boundaries).
fn read_array<const N: usize>(bytes: &[u8], at: usize) -> Option<[u8; N]> {
    let end = at.checked_add(N)?;
    let mut out = [0u8; N];
    out.copy_from_slice(bytes.get(at..end)?);
    Some(out)
}

/// Splits the first record off `buf` using its leading `u32` length word.
///
/// Returns `(record, rest)`, or `None` when the length is shorter than the fixed part of a
/// record or extends past the end of `buf`.
fn split_record(buf: &[u8]) -> Option<(&[u8], &[u8])> {
    let len = u32::from_ne_bytes(read_array(buf, 0)?) as usize;
    if len < MIN_RECORD_LEN || len > buf.len() {
        return None;
    }
    Some(buf.split_at(len))
}

/// Parse one record from the getattrlistbulk buffer.
///
/// `record` is exactly one record as delimited by [`split_record`]. Layout (64-bit macOS,
/// ATTR_CMN_RETURNED_ATTRS requested, FSOPT_PACK_INVAL_ATTRS set):
/// - [0..4]   u32 length (total record size including this field)
/// - [4..24]  attribute_set_t: {commonattr, volattr, dirattr, fileattr, forkattr}, all u32,
///   flagging which requested attributes actually carry valid data
/// - [24..32] attrreference_t for ATTR_CMN_NAME: {i32 attr_dataoffset, u32 attr_length};
///   the name is at (offset of this field) + attr_dataoffset, NUL-terminated, and
///   attr_length counts the terminator
/// - [32..36] u32 ATTR_CMN_OBJTYPE (VREG=1, VDIR=2, VLNK=5)
/// - [36..52] struct timespec ATTR_CMN_MODTIME: {i64 tv_sec, i64 tv_nsec}
/// - [52..60] i64 ATTR_FILE_DATALENGTH, read only when its bit is set in the returned
///   `fileattr` mask (file attributes are not guaranteed for directories)
///
/// Returns `None` if any field lies outside `record` or the name is empty.
fn parse_entry(record: &[u8], prefix: &str) -> Option<Entry> {
    let returned_fileattr = u32::from_ne_bytes(read_array(record, RETURNED_FILEATTR_OFFSET)?);

    // ATTR_CMN_NAME: attrreference_t, offset is relative to the reference itself.
    let name_dataoffset = i32::from_ne_bytes(read_array(record, NAME_REF_OFFSET)?);
    let name_len = u32::from_ne_bytes(read_array(record, NAME_REF_OFFSET + 4)?) as usize;
    if name_dataoffset < 0 {
        return None;
    }
    let name_start = NAME_REF_OFFSET.checked_add(name_dataoffset as usize)?;
    // Variable-length data always follows the fixed-size fields.
    if name_start < MIN_RECORD_LEN {
        return None;
    }
    let name_end = name_start.checked_add(name_len)?;
    let raw_name = record.get(name_start..name_end)?;
    // Cut at the terminator rather than trusting attr_length to be exactly name + NUL.
    let name_bytes = raw_name.split(|&b| b == 0).next().unwrap_or(raw_name);
    if name_bytes.is_empty() {
        return None;
    }
    let name_str = String::from_utf8_lossy(name_bytes);

    // ATTR_CMN_OBJTYPE: u32
    let obj_type = u32::from_ne_bytes(read_array(record, OBJTYPE_OFFSET)?);

    // ATTR_CMN_MODTIME: only the seconds part of the timespec is kept.
    let tv_sec = i64::from_ne_bytes(read_array(record, MODTIME_OFFSET)?);

    let is_dir = obj_type == VDIR;
    let is_symlink = obj_type == VLNK;

    // ATTR_FILE_DATALENGTH: off_t. Directories and symlinks always report 0; for everything
    // else trust the field only when the kernel says it returned it.
    let has_data_length = returned_fileattr & ATTR_FILE_DATALENGTH != 0;
    let size = if is_dir || is_symlink || !has_data_length {
        0
    } else {
        read_array::<8>(record, DATALENGTH_OFFSET)
            .map_or(0, |raw| i64::from_ne_bytes(raw).max(0) as u64)
    };

    let is_hidden = name_str.starts_with('.');
    let relative_path = super::unix::build_relative_path(prefix, &name_str);

    Some(Entry {
        relative_path,
        depth: 0,
        size,
        is_dir,
        is_symlink,
        is_hidden,
        modified: tv_sec,
    })
}