dirwalk 1.1.1

Platform-optimized recursive directory walker with metadata
Documentation
use crate::entry::Entry;
use crate::error::Error;
use crate::walk::StorageHint;
use std::ffi::CStr;
use std::os::fd::AsRawFd;
use std::path::Path;

/// Size in bytes of the scratch buffer passed to each `getdents64` call.
const GETDENTS_BUF_SIZE: usize = 32 * 1024;

/// Fixed-size header of the kernel's `linux_dirent64` record (the record
/// itself is variable-length).
/// We parse this manually since libc doesn't expose a struct for it.
///
/// `#[repr(C)]` keeps the field order and offsets as declared so they can be
/// matched against the bytes written by `getdents64`.
#[repr(C)]
struct LinuxDirent64 {
    /// Inode number (not consumed by this module).
    d_ino: u64,
    /// Filesystem-specific offset cookie (not consumed by this module).
    d_off: i64,
    /// Total length of this record in bytes; used to step to the next record.
    d_reclen: u16,
    /// File type, compared against the `libc::DT_*` constants.
    d_type: u8,
    // d_name follows (null-terminated, variable length)
}

// Byte offset of `d_name` from the start of a record.
// d_name starts immediately after d_type with no padding (char[] has alignment 1),
// so it is computed from the offset of `d_type` rather than from the struct size:
// size_of::<LinuxDirent64>() includes trailing padding (24 on 64-bit targets) and
// would be wrong for the kernel ABI.
const DIRENT64_NAME_OFFSET: usize = std::mem::offset_of!(LinuxDirent64, d_type) + 1;

/// Packed directory listing: all filenames stored in a single contiguous buffer.
/// Each entry is recorded as (offset, len, d_type) into `name_buf`.
/// Names are stored with a trailing nul so they can be used as C string pointers directly.
struct DirNames {
    /// Concatenated names, each followed by a single nul byte.
    name_buf: Vec<u8>,
    /// One tuple per directory entry, in the order the entries were read.
    entries: Vec<(usize, u16, u8)>, // (offset into name_buf, length excluding nul, d_type)
}

impl DirNames {
    /// Number of entries in the listing.
    fn len(&self) -> usize {
        self.entries.len()
    }

    /// `true` when the listing holds no entries.
    fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Raw pointer to the nul-terminated name of entry `i`, suitable for passing
    /// to syscalls as a C string. Panics if `i` is out of range.
    fn name_ptr(&self, i: usize) -> *const libc::c_char {
        let start = self.entries[i].0;
        let base = self.name_buf.as_ptr();
        // SAFETY: `start` was recorded as an offset into `name_buf` when the entry
        // was stored, so the resulting pointer stays inside the same allocation.
        unsafe { base.add(start).cast::<libc::c_char>() }
    }

    /// Name of entry `i` as bytes, excluding the trailing nul.
    /// Panics if `i` is out of range.
    fn name_bytes(&self, i: usize) -> &[u8] {
        let (start, name_len, _) = self.entries[i];
        let end = start + name_len as usize;
        &self.name_buf[start..end]
    }

    /// The `d_type` value recorded for entry `i`. Panics if `i` is out of range.
    fn d_type(&self, i: usize) -> u8 {
        let (_, _, kind) = self.entries[i];
        kind
    }
}

/// Reads all filenames from a directory using raw getdents64 syscall.
/// Names are packed into a single buffer to avoid per-entry heap allocations.
/// Filters out "." and "..".
///
/// `dirfd` must be an open file descriptor referring to a directory.
///
/// # Errors
///
/// Returns the OS error if `getdents64` fails, or an `InvalidData` error if a
/// returned record is malformed (header or record length outside the bytes the
/// kernel reported, or a name without a nul terminator).
fn read_dir_names(dirfd: i32) -> Result<DirNames, std::io::Error> {
    // Number of bytes copied out of the buffer when decoding a record header.
    const HEADER_SIZE: usize = std::mem::size_of::<LinuxDirent64>();

    fn malformed(what: &'static str) -> std::io::Error {
        std::io::Error::new(std::io::ErrorKind::InvalidData, what)
    }

    let mut buf = vec![0u8; GETDENTS_BUF_SIZE];
    let mut name_buf = Vec::with_capacity(4096);
    let mut entries = Vec::new();

    loop {
        // SAFETY: `buf` is a live, writable allocation of exactly `buf.len()` bytes,
        // and that length is what we hand to the kernel as the capacity.
        let nread = unsafe {
            libc::syscall(
                libc::SYS_getdents64,
                dirfd,
                buf.as_mut_ptr() as *mut libc::c_void,
                buf.len() as libc::c_int,
            )
        };

        if nread < 0 {
            return Err(std::io::Error::last_os_error());
        }
        if nread == 0 {
            break;
        }

        // Only the first `nread` bytes hold records from this call.
        let filled = buf
            .get(..nread as usize)
            .ok_or_else(|| malformed("getdents64 reported more bytes than the buffer holds"))?;

        let mut offset = 0usize;
        while offset < filled.len() {
            let record = &filled[offset..];
            if record.len() < HEADER_SIZE {
                return Err(malformed("truncated dirent header"));
            }

            // SAFETY: `record` has at least HEADER_SIZE readable bytes (checked above)
            // and `LinuxDirent64` consists solely of integers, so any bit pattern is
            // valid. `read_unaligned` is required because a `Vec<u8>` only guarantees
            // 1-byte alignment; forming a `&LinuxDirent64` into it would be UB.
            let dirent =
                unsafe { std::ptr::read_unaligned(record.as_ptr() as *const LinuxDirent64) };

            let reclen = dirent.d_reclen as usize;
            if reclen < HEADER_SIZE || reclen > record.len() {
                return Err(malformed("dirent record length out of bounds"));
            }

            // The name lives between the fixed header fields and the end of the record;
            // searching only that slice keeps the nul scan inside the record.
            let name = CStr::from_bytes_until_nul(&record[DIRENT64_NAME_OFFSET..reclen])
                .map_err(|_| malformed("dirent name is not nul-terminated"))?;
            let name_bytes = name.to_bytes();

            if name_bytes != b"." && name_bytes != b".." {
                let buf_offset = name_buf.len();
                name_buf.extend_from_slice(name_bytes);
                name_buf.push(0);
                // The name is shorter than `reclen`, which itself came from a u16,
                // so this cast cannot truncate.
                entries.push((buf_offset, name_bytes.len() as u16, dirent.d_type));
            }

            offset += reclen;
        }
    }

    Ok(DirNames { name_buf, entries })
}

/// Assembles an [`Entry`] from a `statx` result and the raw file name.
///
/// * `stx` — supplies the size and the modification time (seconds).
/// * `name_bytes` — file name without a trailing nul; converted lossily to UTF-8.
/// * `prefix` — passed to `build_relative_path` together with the name.
/// * `is_dir` / `is_symlink` — file kind as already determined by the caller.
///
/// Directories and symlinks are reported with size 0. A name whose first byte
/// is `.` is marked hidden. `depth` is always set to 0 here.
fn build_entry(
    stx: &libc::statx,
    name_bytes: &[u8],
    prefix: &str,
    is_dir: bool,
    is_symlink: bool,
) -> Entry {
    let display_name = String::from_utf8_lossy(name_bytes);

    Entry {
        relative_path: super::unix::build_relative_path(prefix, &display_name),
        depth: 0,
        size: match (is_dir, is_symlink) {
            (false, false) => stx.stx_size,
            _ => 0,
        },
        is_dir,
        is_symlink,
        is_hidden: name_bytes.starts_with(b"."),
        modified: stx.stx_mtime.tv_sec,
    }
}

/// Lists the entries of `path`, trying the getdents64-based scanner first.
///
/// If that scanner returns an error, the call is delegated to
/// `super::unix::scan_dir_platform` with the same arguments; debug builds also
/// print the error and the path to stderr before falling back.
pub fn scan_dir_platform(
    path: &Path,
    prefix: &str,
    _hint: StorageHint,
) -> Result<Vec<Entry>, Error> {
    scan_getdents(path, prefix).or_else(|e| {
        #[cfg(debug_assertions)]
        eprintln!(
            "dirwalk: scan_getdents fallback ({e}), path={}",
            path.display()
        );
        super::unix::scan_dir_platform(path, prefix, _hint)
    })
}

/// Scans one directory: reads every name with `read_dir_names`, then issues a
/// `statx` (relative to the directory fd, not following symlinks) per name and
/// converts each successful result into an [`Entry`].
///
/// Entries whose `statx` call fails are skipped. Errors from opening the
/// directory or from reading its names are returned to the caller.
fn scan_getdents(path: &Path, prefix: &str) -> Result<Vec<Entry>, std::io::Error> {
    // `dir` owns the descriptor; it must stay alive for as long as `dirfd` is used.
    let dir = std::fs::File::open(path)?;
    let dirfd = dir.as_raw_fd();

    let listing = read_dir_names(dirfd)?;
    if listing.is_empty() {
        return Ok(Vec::new());
    }

    let count = listing.len();
    let mut collected = Vec::with_capacity(count);

    collected.extend((0..count).filter_map(|idx| {
        let d_type = listing.d_type(idx);
        // When getdents64 already reported the type there is no need to request
        // STATX_TYPE as well.
        let type_known = d_type != libc::DT_UNKNOWN;
        let mut statx_mask = libc::STATX_SIZE | libc::STATX_MTIME;
        if !type_known {
            statx_mask |= libc::STATX_TYPE;
        }

        // SAFETY: `libc::statx` is a plain C struct of integers; all-zero is a valid value.
        let mut stx: libc::statx = unsafe { std::mem::zeroed() };
        // SAFETY: `name_ptr` yields a nul-terminated string owned by `listing`, which
        // outlives this call, and `stx` is a valid, writable out-parameter.
        let rc = unsafe {
            libc::statx(
                dirfd,
                listing.name_ptr(idx),
                libc::AT_SYMLINK_NOFOLLOW,
                statx_mask,
                &mut stx,
            )
        };
        if rc < 0 {
            return None;
        }

        let (is_dir, is_symlink) = if type_known {
            (d_type == libc::DT_DIR, d_type == libc::DT_LNK)
        } else {
            let file_type = stx.stx_mode as u32 & libc::S_IFMT;
            (file_type == libc::S_IFDIR, file_type == libc::S_IFLNK)
        };

        Some(build_entry(
            &stx,
            listing.name_bytes(idx),
            prefix,
            is_dir,
            is_symlink,
        ))
    }));

    Ok(collected)
}