Skip to main content

socket_patch_core/utils/
fs.rs

1//! Filesystem helpers shared by the ecosystem crawlers.
2//!
3//! Each crawler walks one or more package directories and decides
4//! whether each entry is a candidate package. The two operations that
5//! all eight crawlers repeat are:
6//!
7//! - listing entries in a directory while tolerating permission /
8//!   I/O errors (we treat an unreadable directory as "no entries");
//! - asking whether an entry is a directory while tolerating
//!   metadata lookup failures (we treat a stat error as "not a dir").
11//!
12//! Centralizing both keeps each crawler free of the
13//! `match read_dir { Ok(rd) => rd, Err(_) => return … }` boilerplate
14//! and gives integration tests a single function to drive when they
15//! want to exercise the read_dir Err arm via `chmod 000`.
16//!
17//! Both helpers are async because the rest of the crawler code is —
18//! they delegate to `tokio::fs`.
19//!
20//! # Symlinks
21//!
22//! `entry_is_dir` follows symlinks (uses `metadata()`, not
23//! `symlink_metadata()`), matching the historical behavior of the
24//! crawlers (pnpm's content-addressed store relies on resolving
25//! symlinks into `node_modules/.pnpm/*`).
26
27use std::path::Path;
28
29use tokio::fs::DirEntry;
30use std::fs::FileType;
31
32/// List the immediate children of `path`.
33///
34/// Returns an empty vector if the directory cannot be read (does not
35/// exist, permission denied, etc.) or if any individual `next_entry`
36/// call fails. The crawlers treat both cases the same way: surface
37/// no packages from the unreadable subtree, but don't abort the
38/// whole crawl.
39pub async fn list_dir_entries(path: &Path) -> Vec<DirEntry> {
40    let mut entries = match tokio::fs::read_dir(path).await {
41        Ok(rd) => rd,
42        Err(_) => return Vec::new(),
43    };
44
45    let mut out = Vec::new();
46    while let Ok(Some(entry)) = entries.next_entry().await {
47        out.push(entry);
48    }
49    out
50}
51
52/// Resolve whether `entry` is a directory, following symlinks.
53///
54/// Returns `false` if `file_type()` errors — the caller then skips
55/// the entry rather than aborting the walk.
56pub async fn entry_is_dir(entry: &DirEntry) -> bool {
57    entry
58        .metadata()
59        .await
60        .map(|m| m.is_dir())
61        .unwrap_or(false)
62}
63
64/// Return the raw `FileType` for `entry`, swallowing stat errors.
65///
66/// Use this instead of `entry_is_dir` when the caller needs to
67/// distinguish real directories from symlinks (e.g. npm's pnpm
68/// support: symlinks point into the content-addressed store and must
69/// be treated as scannable-but-non-recurseable). The returned
70/// `FileType` is the symlink-aware kind from `entry.file_type()`,
71/// not the resolved-target kind from `metadata()`.
72pub async fn entry_file_type(entry: &DirEntry) -> Option<FileType> {
73    entry.file_type().await.ok()
74}
75
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created temporary directory has no children.
    #[tokio::test]
    async fn list_dir_entries_empty_dir() {
        let scratch = tempfile::tempdir().unwrap();
        assert!(list_dir_entries(scratch.path()).await.is_empty());
    }

    /// A path that does not exist yields an empty listing.
    #[tokio::test]
    async fn list_dir_entries_missing_path_returns_empty() {
        let scratch = tempfile::tempdir().unwrap();
        let missing = scratch.path().join("does-not-exist");
        assert!(list_dir_entries(&missing).await.is_empty());
    }

    /// Both directories and plain files show up in the listing.
    #[tokio::test]
    async fn list_dir_entries_returns_children() {
        let scratch = tempfile::tempdir().unwrap();
        let root = scratch.path();
        for dir in ["a", "b"] {
            tokio::fs::create_dir(root.join(dir)).await.unwrap();
        }
        tokio::fs::write(root.join("c.txt"), b"").await.unwrap();

        let mut found: Vec<String> = Vec::new();
        for child in list_dir_entries(root).await {
            found.push(child.file_name().to_string_lossy().into_owned());
        }
        // Directory iteration order is unspecified; sort before comparing.
        found.sort();
        assert_eq!(found, ["a", "b", "c.txt"]);
    }

    /// `entry_is_dir` is true for a directory and false for a file.
    #[tokio::test]
    async fn entry_is_dir_distinguishes_dir_and_file() {
        let scratch = tempfile::tempdir().unwrap();
        let root = scratch.path();
        tokio::fs::create_dir(root.join("d")).await.unwrap();
        tokio::fs::write(root.join("f"), b"x").await.unwrap();

        for child in list_dir_entries(root).await {
            let label = child.file_name().to_string_lossy().into_owned();
            let expect_dir = match label.as_str() {
                "d" => true,
                "f" => false,
                other => panic!("unexpected entry: {other}"),
            };
            assert_eq!(entry_is_dir(&child).await, expect_dir);
        }
    }
}