socket_patch_core/utils/fs.rs
1//! Filesystem helpers shared by the ecosystem crawlers.
2//!
3//! Each crawler walks one or more package directories and decides
4//! whether each entry is a candidate package. The two operations that
5//! all eight crawlers repeat are:
6//!
7//! - listing entries in a directory while tolerating permission /
8//! I/O errors (we treat an unreadable directory as "no entries");
9//! - asking whether an entry is a directory while tolerating
10//! `file_type()` failures (we treat a stat error as "not a dir").
11//!
12//! Centralizing both keeps each crawler free of the
13//! `match read_dir { Ok(rd) => rd, Err(_) => return … }` boilerplate
14//! and gives integration tests a single function to drive when they
15//! want to exercise the read_dir Err arm via `chmod 000`.
16//!
//! All helpers in this module are async because the rest of the crawler
//! code is — they delegate to `tokio::fs`.
19//!
20//! # Symlinks
21//!
22//! `entry_is_dir` follows symlinks (uses `metadata()`, not
23//! `symlink_metadata()`), matching the historical behavior of the
24//! crawlers (pnpm's content-addressed store relies on resolving
25//! symlinks into `node_modules/.pnpm/*`).
26
27use std::path::Path;
28
29use tokio::fs::DirEntry;
30use std::fs::FileType;
31
32/// List the immediate children of `path`.
33///
34/// Returns an empty vector if the directory cannot be read (does not
35/// exist, permission denied, etc.) or if any individual `next_entry`
36/// call fails. The crawlers treat both cases the same way: surface
37/// no packages from the unreadable subtree, but don't abort the
38/// whole crawl.
39pub async fn list_dir_entries(path: &Path) -> Vec<DirEntry> {
40 let mut entries = match tokio::fs::read_dir(path).await {
41 Ok(rd) => rd,
42 Err(_) => return Vec::new(),
43 };
44
45 let mut out = Vec::new();
46 while let Ok(Some(entry)) = entries.next_entry().await {
47 out.push(entry);
48 }
49 out
50}
51
52/// Resolve whether `entry` is a directory, following symlinks.
53///
54/// Returns `false` if `file_type()` errors — the caller then skips
55/// the entry rather than aborting the walk.
56pub async fn entry_is_dir(entry: &DirEntry) -> bool {
57 entry
58 .metadata()
59 .await
60 .map(|m| m.is_dir())
61 .unwrap_or(false)
62}
63
64/// Return the raw `FileType` for `entry`, swallowing stat errors.
65///
66/// Use this instead of `entry_is_dir` when the caller needs to
67/// distinguish real directories from symlinks (e.g. npm's pnpm
68/// support: symlinks point into the content-addressed store and must
69/// be treated as scannable-but-non-recurseable). The returned
70/// `FileType` is the symlink-aware kind from `entry.file_type()`,
71/// not the resolved-target kind from `metadata()`.
72pub async fn entry_file_type(entry: &DirEntry) -> Option<FileType> {
73 entry.file_type().await.ok()
74}
75
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created temporary directory yields no entries.
    #[tokio::test]
    async fn list_dir_entries_empty_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let entries = list_dir_entries(tmp.path()).await;
        assert!(entries.is_empty());
    }

    /// A path that does not exist is reported as "no entries", not an error.
    #[tokio::test]
    async fn list_dir_entries_missing_path_returns_empty() {
        let tmp = tempfile::tempdir().unwrap();
        let entries = list_dir_entries(&tmp.path().join("does-not-exist")).await;
        assert!(entries.is_empty());
    }

    /// Both directories and regular files show up in the listing.
    #[tokio::test]
    async fn list_dir_entries_returns_children() {
        let tmp = tempfile::tempdir().unwrap();
        tokio::fs::create_dir(tmp.path().join("a")).await.unwrap();
        tokio::fs::create_dir(tmp.path().join("b")).await.unwrap();
        tokio::fs::write(tmp.path().join("c.txt"), b"").await.unwrap();
        let mut names: Vec<String> = list_dir_entries(tmp.path())
            .await
            .into_iter()
            .map(|e| e.file_name().to_string_lossy().to_string())
            .collect();
        // Directory iteration order is unspecified, so sort before comparing.
        names.sort();
        assert_eq!(names, vec!["a", "b", "c.txt"]);
    }

    /// `entry_is_dir` is true for a directory and false for a regular file.
    #[tokio::test]
    async fn entry_is_dir_distinguishes_dir_and_file() {
        let tmp = tempfile::tempdir().unwrap();
        tokio::fs::create_dir(tmp.path().join("d")).await.unwrap();
        tokio::fs::write(tmp.path().join("f"), b"x").await.unwrap();
        let entries = list_dir_entries(tmp.path()).await;
        // Guard against a vacuous pass: if the listing came back empty the
        // loop below would never run and the test would succeed without
        // checking anything.
        assert_eq!(entries.len(), 2, "expected exactly the `d` and `f` entries");
        for entry in entries {
            let name = entry.file_name().to_string_lossy().to_string();
            let is_dir = entry_is_dir(&entry).await;
            match name.as_str() {
                "d" => assert!(is_dir),
                "f" => assert!(!is_dir),
                other => panic!("unexpected entry: {other}"),
            }
        }
    }
}