socket_patch_core/patch/cow.rs
1//! Copy-on-write defense against package-manager hardlink farms.
2//!
3//! Several package managers (pnpm, bazel mirrors, nix store overlays,
4//! npm linked workspaces) point multiple project trees at a single
5//! content-addressed inode via symlinks or hardlinks. A naive patch
6//! that opens the path in a workspace and rewrites it would mutate the
7//! shared inode — corrupting every other project that references the
8//! same package.
9//!
10//! [`break_hardlink_if_needed`] is the pre-write hook that turns these
11//! shared-inode references into private file copies before any patch
12//! bytes touch disk. After the call, mutating the path is safe: only
13//! this project's copy changes; the store entry and every other
14//! project's link survive untouched.
15//!
16//! The function is idempotent and fast on the common case (regular
17//! file with `nlink == 1`): a single `symlink_metadata` syscall, no
18//! I/O beyond that. CoW only runs when there is something to break.
19//!
20//! **Windows note:** we always handle symlinks the same on Windows
21//! (replace with private regular file) but skip the `nlink > 1`
22//! check — `std::fs::Metadata` on Windows does not expose the file
23//! information that carries it, and pnpm-on-Windows typically uses
24//! reflinks/copies rather than hardlinks. A follow-up could call
25//! `GetFileInformationByHandle` via `windows-sys` for full Windows
26//! parity.
27
28use std::path::{Path, PathBuf};
29
/// What [`break_hardlink_if_needed`] found at the path and what, if
/// anything, it did about it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CowAction {
    /// Nothing exists at the path, so there was nothing to break. The
    /// caller is expected to create the file fresh.
    NoFile,
    /// The path is not a symlink and no extra hard links were detected
    /// (`nlink == 1` where that check runs). The caller may write to it
    /// in place.
    AlreadyPrivate,
    /// The path was a symlink. Its content was read through the link,
    /// staged as a regular file in the same directory, and renamed over
    /// the link in a single step. The file the link pointed at is left
    /// untouched.
    BrokeSymlink,
    /// The path was a regular file with `nlink > 1`. Its content was
    /// copied into a new inode which was then renamed over the original
    /// path. The other links to the old inode are left untouched.
    BrokeHardlink,
}
48
49/// Ensure `path` (if it exists) points at a private inode this
50/// project alone owns, so a subsequent in-place write only mutates
51/// our copy.
52///
53/// See module docs for the failure mode this protects against.
54pub async fn break_hardlink_if_needed(path: &Path) -> std::io::Result<CowAction> {
55 // `symlink_metadata` does NOT follow symlinks — that's what we
56 // want, since the symlink-vs-regular branch is the whole point.
57 let lstat = match tokio::fs::symlink_metadata(path).await {
58 Ok(m) => m,
59 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(CowAction::NoFile),
60 Err(e) => return Err(e),
61 };
62
63 if lstat.file_type().is_symlink() {
64 // Read through the symlink (this DOES follow it) to grab the
65 // current target content. We need it on disk as a regular
66 // file at `path` so the patch write lands on our copy.
67 let target_bytes = tokio::fs::read(path).await?;
68 // Stage the private copy in the same directory, then
69 // atomically rename it OVER the symlink. `rename(2)` operates
70 // on the final path component itself — it never follows the
71 // symlink — so this replaces the link with our regular file
72 // while leaving the link's *target* (the store entry / sibling
73 // project) untouched.
74 //
75 // We deliberately do NOT `remove_file(path)` first. Unlinking
76 // the symlink before the replacement is committed would open a
77 // window in which the package file simply does not exist: if
78 // the staged write then failed (ENOSPC, EPERM on an immutable
79 // target, a crash), the original would be gone with nothing to
80 // roll back to. The rename-over-symlink is a single atomic
81 // step — on any failure `path` still holds the original link.
82 // This mirrors the hardlink branch below and `write_atomic`.
83 write_via_stage_rename(path, &target_bytes).await?;
84 return Ok(CowAction::BrokeSymlink);
85 }
86
87 // Regular file. Hardlink defense is Unix-only — see module docs.
88 #[cfg(unix)]
89 {
90 use std::os::unix::fs::MetadataExt;
91 if lstat.nlink() > 1 {
92 // Atomic-rename-over-self pattern: copy our content into
93 // a fresh inode, then rename over the original. The other
94 // links keep pointing at the original inode (which now
95 // has one fewer link but otherwise unchanged content).
96 let content = tokio::fs::read(path).await?;
97 write_via_stage_rename(path, &content).await?;
98 return Ok(CowAction::BrokeHardlink);
99 }
100 }
101
102 Ok(CowAction::AlreadyPrivate)
103}
104
105/// Write `bytes` to a temp file in `path.parent()` then rename over
106/// `path`. Cross-FS-safe because the stage lives in the same
107/// directory as the target, so `rename(2)` is intra-filesystem.
108async fn write_via_stage_rename(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
109 // Preconditions: cow callers always pass a real file path
110 // inside a package directory, so `path.parent()` and
111 // `path.file_name()` are guaranteed `Some`. The previous
112 // `unwrap_or_else` defaults only fired on `path == "/"`,
113 // which cow can never reach (lstat on "/" returns a directory,
114 // and the hardlink branch's `read("/")` errors out long
115 // before we get here). Using `.expect()` documents the
116 // invariant and eliminates the dead defensive default.
117 let parent = path
118 .parent()
119 .expect("cow stage path always has a parent — callers pass package-internal files");
120 // Stage filename: leading dot so editors / globs don't pick it
121 // up as a real file; uuid suffix so concurrent calls don't
122 // collide. (The apply lock makes that practically impossible,
123 // but defense in depth.)
124 let stem = path
125 .file_name()
126 .map(|n| n.to_string_lossy().into_owned())
127 .expect("cow stage path always has a file_name — callers pass package-internal files");
128 let stage: PathBuf = parent.join(format!(".socket-cow-{}-{}", stem, uuid::Uuid::new_v4()));
129 // Stage write. If this fails *after* creating the file (e.g. a
130 // mid-write ENOSPC), the partial stage would otherwise leak as a
131 // `.socket-cow-*` turd, so clean it up before propagating — same
132 // discipline as `apply::write_atomic`'s write arm.
133 if let Err(e) = tokio::fs::write(&stage, bytes).await {
134 let _ = tokio::fs::remove_file(&stage).await;
135 return Err(e);
136 }
137 // `rename` over the target is atomic on POSIX and best-effort on
138 // Windows (`MoveFileExW` with REPLACE_EXISTING via std).
139 match tokio::fs::rename(&stage, path).await {
140 Ok(()) => Ok(()),
141 Err(e) => {
142 // Clean up the stage on rename failure so we don't leave
143 // litter in the package directory.
144 let _ = tokio::fs::remove_file(&stage).await;
145 Err(e)
146 }
147 }
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: count `.socket-cow-*` stage files left in a directory.
    #[cfg(unix)]
    fn leftover_stage_count(dir: &Path) -> usize {
        std::fs::read_dir(dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_name().to_string_lossy().starts_with(".socket-cow-"))
            .count()
    }

    /// A path that does not exist is reported as `NoFile`.
    #[tokio::test]
    async fn missing_file_is_noop() {
        let dir = tempfile::tempdir().unwrap();
        let action = break_hardlink_if_needed(&dir.path().join("nope.txt"))
            .await
            .unwrap();
        assert_eq!(action, CowAction::NoFile);
    }

    /// A plain single-link file is reported as `AlreadyPrivate` and its
    /// content is left alone.
    #[tokio::test]
    async fn regular_file_with_one_link_is_already_private() {
        let dir = tempfile::tempdir().unwrap();
        let p = dir.path().join("a.txt");
        tokio::fs::write(&p, b"hello").await.unwrap();
        let action = break_hardlink_if_needed(&p).await.unwrap();
        assert_eq!(action, CowAction::AlreadyPrivate);
        // Content untouched.
        assert_eq!(tokio::fs::read(&p).await.unwrap(), b"hello");
    }

    /// Hardlink case (Unix only — see module docs).
    ///
    /// Create file A, hardlink B → A. Run CoW on B. After:
    /// - A's content is unchanged (the canonical store entry).
    /// - B has the same bytes but lives in a new inode.
    /// - Mutating B does NOT change A (the core invariant pnpm
    ///   safety depends on).
    #[cfg(unix)]
    #[tokio::test]
    async fn hardlink_is_broken_and_sibling_survives_mutation() {
        use std::os::unix::fs::MetadataExt;

        let dir = tempfile::tempdir().unwrap();
        let a = dir.path().join("store-a.txt");
        let b = dir.path().join("project-b.txt");
        tokio::fs::write(&a, b"original").await.unwrap();
        tokio::fs::hard_link(&a, &b).await.unwrap();

        // Sanity: both names report nlink == 2 before the break.
        let a_meta_before = tokio::fs::metadata(&a).await.unwrap();
        let b_meta_before = tokio::fs::metadata(&b).await.unwrap();
        assert_eq!(a_meta_before.nlink(), 2);
        assert_eq!(b_meta_before.nlink(), 2);

        let action = break_hardlink_if_needed(&b).await.unwrap();
        assert_eq!(action, CowAction::BrokeHardlink);

        // A is now a single-link inode.
        let a_meta_after = tokio::fs::metadata(&a).await.unwrap();
        assert_eq!(a_meta_after.nlink(), 1);
        // B has the same content but a different inode.
        assert_eq!(tokio::fs::read(&b).await.unwrap(), b"original");
        assert_ne!(
            a_meta_after.ino(),
            tokio::fs::metadata(&b).await.unwrap().ino()
        );

        // Mutate B — A must NOT change.
        tokio::fs::write(&b, b"patched").await.unwrap();
        assert_eq!(tokio::fs::read(&a).await.unwrap(), b"original");
        assert_eq!(tokio::fs::read(&b).await.unwrap(), b"patched");
    }

    /// Symlink case. The symlink → target relation is what pnpm's
    /// `node_modules/<pkg>` typically looks like. We must replace the
    /// link with a private regular file and leave the target alone.
    ///
    /// The symlink handling itself is not platform-specific, but this
    /// test is gated to Unix because it builds its fixture with
    /// `tokio::fs::symlink`.
    #[cfg(unix)]
    #[tokio::test]
    async fn symlink_is_replaced_with_private_file() {
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("store-entry.txt");
        let link = dir.path().join("project-link.txt");
        tokio::fs::write(&target, b"shared bytes").await.unwrap();
        tokio::fs::symlink(&target, &link).await.unwrap();

        let action = break_hardlink_if_needed(&link).await.unwrap();
        assert_eq!(action, CowAction::BrokeSymlink);

        // Link path is now a regular file with the target's content.
        let link_meta = tokio::fs::symlink_metadata(&link).await.unwrap();
        assert!(link_meta.file_type().is_file());
        assert!(!link_meta.file_type().is_symlink());
        assert_eq!(tokio::fs::read(&link).await.unwrap(), b"shared bytes");

        // Target is untouched.
        let target_meta = tokio::fs::symlink_metadata(&target).await.unwrap();
        assert!(target_meta.file_type().is_file());
        assert_eq!(tokio::fs::read(&target).await.unwrap(), b"shared bytes");

        // Mutate the link path; target stays put.
        tokio::fs::write(&link, b"patched").await.unwrap();
        assert_eq!(tokio::fs::read(&target).await.unwrap(), b"shared bytes");
    }

    /// Failure path: a dangling symlink cannot be read through, so the
    /// break fails with `NotFound`. The link itself must still be in
    /// place afterwards (nothing is unlinked before a replacement is
    /// ready) and no `.socket-cow-*` stage file may be left behind.
    #[cfg(unix)]
    #[tokio::test]
    async fn dangling_symlink_errors_and_link_survives() {
        let dir = tempfile::tempdir().unwrap();
        let missing = dir.path().join("gone.txt");
        let link = dir.path().join("dangling-link.txt");
        tokio::fs::symlink(&missing, &link).await.unwrap();

        let err = break_hardlink_if_needed(&link).await.unwrap_err();
        assert_eq!(err.kind(), std::io::ErrorKind::NotFound);

        // The original link is still there, still a symlink.
        let link_meta = tokio::fs::symlink_metadata(&link).await.unwrap();
        assert!(link_meta.file_type().is_symlink());
        assert_eq!(leftover_stage_count(dir.path()), 0);
    }

    /// Realistic pnpm shape: `node_modules/<pkg>` is a *symlink* into
    /// the content store, and the store entry is itself *hardlinked*
    /// across projects. Breaking the symlink must:
    /// - leave the project path a private regular file on its own inode,
    /// - leave the store entry's content AND its sibling hardlink
    ///   completely untouched (the whole point of CoW),
    /// - leave no `.socket-cow-*` stage litter behind.
    #[cfg(unix)]
    #[tokio::test]
    async fn symlink_to_hardlinked_store_entry_is_fully_isolated() {
        use std::os::unix::fs::MetadataExt;

        let dir = tempfile::tempdir().unwrap();
        // The content store entry + a sibling project's hardlink to it.
        let store = dir.path().join("store-entry.txt");
        let sibling = dir.path().join("other-project-hardlink.txt");
        tokio::fs::write(&store, b"shared bytes").await.unwrap();
        tokio::fs::hard_link(&store, &sibling).await.unwrap();
        // Our project links to the store entry via a symlink.
        let link = dir.path().join("our-project-link.txt");
        tokio::fs::symlink(&store, &link).await.unwrap();
        assert_eq!(tokio::fs::metadata(&store).await.unwrap().nlink(), 2);

        let action = break_hardlink_if_needed(&link).await.unwrap();
        assert_eq!(action, CowAction::BrokeSymlink);

        // Our path is now a private regular file (not a symlink), and
        // its inode is distinct from the store entry.
        let link_meta = tokio::fs::symlink_metadata(&link).await.unwrap();
        assert!(link_meta.file_type().is_file());
        assert!(!link_meta.file_type().is_symlink());
        assert_ne!(
            link_meta.ino(),
            tokio::fs::metadata(&store).await.unwrap().ino()
        );

        // Store entry + its sibling hardlink are byte-for-byte intact,
        // and still share their inode (nlink unchanged at 2).
        assert_eq!(tokio::fs::metadata(&store).await.unwrap().nlink(), 2);
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes");
        assert_eq!(tokio::fs::read(&sibling).await.unwrap(), b"shared bytes");

        // Mutating our copy must not bleed into the store or its sibling.
        tokio::fs::write(&link, b"patched").await.unwrap();
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes");
        assert_eq!(tokio::fs::read(&sibling).await.unwrap(), b"shared bytes");

        // No stage litter survives the successful break.
        assert_eq!(leftover_stage_count(dir.path()), 0);
    }

    /// Success-path litter check: neither the symlink break nor the
    /// hardlink break may leave a `.socket-cow-*` stage file behind.
    #[cfg(unix)]
    #[tokio::test]
    async fn break_leaves_no_stage_litter() {
        let dir = tempfile::tempdir().unwrap();

        let target = dir.path().join("t.txt");
        tokio::fs::write(&target, b"x").await.unwrap();
        let link = dir.path().join("l.txt");
        tokio::fs::symlink(&target, &link).await.unwrap();
        break_hardlink_if_needed(&link).await.unwrap();

        let a = dir.path().join("a.txt");
        tokio::fs::write(&a, b"y").await.unwrap();
        let b = dir.path().join("b.txt");
        tokio::fs::hard_link(&a, &b).await.unwrap();
        break_hardlink_if_needed(&b).await.unwrap();

        assert_eq!(leftover_stage_count(dir.path()), 0);
    }

    /// Idempotency: breaking a symlink yields a private regular file,
    /// and a second call on the now-regular path is a clean
    /// `AlreadyPrivate` no-op (no re-break, no litter).
    #[cfg(unix)]
    #[tokio::test]
    async fn idempotent_after_breaking_symlink() {
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("store.txt");
        let link = dir.path().join("link.txt");
        tokio::fs::write(&target, b"bytes").await.unwrap();
        tokio::fs::symlink(&target, &link).await.unwrap();

        assert_eq!(
            break_hardlink_if_needed(&link).await.unwrap(),
            CowAction::BrokeSymlink
        );
        assert_eq!(
            break_hardlink_if_needed(&link).await.unwrap(),
            CowAction::AlreadyPrivate
        );
        assert_eq!(leftover_stage_count(dir.path()), 0);
    }

    /// Idempotency: calling twice in a row on a regular file is fine
    /// and reports `AlreadyPrivate` both times.
    #[tokio::test]
    async fn idempotent_on_regular_file() {
        let dir = tempfile::tempdir().unwrap();
        let p = dir.path().join("x.txt");
        tokio::fs::write(&p, b"hi").await.unwrap();
        let a1 = break_hardlink_if_needed(&p).await.unwrap();
        let a2 = break_hardlink_if_needed(&p).await.unwrap();
        assert_eq!(a1, CowAction::AlreadyPrivate);
        assert_eq!(a2, CowAction::AlreadyPrivate);
    }
}
365}