socket_patch_core/patch/cow.rs
1//! Copy-on-write defense against package-manager hardlink farms.
2//!
3//! Several package managers (pnpm, bazel mirrors, nix store overlays,
4//! npm linked workspaces) point multiple project trees at a single
5//! content-addressed inode via symlinks or hardlinks. A naive patch
6//! that opens the path in a workspace and rewrites it would mutate the
7//! shared inode — corrupting every other project that references the
8//! same package.
9//!
10//! [`break_hardlink_if_needed`] is the pre-write hook that turns these
11//! shared-inode references into private file copies before any patch
12//! bytes touch disk. After the call, mutating the path is safe: only
13//! this project's copy changes; the store entry and every other
14//! project's link survive untouched.
15//!
16//! The function is idempotent and fast on the common case (regular
17//! file with `nlink == 1`): a single `symlink_metadata` syscall, no
18//! I/O beyond that. CoW only runs when there is something to break.
19//!
20//! **Windows note:** we always handle symlinks the same on Windows
21//! (replace with private regular file) but skip the `nlink > 1`
22//! check — `std::fs::Metadata` on Windows does not expose the file
23//! information that carries it, and pnpm-on-Windows typically uses
24//! reflinks/copies rather than hardlinks. A follow-up could call
25//! `GetFileInformationByHandle` via `windows-sys` for full Windows
26//! parity.
27
28use std::path::{Path, PathBuf};
29
/// What [`break_hardlink_if_needed`] found at the path and what, if
/// anything, it did about it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CowAction {
    /// Nothing exists at the path. There is no link to break; the
    /// caller is expected to create the file from scratch.
    NoFile,
    /// The path is already a private regular file: not a symlink and
    /// not sharing its inode with another link. It can be written
    /// in place as-is.
    AlreadyPrivate,
    /// The path was a symlink. It has been replaced by a fresh
    /// regular file holding the same content the link resolved to.
    /// The file the link pointed at is left alone.
    BrokeSymlink,
    /// The path was a regular file with `nlink > 1`. Its content was
    /// copied into a new inode which was then renamed over the
    /// original path. The other links still reference the old inode.
    BrokeHardlink,
}
47
48/// Ensure `path` (if it exists) points at a private inode this
49/// project alone owns, so a subsequent in-place write only mutates
50/// our copy.
51///
52/// See module docs for the failure mode this protects against.
53pub async fn break_hardlink_if_needed(path: &Path) -> std::io::Result<CowAction> {
54 // `symlink_metadata` does NOT follow symlinks — that's what we
55 // want, since the symlink-vs-regular branch is the whole point.
56 let lstat = match tokio::fs::symlink_metadata(path).await {
57 Ok(m) => m,
58 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(CowAction::NoFile),
59 Err(e) => return Err(e),
60 };
61
62 if lstat.file_type().is_symlink() {
63 // Read through the symlink (this DOES follow it) to grab the
64 // current target content. We need it on disk as a regular
65 // file at `path` so the patch write lands on our copy.
66 let target_bytes = tokio::fs::read(path).await?;
67 // Remove the symlink. This only deletes the link itself; the
68 // target file (in the store, in a sibling project, wherever)
69 // is unaffected.
70 tokio::fs::remove_file(path).await?;
71 write_via_stage_rename(path, &target_bytes).await?;
72 return Ok(CowAction::BrokeSymlink);
73 }
74
75 // Regular file. Hardlink defense is Unix-only — see module docs.
76 #[cfg(unix)]
77 {
78 use std::os::unix::fs::MetadataExt;
79 if lstat.nlink() > 1 {
80 // Atomic-rename-over-self pattern: copy our content into
81 // a fresh inode, then rename over the original. The other
82 // links keep pointing at the original inode (which now
83 // has one fewer link but otherwise unchanged content).
84 let content = tokio::fs::read(path).await?;
85 write_via_stage_rename(path, &content).await?;
86 return Ok(CowAction::BrokeHardlink);
87 }
88 }
89
90 Ok(CowAction::AlreadyPrivate)
91}
92
93/// Write `bytes` to a temp file in `path.parent()` then rename over
94/// `path`. Cross-FS-safe because the stage lives in the same
95/// directory as the target, so `rename(2)` is intra-filesystem.
96async fn write_via_stage_rename(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
97 // Preconditions: cow callers always pass a real file path
98 // inside a package directory, so `path.parent()` and
99 // `path.file_name()` are guaranteed `Some`. The previous
100 // `unwrap_or_else` defaults only fired on `path == "/"`,
101 // which cow can never reach (lstat on "/" returns a directory,
102 // and the hardlink branch's `read("/")` errors out long
103 // before we get here). Using `.expect()` documents the
104 // invariant and eliminates the dead defensive default.
105 let parent = path
106 .parent()
107 .expect("cow stage path always has a parent — callers pass package-internal files");
108 // Stage filename: leading dot so editors / globs don't pick it
109 // up as a real file; uuid suffix so concurrent calls don't
110 // collide. (The apply lock makes that practically impossible,
111 // but defense in depth.)
112 let stem = path
113 .file_name()
114 .map(|n| n.to_string_lossy().into_owned())
115 .expect("cow stage path always has a file_name — callers pass package-internal files");
116 let stage: PathBuf = parent.join(format!(
117 ".socket-cow-{}-{}",
118 stem,
119 uuid::Uuid::new_v4()
120 ));
121 tokio::fs::write(&stage, bytes).await?;
122 // `rename` over the target is atomic on POSIX and best-effort on
123 // Windows (`MoveFileExW` with REPLACE_EXISTING via std).
124 match tokio::fs::rename(&stage, path).await {
125 Ok(()) => Ok(()),
126 Err(e) => {
127 // Clean up the stage on rename failure so we don't leave
128 // litter in the package directory.
129 let _ = tokio::fs::remove_file(&stage).await;
130 Err(e)
131 }
132 }
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    /// A path with nothing behind it is reported as `NoFile`.
    #[tokio::test]
    async fn missing_file_is_noop() {
        let scratch = tempfile::tempdir().unwrap();
        let absent = scratch.path().join("nope.txt");

        let outcome = break_hardlink_if_needed(&absent).await.unwrap();

        assert_eq!(outcome, CowAction::NoFile);
    }

    /// A freshly written regular file is reported as `AlreadyPrivate`
    /// and still holds the bytes that were written.
    #[tokio::test]
    async fn regular_file_with_one_link_is_already_private() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("a.txt");
        tokio::fs::write(&file, b"hello").await.unwrap();

        let outcome = break_hardlink_if_needed(&file).await.unwrap();
        assert_eq!(outcome, CowAction::AlreadyPrivate);

        // The bytes on disk are exactly what we put there.
        let on_disk = tokio::fs::read(&file).await.unwrap();
        assert_eq!(on_disk, b"hello");
    }

    /// Hardlink case (Unix only — see module docs).
    ///
    /// A "store" file gets a second name via a hardlink, and CoW runs
    /// on that second name. Afterwards:
    /// - the store file is back to a single link;
    /// - the second name holds the same bytes in a different inode;
    /// - overwriting the second name leaves the store file's bytes
    ///   as they were.
    #[cfg(unix)]
    #[tokio::test]
    async fn hardlink_is_broken_and_sibling_survives_mutation() {
        use std::os::unix::fs::MetadataExt;

        let scratch = tempfile::tempdir().unwrap();
        let store = scratch.path().join("store-a.txt");
        let project = scratch.path().join("project-b.txt");
        tokio::fs::write(&store, b"original").await.unwrap();
        tokio::fs::hard_link(&store, &project).await.unwrap();

        // Sanity: the store entry sees both names before we start.
        let links_before = tokio::fs::metadata(&store).await.unwrap().nlink();
        assert_eq!(links_before, 2);

        let outcome = break_hardlink_if_needed(&project).await.unwrap();
        assert_eq!(outcome, CowAction::BrokeHardlink);

        // The store entry lost its second link.
        let store_meta = tokio::fs::metadata(&store).await.unwrap();
        assert_eq!(store_meta.nlink(), 1);
        // The project path kept its bytes but moved to another inode.
        assert_eq!(tokio::fs::read(&project).await.unwrap(), b"original");
        let project_meta = tokio::fs::metadata(&project).await.unwrap();
        assert_ne!(store_meta.ino(), project_meta.ino());

        // Overwrite the project path; the store entry must not follow.
        tokio::fs::write(&project, b"patched").await.unwrap();
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"original");
        assert_eq!(tokio::fs::read(&project).await.unwrap(), b"patched");
    }

    /// Symlink case. This test is gated to Unix because it creates
    /// the link with `tokio::fs::symlink`.
    ///
    /// A link pointing at a "store" file must end up as a regular
    /// file with the same content, while the store file itself is
    /// neither replaced nor modified — not even when the former link
    /// path is later overwritten.
    #[cfg(unix)]
    #[tokio::test]
    async fn symlink_is_replaced_with_private_file() {
        let scratch = tempfile::tempdir().unwrap();
        let store = scratch.path().join("store-entry.txt");
        let project = scratch.path().join("project-link.txt");
        tokio::fs::write(&store, b"shared bytes").await.unwrap();
        tokio::fs::symlink(&store, &project).await.unwrap();

        let outcome = break_hardlink_if_needed(&project).await.unwrap();
        assert_eq!(outcome, CowAction::BrokeSymlink);

        // What used to be the link is a plain file with the same bytes.
        let project_type = tokio::fs::symlink_metadata(&project)
            .await
            .unwrap()
            .file_type();
        assert!(project_type.is_file());
        assert!(!project_type.is_symlink());
        assert_eq!(tokio::fs::read(&project).await.unwrap(), b"shared bytes");

        // The store entry is still a regular file with its old bytes.
        let store_type = tokio::fs::symlink_metadata(&store)
            .await
            .unwrap()
            .file_type();
        assert!(store_type.is_file());
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes");

        // Writing to the former link path must not reach the store.
        tokio::fs::write(&project, b"patched").await.unwrap();
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes");
    }

    /// Idempotency: two back-to-back calls on the same regular file
    /// both report `AlreadyPrivate`.
    #[tokio::test]
    async fn idempotent_on_regular_file() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("x.txt");
        tokio::fs::write(&file, b"hi").await.unwrap();

        let first = break_hardlink_if_needed(&file).await.unwrap();
        let second = break_hardlink_if_needed(&file).await.unwrap();

        assert_eq!([first, second], [CowAction::AlreadyPrivate; 2]);
    }
}