Skip to main content

socket_patch_core/patch/
cow.rs

1//! Copy-on-write defense against package-manager hardlink farms.
2//!
3//! Several package managers (pnpm, bazel mirrors, nix store overlays,
4//! npm linked workspaces) point multiple project trees at a single
5//! content-addressed inode via symlinks or hardlinks. A naive patch
6//! that opens the path in a workspace and rewrites it would mutate the
7//! shared inode — corrupting every other project that references the
8//! same package.
9//!
10//! [`break_hardlink_if_needed`] is the pre-write hook that turns these
11//! shared-inode references into private file copies before any patch
12//! bytes touch disk. After the call, mutating the path is safe: only
13//! this project's copy changes; the store entry and every other
14//! project's link survive untouched.
15//!
16//! The function is idempotent and fast on the common case (regular
17//! file with `nlink == 1`): a single `symlink_metadata` syscall, no
18//! I/O beyond that. CoW only runs when there is something to break.
19//!
20//! **Windows note:** we always handle symlinks the same on Windows
21//! (replace with private regular file) but skip the `nlink > 1`
22//! check — `std::fs::Metadata` on Windows does not expose the file
23//! information that carries it, and pnpm-on-Windows typically uses
24//! reflinks/copies rather than hardlinks. A follow-up could call
25//! `GetFileInformationByHandle` via `windows-sys` for full Windows
26//! parity.
27
28use std::path::{Path, PathBuf};
29
/// What [`break_hardlink_if_needed`] found at the path and what, if
/// anything, it did about it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CowAction {
    /// Nothing exists at the path. There was no link to break; the
    /// caller is expected to create the file from scratch.
    NoFile,
    /// The path is a regular file that is neither a symlink nor
    /// multiply linked, so it was left alone and can be written in
    /// place.
    AlreadyPrivate,
    /// The path was a symlink. Its content was read through the link,
    /// staged as a regular file in the same directory, and renamed
    /// over the link in a single step. The file the link pointed at
    /// was not modified.
    BrokeSymlink,
    /// The path was a regular file with `nlink > 1`. Its content was
    /// copied to a new inode which was then renamed over the path;
    /// the other links still refer to the original inode.
    BrokeHardlink,
}
48
49/// Ensure `path` (if it exists) points at a private inode this
50/// project alone owns, so a subsequent in-place write only mutates
51/// our copy.
52///
53/// See module docs for the failure mode this protects against.
54pub async fn break_hardlink_if_needed(path: &Path) -> std::io::Result<CowAction> {
55    // `symlink_metadata` does NOT follow symlinks — that's what we
56    // want, since the symlink-vs-regular branch is the whole point.
57    let lstat = match tokio::fs::symlink_metadata(path).await {
58        Ok(m) => m,
59        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(CowAction::NoFile),
60        Err(e) => return Err(e),
61    };
62
63    if lstat.file_type().is_symlink() {
64        // Read through the symlink (this DOES follow it) to grab the
65        // current target content. We need it on disk as a regular
66        // file at `path` so the patch write lands on our copy.
67        let target_bytes = tokio::fs::read(path).await?;
68        // Stage the private copy in the same directory, then
69        // atomically rename it OVER the symlink. `rename(2)` operates
70        // on the final path component itself — it never follows the
71        // symlink — so this replaces the link with our regular file
72        // while leaving the link's *target* (the store entry / sibling
73        // project) untouched.
74        //
75        // We deliberately do NOT `remove_file(path)` first. Unlinking
76        // the symlink before the replacement is committed would open a
77        // window in which the package file simply does not exist: if
78        // the staged write then failed (ENOSPC, EPERM on an immutable
79        // target, a crash), the original would be gone with nothing to
80        // roll back to. The rename-over-symlink is a single atomic
81        // step — on any failure `path` still holds the original link.
82        // This mirrors the hardlink branch below and `write_atomic`.
83        write_via_stage_rename(path, &target_bytes).await?;
84        return Ok(CowAction::BrokeSymlink);
85    }
86
87    // Regular file. Hardlink defense is Unix-only — see module docs.
88    #[cfg(unix)]
89    {
90        use std::os::unix::fs::MetadataExt;
91        if lstat.nlink() > 1 {
92            // Atomic-rename-over-self pattern: copy our content into
93            // a fresh inode, then rename over the original. The other
94            // links keep pointing at the original inode (which now
95            // has one fewer link but otherwise unchanged content).
96            let content = tokio::fs::read(path).await?;
97            write_via_stage_rename(path, &content).await?;
98            return Ok(CowAction::BrokeHardlink);
99        }
100    }
101
102    Ok(CowAction::AlreadyPrivate)
103}
104
105/// Write `bytes` to a temp file in `path.parent()` then rename over
106/// `path`. Cross-FS-safe because the stage lives in the same
107/// directory as the target, so `rename(2)` is intra-filesystem.
108async fn write_via_stage_rename(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
109    // Preconditions: cow callers always pass a real file path
110    // inside a package directory, so `path.parent()` and
111    // `path.file_name()` are guaranteed `Some`. The previous
112    // `unwrap_or_else` defaults only fired on `path == "/"`,
113    // which cow can never reach (lstat on "/" returns a directory,
114    // and the hardlink branch's `read("/")` errors out long
115    // before we get here). Using `.expect()` documents the
116    // invariant and eliminates the dead defensive default.
117    let parent = path
118        .parent()
119        .expect("cow stage path always has a parent — callers pass package-internal files");
120    // Stage filename: leading dot so editors / globs don't pick it
121    // up as a real file; uuid suffix so concurrent calls don't
122    // collide. (The apply lock makes that practically impossible,
123    // but defense in depth.)
124    let stem = path
125        .file_name()
126        .map(|n| n.to_string_lossy().into_owned())
127        .expect("cow stage path always has a file_name — callers pass package-internal files");
128    let stage: PathBuf = parent.join(format!(".socket-cow-{}-{}", stem, uuid::Uuid::new_v4()));
129    // Stage write. If this fails *after* creating the file (e.g. a
130    // mid-write ENOSPC), the partial stage would otherwise leak as a
131    // `.socket-cow-*` turd, so clean it up before propagating — same
132    // discipline as `apply::write_atomic`'s write arm.
133    if let Err(e) = tokio::fs::write(&stage, bytes).await {
134        let _ = tokio::fs::remove_file(&stage).await;
135        return Err(e);
136    }
137    // `rename` over the target is atomic on POSIX and best-effort on
138    // Windows (`MoveFileExW` with REPLACE_EXISTING via std).
139    match tokio::fs::rename(&stage, path).await {
140        Ok(()) => Ok(()),
141        Err(e) => {
142            // Clean up the stage on rename failure so we don't leave
143            // litter in the package directory.
144            let _ = tokio::fs::remove_file(&stage).await;
145            Err(e)
146        }
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of `.socket-cow-*` staging files still present in `dir`.
    #[cfg(unix)]
    fn leftover_stage_count(dir: &Path) -> usize {
        let mut remaining = 0;
        for entry in std::fs::read_dir(dir).unwrap().flatten() {
            if entry.file_name().to_string_lossy().starts_with(".socket-cow-") {
                remaining += 1;
            }
        }
        remaining
    }

    /// A path with nothing behind it reports `NoFile`.
    #[tokio::test]
    async fn missing_file_is_noop() {
        let tmp = tempfile::tempdir().unwrap();
        let absent = tmp.path().join("nope.txt");
        let outcome = break_hardlink_if_needed(&absent).await.unwrap();
        assert_eq!(outcome, CowAction::NoFile);
    }

    /// A freshly written file has a single link and is left alone.
    #[tokio::test]
    async fn regular_file_with_one_link_is_already_private() {
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("a.txt");
        tokio::fs::write(&file, b"hello").await.unwrap();

        let outcome = break_hardlink_if_needed(&file).await.unwrap();
        assert_eq!(outcome, CowAction::AlreadyPrivate);

        // The bytes were not rewritten.
        assert_eq!(tokio::fs::read(&file).await.unwrap(), b"hello");
    }

    /// Hardlink case (Unix only — see module docs).
    ///
    /// Two names share one inode; CoW runs on the second name. After
    /// the call the first name keeps its content, the second name has
    /// equal bytes on a different inode, and writing to the second
    /// name leaves the first untouched.
    #[cfg(unix)]
    #[tokio::test]
    async fn hardlink_is_broken_and_sibling_survives_mutation() {
        use std::os::unix::fs::MetadataExt;

        let tmp = tempfile::tempdir().unwrap();
        let store = tmp.path().join("store-a.txt");
        let project = tmp.path().join("project-b.txt");
        tokio::fs::write(&store, b"original").await.unwrap();
        tokio::fs::hard_link(&store, &project).await.unwrap();

        // Precondition: the inode really is shared.
        let store_before = tokio::fs::metadata(&store).await.unwrap();
        assert_eq!(store_before.nlink(), 2);

        let outcome = break_hardlink_if_needed(&project).await.unwrap();
        assert_eq!(outcome, CowAction::BrokeHardlink);

        // The store name is the only remaining link to its inode.
        let store_after = tokio::fs::metadata(&store).await.unwrap();
        assert_eq!(store_after.nlink(), 1);

        // The project name carries equal bytes on another inode.
        assert_eq!(tokio::fs::read(&project).await.unwrap(), b"original");
        let project_after = tokio::fs::metadata(&project).await.unwrap();
        assert_ne!(store_after.ino(), project_after.ino());

        // Writing the project copy does not reach the store copy.
        tokio::fs::write(&project, b"patched").await.unwrap();
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"original");
        assert_eq!(tokio::fs::read(&project).await.unwrap(), b"patched");
    }

    /// Symlink case (this test is compiled on Unix only). The link
    /// must be swapped for a private regular file holding the
    /// target's bytes, and the target itself must be left alone.
    #[cfg(unix)]
    #[tokio::test]
    async fn symlink_is_replaced_with_private_file() {
        let tmp = tempfile::tempdir().unwrap();
        let store = tmp.path().join("store-entry.txt");
        let project = tmp.path().join("project-link.txt");
        tokio::fs::write(&store, b"shared bytes").await.unwrap();
        tokio::fs::symlink(&store, &project).await.unwrap();

        let outcome = break_hardlink_if_needed(&project).await.unwrap();
        assert_eq!(outcome, CowAction::BrokeSymlink);

        // The former link is a plain file with the target's content.
        let project_kind = tokio::fs::symlink_metadata(&project)
            .await
            .unwrap()
            .file_type();
        assert!(project_kind.is_file());
        assert!(!project_kind.is_symlink());
        assert_eq!(tokio::fs::read(&project).await.unwrap(), b"shared bytes");

        // The target is still a regular file with its original bytes.
        let store_kind = tokio::fs::symlink_metadata(&store)
            .await
            .unwrap()
            .file_type();
        assert!(store_kind.is_file());
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes");

        // Writing via the project path no longer reaches the target.
        tokio::fs::write(&project, b"patched").await.unwrap();
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes");
    }

    /// Symlink into a store entry that is itself hardlinked from
    /// another project. Breaking the symlink must:
    ///   - turn the project path into a regular file on its own inode,
    ///   - leave the store entry and its sibling hardlink unchanged,
    ///     still sharing one inode,
    ///   - leave no `.socket-cow-*` stage file behind.
    #[cfg(unix)]
    #[tokio::test]
    async fn symlink_to_hardlinked_store_entry_is_fully_isolated() {
        use std::os::unix::fs::MetadataExt;

        let tmp = tempfile::tempdir().unwrap();
        // Store entry plus another project's hardlink to it.
        let store = tmp.path().join("store-entry.txt");
        let sibling = tmp.path().join("other-project-hardlink.txt");
        tokio::fs::write(&store, b"shared bytes").await.unwrap();
        tokio::fs::hard_link(&store, &sibling).await.unwrap();
        // This project reaches the store entry through a symlink.
        let project = tmp.path().join("our-project-link.txt");
        tokio::fs::symlink(&store, &project).await.unwrap();
        assert_eq!(tokio::fs::metadata(&store).await.unwrap().nlink(), 2);

        let outcome = break_hardlink_if_needed(&project).await.unwrap();
        assert_eq!(outcome, CowAction::BrokeSymlink);

        // The project path is a regular file on a different inode
        // from the store entry.
        let project_meta = tokio::fs::symlink_metadata(&project).await.unwrap();
        assert!(project_meta.file_type().is_file());
        assert!(!project_meta.file_type().is_symlink());
        let store_ino = tokio::fs::metadata(&store).await.unwrap().ino();
        assert_ne!(project_meta.ino(), store_ino);

        // Store and sibling still share an inode and their bytes.
        assert_eq!(tokio::fs::metadata(&store).await.unwrap().nlink(), 2);
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes");
        assert_eq!(tokio::fs::read(&sibling).await.unwrap(), b"shared bytes");

        // A write to the project copy reaches neither of them.
        tokio::fs::write(&project, b"patched").await.unwrap();
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes");
        assert_eq!(tokio::fs::read(&sibling).await.unwrap(), b"shared bytes");

        // The successful break cleaned up after itself.
        assert_eq!(leftover_stage_count(tmp.path()), 0);
    }

    /// After one successful symlink break and one successful hardlink
    /// break in the same directory, no `.socket-cow-*` file remains.
    #[cfg(unix)]
    #[tokio::test]
    async fn break_leaves_no_stage_litter() {
        let tmp = tempfile::tempdir().unwrap();

        // Symlink break.
        let link_target = tmp.path().join("t.txt");
        let link = tmp.path().join("l.txt");
        tokio::fs::write(&link_target, b"x").await.unwrap();
        tokio::fs::symlink(&link_target, &link).await.unwrap();
        break_hardlink_if_needed(&link).await.unwrap();

        // Hardlink break.
        let first_name = tmp.path().join("a.txt");
        let second_name = tmp.path().join("b.txt");
        tokio::fs::write(&first_name, b"y").await.unwrap();
        tokio::fs::hard_link(&first_name, &second_name).await.unwrap();
        break_hardlink_if_needed(&second_name).await.unwrap();

        assert_eq!(leftover_stage_count(tmp.path()), 0);
    }

    /// Idempotency: once a symlink has been broken, a second call on
    /// the same path reports `AlreadyPrivate` and leaves no stage
    /// file behind.
    #[cfg(unix)]
    #[tokio::test]
    async fn idempotent_after_breaking_symlink() {
        let tmp = tempfile::tempdir().unwrap();
        let store = tmp.path().join("store.txt");
        let project = tmp.path().join("link.txt");
        tokio::fs::write(&store, b"bytes").await.unwrap();
        tokio::fs::symlink(&store, &project).await.unwrap();

        let first = break_hardlink_if_needed(&project).await.unwrap();
        assert_eq!(first, CowAction::BrokeSymlink);

        let second = break_hardlink_if_needed(&project).await.unwrap();
        assert_eq!(second, CowAction::AlreadyPrivate);

        assert_eq!(leftover_stage_count(tmp.path()), 0);
    }

    /// Idempotency: two consecutive calls on a plain file both report
    /// `AlreadyPrivate`.
    #[tokio::test]
    async fn idempotent_on_regular_file() {
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("x.txt");
        tokio::fs::write(&file, b"hi").await.unwrap();

        let first = break_hardlink_if_needed(&file).await.unwrap();
        let second = break_hardlink_if_needed(&file).await.unwrap();

        assert_eq!(first, CowAction::AlreadyPrivate);
        assert_eq!(second, CowAction::AlreadyPrivate);
    }
}