Skip to main content

socket_patch_core/patch/
cow.rs

1//! Copy-on-write defense against package-manager hardlink farms.
2//!
3//! Several package managers (pnpm, bazel mirrors, nix store overlays,
4//! npm linked workspaces) point multiple project trees at a single
5//! content-addressed inode via symlinks or hardlinks. A naive patch
6//! that opens the path in a workspace and rewrites it would mutate the
7//! shared inode — corrupting every other project that references the
8//! same package.
9//!
10//! [`break_hardlink_if_needed`] is the pre-write hook that turns these
11//! shared-inode references into private file copies before any patch
12//! bytes touch disk. After the call, mutating the path is safe: only
13//! this project's copy changes; the store entry and every other
14//! project's link survive untouched.
15//!
16//! The function is idempotent and fast on the common case (regular
17//! file with `nlink == 1`): a single `symlink_metadata` syscall, no
18//! I/O beyond that. CoW only runs when there is something to break.
19//!
20//! **Windows note:** we always handle symlinks the same on Windows
21//! (replace with private regular file) but skip the `nlink > 1`
22//! check — `std::fs::Metadata` on Windows does not expose the file
23//! information that carries it, and pnpm-on-Windows typically uses
24//! reflinks/copies rather than hardlinks. A follow-up could call
25//! `GetFileInformationByHandle` via `windows-sys` for full Windows
26//! parity.
27
28use std::path::{Path, PathBuf};
29
/// What [`break_hardlink_if_needed`] found at the path and what it did
/// about it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CowAction {
    /// Nothing exists at the path, so there was no link to break; the
    /// caller is expected to create the file fresh.
    NoFile,
    /// The path is a regular file that is not a symlink and (where the
    /// link count is checked) has a single link. It can be mutated in
    /// place as-is.
    AlreadyPrivate,
    /// The path was a symlink. It has been replaced by a regular file
    /// holding the bytes read through the link; the file the link
    /// pointed at was not modified.
    BrokeSymlink,
    /// The path was a regular file with `nlink > 1`. Its bytes were
    /// copied into a new inode which was then renamed over the path;
    /// the other links still refer to the old inode.
    BrokeHardlink,
}
47
48/// Ensure `path` (if it exists) points at a private inode this
49/// project alone owns, so a subsequent in-place write only mutates
50/// our copy.
51///
52/// See module docs for the failure mode this protects against.
53pub async fn break_hardlink_if_needed(path: &Path) -> std::io::Result<CowAction> {
54    // `symlink_metadata` does NOT follow symlinks — that's what we
55    // want, since the symlink-vs-regular branch is the whole point.
56    let lstat = match tokio::fs::symlink_metadata(path).await {
57        Ok(m) => m,
58        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(CowAction::NoFile),
59        Err(e) => return Err(e),
60    };
61
62    if lstat.file_type().is_symlink() {
63        // Read through the symlink (this DOES follow it) to grab the
64        // current target content. We need it on disk as a regular
65        // file at `path` so the patch write lands on our copy.
66        let target_bytes = tokio::fs::read(path).await?;
67        // Remove the symlink. This only deletes the link itself; the
68        // target file (in the store, in a sibling project, wherever)
69        // is unaffected.
70        tokio::fs::remove_file(path).await?;
71        write_via_stage_rename(path, &target_bytes).await?;
72        return Ok(CowAction::BrokeSymlink);
73    }
74
75    // Regular file. Hardlink defense is Unix-only — see module docs.
76    #[cfg(unix)]
77    {
78        use std::os::unix::fs::MetadataExt;
79        if lstat.nlink() > 1 {
80            // Atomic-rename-over-self pattern: copy our content into
81            // a fresh inode, then rename over the original. The other
82            // links keep pointing at the original inode (which now
83            // has one fewer link but otherwise unchanged content).
84            let content = tokio::fs::read(path).await?;
85            write_via_stage_rename(path, &content).await?;
86            return Ok(CowAction::BrokeHardlink);
87        }
88    }
89
90    Ok(CowAction::AlreadyPrivate)
91}
92
93/// Write `bytes` to a temp file in `path.parent()` then rename over
94/// `path`. Cross-FS-safe because the stage lives in the same
95/// directory as the target, so `rename(2)` is intra-filesystem.
96async fn write_via_stage_rename(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
97    // Preconditions: cow callers always pass a real file path
98    // inside a package directory, so `path.parent()` and
99    // `path.file_name()` are guaranteed `Some`. The previous
100    // `unwrap_or_else` defaults only fired on `path == "/"`,
101    // which cow can never reach (lstat on "/" returns a directory,
102    // and the hardlink branch's `read("/")` errors out long
103    // before we get here). Using `.expect()` documents the
104    // invariant and eliminates the dead defensive default.
105    let parent = path
106        .parent()
107        .expect("cow stage path always has a parent — callers pass package-internal files");
108    // Stage filename: leading dot so editors / globs don't pick it
109    // up as a real file; uuid suffix so concurrent calls don't
110    // collide. (The apply lock makes that practically impossible,
111    // but defense in depth.)
112    let stem = path
113        .file_name()
114        .map(|n| n.to_string_lossy().into_owned())
115        .expect("cow stage path always has a file_name — callers pass package-internal files");
116    let stage: PathBuf = parent.join(format!(
117        ".socket-cow-{}-{}",
118        stem,
119        uuid::Uuid::new_v4()
120    ));
121    tokio::fs::write(&stage, bytes).await?;
122    // `rename` over the target is atomic on POSIX and best-effort on
123    // Windows (`MoveFileExW` with REPLACE_EXISTING via std).
124    match tokio::fs::rename(&stage, path).await {
125        Ok(()) => Ok(()),
126        Err(e) => {
127            // Clean up the stage on rename failure so we don't leave
128            // litter in the package directory.
129            let _ = tokio::fs::remove_file(&stage).await;
130            Err(e)
131        }
132    }
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    /// A path with nothing at it is reported as `NoFile`.
    #[tokio::test]
    async fn missing_file_is_noop() {
        let tmp = tempfile::tempdir().unwrap();
        let absent = tmp.path().join("nope.txt");

        let outcome = break_hardlink_if_needed(&absent).await.unwrap();

        assert_eq!(outcome, CowAction::NoFile);
    }

    /// A freshly written regular file is reported as `AlreadyPrivate`
    /// and its bytes are left as they were.
    #[tokio::test]
    async fn regular_file_with_one_link_is_already_private() {
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("a.txt");
        tokio::fs::write(&file, b"hello").await.unwrap();

        let outcome = break_hardlink_if_needed(&file).await.unwrap();
        assert_eq!(outcome, CowAction::AlreadyPrivate);

        let bytes_after = tokio::fs::read(&file).await.unwrap();
        assert_eq!(bytes_after, b"hello");
    }

    /// Hardlink case (compiled on Unix only).
    ///
    /// Setup: write file A, then hardlink B to it, and run CoW on B.
    /// Expected afterwards:
    /// - A is back to a single link and still holds the original bytes;
    /// - B holds the same bytes under a different inode number;
    /// - overwriting B leaves A's bytes alone.
    #[cfg(unix)]
    #[tokio::test]
    async fn hardlink_is_broken_and_sibling_survives_mutation() {
        use std::os::unix::fs::MetadataExt;

        let tmp = tempfile::tempdir().unwrap();
        let store = tmp.path().join("store-a.txt");
        let project = tmp.path().join("project-b.txt");
        tokio::fs::write(&store, b"original").await.unwrap();
        tokio::fs::hard_link(&store, &project).await.unwrap();

        // Precondition: the shared inode carries two links.
        let links_before = tokio::fs::metadata(&store).await.unwrap().nlink();
        assert_eq!(links_before, 2);

        let outcome = break_hardlink_if_needed(&project).await.unwrap();
        assert_eq!(outcome, CowAction::BrokeHardlink);

        // The store entry lost the project's link.
        let store_meta = tokio::fs::metadata(&store).await.unwrap();
        assert_eq!(store_meta.nlink(), 1);

        // The project file kept its bytes but moved to another inode.
        assert_eq!(tokio::fs::read(&project).await.unwrap(), b"original");
        let project_meta = tokio::fs::metadata(&project).await.unwrap();
        assert_ne!(store_meta.ino(), project_meta.ino());

        // Writing to the project file must not reach the store entry.
        tokio::fs::write(&project, b"patched").await.unwrap();
        assert_eq!(tokio::fs::read(&store).await.unwrap(), b"original");
        assert_eq!(tokio::fs::read(&project).await.unwrap(), b"patched");
    }

    /// Symlink case. This test is compiled on Unix only (it creates
    /// the link with `tokio::fs::symlink`).
    ///
    /// After CoW the link path must be a regular file carrying the
    /// target's bytes, and the target itself must be unchanged —
    /// including after the former link path is overwritten.
    #[cfg(unix)]
    #[tokio::test]
    async fn symlink_is_replaced_with_private_file() {
        let tmp = tempfile::tempdir().unwrap();
        let store_entry = tmp.path().join("store-entry.txt");
        let project_link = tmp.path().join("project-link.txt");
        tokio::fs::write(&store_entry, b"shared bytes").await.unwrap();
        tokio::fs::symlink(&store_entry, &project_link).await.unwrap();

        let outcome = break_hardlink_if_needed(&project_link).await.unwrap();
        assert_eq!(outcome, CowAction::BrokeSymlink);

        // What used to be a link is now a plain file with the same bytes.
        let link_kind = tokio::fs::symlink_metadata(&project_link)
            .await
            .unwrap()
            .file_type();
        assert!(link_kind.is_file());
        assert!(!link_kind.is_symlink());
        assert_eq!(
            tokio::fs::read(&project_link).await.unwrap(),
            b"shared bytes"
        );

        // The store entry is still a regular file with its own bytes.
        let entry_kind = tokio::fs::symlink_metadata(&store_entry)
            .await
            .unwrap()
            .file_type();
        assert!(entry_kind.is_file());
        assert_eq!(
            tokio::fs::read(&store_entry).await.unwrap(),
            b"shared bytes"
        );

        // Overwriting the project path does not reach the store entry.
        tokio::fs::write(&project_link, b"patched").await.unwrap();
        assert_eq!(
            tokio::fs::read(&store_entry).await.unwrap(),
            b"shared bytes"
        );
    }

    /// Two back-to-back calls on the same regular file both report
    /// `AlreadyPrivate`.
    #[tokio::test]
    async fn idempotent_on_regular_file() {
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("x.txt");
        tokio::fs::write(&file, b"hi").await.unwrap();

        let first = break_hardlink_if_needed(&file).await.unwrap();
        let second = break_hardlink_if_needed(&file).await.unwrap();

        assert_eq!(first, CowAction::AlreadyPrivate);
        assert_eq!(second, CowAction::AlreadyPrivate);
    }
}
244}