Skip to main content

whisker_cli/
linker_shim.rs

//! `whisker-linker-shim` — `-C linker=<shim>` target.
//!
//! rustc, when told `-C linker=whisker-linker-shim`, invokes us as
//!
//! ```text
//! whisker-linker-shim <linker-driver-args...>
//! ```
//!
//! There is **no real-linker-path prefix** in argv (rustc treats the
//! shim itself as the linker). To forward the call, we read the real
//! linker path from the `WHISKER_REAL_LINKER` env var. The dev-server
//! sets this when it spawns the build with the shim active; if it
//! isn't set, the shim aborts (so a stray shim left in the toolchain
//! configuration after a crashed `whisker run` doesn't silently break
//! ordinary `cargo build` — it fails fast with a clear message).
//!
//! What we capture in JSON (only when `WHISKER_LINKER_CACHE_DIR` is set):
//!
//! ```text
//! { output, args, timestamp_micros }
//! ```
//!
//! `output` is the value following `-o` in argv (or `None` for an
//! invocation that doesn't have one — rustc never omits it in
//! practice, but we handle the case defensively). The dev-server keys
//! captured invocations by output filename so the right one can be
//! replayed for the right crate during thin rebuild.
28
29use anyhow::{Context, Result};
30use std::path::{Path, PathBuf};
31use std::time::{SystemTime, UNIX_EPOCH};
32
33/// One captured linker invocation.
34#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
35pub struct CapturedLinkerInvocation {
36    /// Value passed to `-o`, if any. Used as the index key.
37    pub output: Option<String>,
38    /// Full argv passed to the linker driver — what we re-invoke
39    /// during thin rebuild.
40    pub args: Vec<String>,
41    /// Microseconds since UNIX epoch.
42    pub timestamp_micros: u128,
43}
44
45/// Entry point called from `src/bin/whisker_linker_shim.rs`.
46pub fn run() -> Result<()> {
47    let mut argv: Vec<String> = std::env::args().collect();
48    if argv.is_empty() {
49        anyhow::bail!("whisker-linker-shim: empty argv");
50    }
51    let _shim_path = argv.remove(0);
52    let linker_args = argv;
53
54    if let Some(cache_dir) = std::env::var_os("WHISKER_LINKER_CACHE_DIR") {
55        let cache_dir = PathBuf::from(cache_dir);
56        let invocation = capture(&linker_args)?;
57        save_invocation(&cache_dir, &invocation)
58            .with_context(|| format!("save to {}", cache_dir.display()))?;
59    }
60
61    let real_linker = std::env::var("WHISKER_REAL_LINKER").context(
62        "WHISKER_REAL_LINKER not set; whisker-linker-shim has nothing to forward to. \
63         Did you mean to install the shim in your toolchain config?",
64    )?;
65    let status = std::process::Command::new(&real_linker)
66        .args(&linker_args)
67        .status()
68        .with_context(|| format!("spawn {real_linker}"))?;
69    std::process::exit(status.code().unwrap_or(1));
70}
71
72// ----- Pure helpers ---------------------------------------------------------
73
74pub fn capture(linker_args: &[String]) -> Result<CapturedLinkerInvocation> {
75    Ok(CapturedLinkerInvocation {
76        output: extract_output(linker_args),
77        args: linker_args.to_vec(),
78        timestamp_micros: SystemTime::now()
79            .duration_since(UNIX_EPOCH)
80            .map(|d| d.as_micros())
81            .unwrap_or(0),
82    })
83}
84
/// Return the argument that follows the first standalone `-o`.
///
/// Only the **separated** form (`-o /path/lib.so`) is recognised. The
/// attached form (`-o/path/lib.so`) is valid for `ld` but is not what
/// rustc emits, and matching on an exact `-o` keeps lookalike flags
/// such as `-output-format=…` from being misread as an output path.
///
/// Returns `None` when there is no `-o`, or when the first `-o` is the
/// final argument.
pub fn extract_output(args: &[String]) -> Option<String> {
    let flag_index = args.iter().position(|arg| arg == "-o")?;
    args.get(flag_index + 1).cloned()
}
99
100/// Best-effort filename slug for a captured invocation: the basename
101/// of the `-o` argument minus extension, with non-ascii-alphanumeric
102/// characters replaced with `_`. Falls back to `_unknown`.
103pub fn invocation_filename(invocation: &CapturedLinkerInvocation) -> String {
104    let stem_for_path = invocation
105        .output
106        .as_deref()
107        .and_then(|s| Path::new(s).file_name())
108        .and_then(|n| n.to_str())
109        .unwrap_or("_unknown");
110    let safe: String = stem_for_path
111        .chars()
112        .map(|c| {
113            if c.is_ascii_alphanumeric() || c == '_' || c == '.' || c == '-' {
114                c
115            } else {
116                '_'
117            }
118        })
119        .collect();
120    format!("{}-{}.json", safe, invocation.timestamp_micros)
121}
122
123pub fn save_invocation(cache_dir: &Path, invocation: &CapturedLinkerInvocation) -> Result<()> {
124    std::fs::create_dir_all(cache_dir)
125        .with_context(|| format!("create {}", cache_dir.display()))?;
126    let path = cache_dir.join(invocation_filename(invocation));
127    let json = serde_json::to_string_pretty(invocation).context("serialize")?;
128    std::fs::write(&path, json).with_context(|| format!("write {}", path.display()))?;
129    Ok(())
130}
131
132// ============================================================================
133// Tests
134// ============================================================================
135
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Turn a slice of string literals into an owned argv.
    fn s(v: &[&str]) -> Vec<String> {
        v.iter().map(|s| s.to_string()).collect()
    }

    /// Create a fresh, empty directory under the system temp dir.
    ///
    /// The name combines the process id with a per-process counter so
    /// tests running in parallel threads get distinct directories. Any
    /// leftover directory with the same name is removed first.
    fn unique_tempdir() -> PathBuf {
        static SEQ: AtomicU64 = AtomicU64::new(0);
        let n = SEQ.fetch_add(1, Ordering::Relaxed);
        let pid = std::process::id();
        let p = std::env::temp_dir().join(format!("whisker-linker-shim-test-{pid}-{n}"));
        let _ = std::fs::remove_dir_all(&p);
        std::fs::create_dir_all(&p).unwrap();
        p
    }

    // ----- extract_output ---------------------------------------------

    #[test]
    fn extract_output_from_separated_form() {
        let args = s(&["-O3", "-o", "/tmp/libfoo.dylib", "obj.o"]);
        assert_eq!(extract_output(&args).as_deref(), Some("/tmp/libfoo.dylib"));
    }

    #[test]
    fn extract_output_ignores_attached_form() {
        // Deliberately not handled — see extract_output's rationale.
        // If a real driver ever uses attached form we'll hear about it
        // when -o ends up None and the cache file is named "_unknown".
        let args = s(&["-o/tmp/libfoo.dylib", "obj.o"]);
        assert_eq!(extract_output(&args), None);
    }

    #[test]
    fn extract_output_returns_none_when_absent() {
        let args = s(&["obj.o", "-shared"]);
        assert_eq!(extract_output(&args), None);
    }

    #[test]
    fn extract_output_does_not_grab_lookalike_long_flags() {
        // `-output-format=...` shouldn't be treated as `-o`.
        let args = s(&["-output-format=binary", "-o", "/tmp/real.so"]);
        assert_eq!(extract_output(&args).as_deref(), Some("/tmp/real.so"));
    }

    // ----- capture ----------------------------------------------------

    #[test]
    fn capture_preserves_full_argv() {
        let args = s(&[
            "-O3",
            "-shared",
            "-o",
            "/tmp/libfoo.dylib",
            "-Wl,-undefined,dynamic_lookup",
            "/tmp/foo.o",
        ]);
        let inv = capture(&args).unwrap();
        assert_eq!(inv.args, args);
        assert_eq!(inv.output.as_deref(), Some("/tmp/libfoo.dylib"));
        assert!(inv.timestamp_micros > 0);
    }

    #[test]
    fn capture_with_no_output_leaves_field_none() {
        let inv = capture(&s(&["-shared", "obj.o"])).unwrap();
        assert_eq!(inv.output, None);
    }

    // ----- invocation_filename ---------------------------------------

    #[test]
    fn invocation_filename_uses_output_basename_and_timestamp() {
        let inv = CapturedLinkerInvocation {
            output: Some("/tmp/build/libfoo.dylib".into()),
            args: vec![],
            timestamp_micros: 42,
        };
        assert_eq!(invocation_filename(&inv), "libfoo.dylib-42.json");
    }

    #[test]
    fn invocation_filename_handles_anonymous_invocation() {
        let inv = CapturedLinkerInvocation {
            output: None,
            args: vec![],
            timestamp_micros: 7,
        };
        assert_eq!(invocation_filename(&inv), "_unknown-7.json");
    }

    #[test]
    fn invocation_filename_sanitises_weird_characters() {
        // Defensive — actual rustc-produced output paths don't have
        // these, but it's cheap to be tolerant.
        let inv = CapturedLinkerInvocation {
            output: Some("/tmp/foo bar/lib weird?name.so".into()),
            args: vec![],
            timestamp_micros: 1,
        };
        assert_eq!(invocation_filename(&inv), "lib_weird_name.so-1.json");
    }

    // ----- save_invocation --------------------------------------------

    #[test]
    fn save_invocation_writes_and_round_trips() {
        let dir = unique_tempdir();
        let inv = CapturedLinkerInvocation {
            output: Some("/tmp/libfoo.dylib".into()),
            args: s(&["-shared", "-o", "/tmp/libfoo.dylib", "foo.o"]),
            timestamp_micros: 12345,
        };
        save_invocation(&dir, &inv).expect("save");

        let path = dir.join(invocation_filename(&inv));
        assert!(path.is_file());
        let body = std::fs::read_to_string(&path).unwrap();
        let parsed: CapturedLinkerInvocation = serde_json::from_str(&body).unwrap();
        assert_eq!(parsed, inv);

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn save_invocation_creates_the_cache_dir_if_missing() {
        // Keep a handle on the unique root so cleanup can target exactly
        // what this test created.
        let root = unique_tempdir();
        let dir = root.join("nested/path");
        assert!(!dir.exists());
        let inv = CapturedLinkerInvocation {
            output: Some("/tmp/lib.dylib".into()),
            args: vec![],
            timestamp_micros: 1,
        };
        save_invocation(&dir, &inv).expect("save");
        assert!(dir.is_dir());

        // Best-effort cleanup. Remove only the unique root: walking up
        // from `dir` with repeated `pop()` calls is easy to overshoot
        // into the shared system temp directory.
        let _ = std::fs::remove_dir_all(&root);
    }
}