whisker_dev_server/hotpatch/thin_build.rs
1//! Thin-rebuild driver — produce a patch dylib from a single
2//! captured rustc invocation by editing as few of its args as
3//! possible.
4//!
5//! ## Design principle: "minimal edit, verbatim everything else"
6//!
7//! Whisker does **not** want to re-derive linker / sysroot / SDK args
8//! itself — those are the parts most likely to break across OS
9//! versions, NDK upgrades, Xcode releases, glibc CSU layout
10//! changes, and so on. Instead we capture cargo+rustc's full
11//! invocation in I4g-4 and replay it here, **changing only the
12//! handful of args that have to differ for a hot-patch dylib**:
13//!
14//! - `--crate-type` is forced to `rlib` so rustc emits an object
15//! file containing every `pub fn`'s mangled symbol (cdylib
16//! would strip them — see I4g-6 pivot).
17//! - `--emit` is forced to `obj` so we get a single `.o` we can
18//! hand to the linker ourselves.
19//! - `--out-dir` is redirected to a session-local cache so the
20//! patch artifact doesn't clobber the original `target/`
21//! output.
22//!
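//! [`build_obj_plan`] additionally strips cargo's JSON-diagnostic
//! flags, points `-C incremental` at the patch cache, and forces
//! `-C opt-level=0` so thin rebuilds stay fast and quiet.
//!
//! Everything else — target triple, sysroot, link-args, `cfg` flags,
//! `-L` search paths, `-l` link directives — is preserved verbatim.
//! That is the whole point: rustc + cargo already know how to make
//! the linker happy on this OS / SDK combo, and we lean on that.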
28//!
//! After rustc emits the `.o`, [`super::link_plan::build_link_plan`]
//! (X2b) takes the captured **linker** invocation, drops its object
//! inputs (we have a fresh one), substitutes our `.o` and `-o`, and
//! adds `-undefined dynamic_lookup` (macOS) /
//! `--unresolved-symbols=ignore-all` (Linux) so unresolved symbols
//! are deferred to the host process at `dlopen` time. The result is
//! a `.so` / `.dylib` whose symbols resolve back into the original
//! binary for everything except the patched function bodies —
//! exactly what `subsecond::apply_patch` expects.
38
39use std::path::{Path, PathBuf};
40
41use super::wrapper::CapturedRustcInvocation;
42
/// Result of [`build_obj_plan`]: the argument list to hand to `rustc`
/// plus the paths the caller needs for the follow-up steps.
///
/// Running `rustc` with `args` is meant to produce a single object
/// file named `<crate_name>.o` inside `output_dir`, with hyphens in
/// the crate name turned into underscores (see [`object_filename`]).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ObjBuildPlan {
    /// Edited copy of the captured rustc arguments.
    pub args: Vec<String>,
    /// Directory handed to rustc as `--out-dir`.
    pub output_dir: PathBuf,
    /// Path requested through `--emit obj=…`: `output_dir` joined with
    /// the object file name. The runner should look for the object here.
    pub expected_object: PathBuf,
}
57
/// Name of the object file rustc writes for `--emit=obj
/// --crate-type=rlib`: the crate name with every `-` replaced by `_`,
/// followed by `.o`. There is no `lib` prefix — the
/// `lib<crate>.dylib`-style cdylib naming does not apply to objects.
pub fn object_filename(crate_name: &str) -> String {
    let mut file_name = crate_name.replace('-', "_");
    file_name.push_str(".o");
    file_name
}
66
67/// Edit a captured rustc invocation so that running it produces an
68/// object file containing every `pub fn`'s mangled symbol — the
69/// input to the linker step in [`build_link_plan`].
70///
71/// Three changes only:
72///
73/// - **`--crate-type`** is forced to `rlib`. Object files emitted
74/// for an `rlib` crate-type retain mangled `pub fn` symbols
75/// (cdylib's symbol-visibility filter wouldn't have run yet,
76/// because we stop before linking). `lib` would also work but
77/// `rlib` is what cargo itself uses for normal dependency
78/// compilation, so we stay closer to the rustc call shape that
79/// gets the most testing.
80/// - **`--emit`** is forced to a single `obj=<output_dir>/<crate>.o`
81/// directive. This skips the link step (no `lib<crate>.rlib`
82/// metadata bundle, no `.rmeta`, no codegen-units fan-out into
83/// deps) and writes one consolidated object file we can hand
84/// directly to the linker.
85/// - **`--out-dir`** is redirected so the host's `target/` isn't
86/// touched (it's still the rustc-default location for any
87/// auxiliary file rustc decides to emit).
88///
89/// Everything else is preserved verbatim — same target triple,
90/// sysroot, sysroot suffix, `-C` flags, `-L`/`-l` directives, cfg
91/// gates. This is the same "minimal edit, verbatim everything else"
92/// principle as the captured-args replay does for the linker side
93/// in [`super::link_plan::build_link_plan`]; the only difference is
94/// where we stop in rustc's pipeline (`obj` vs `link`).
95pub fn build_obj_plan(captured: &CapturedRustcInvocation, output_dir: &Path) -> ObjBuildPlan {
96 let mut args = captured.args.clone();
97 set_crate_type(&mut args, "rlib");
98 set_out_dir(&mut args, output_dir);
99 let object_path = output_dir.join(object_filename(&captured.crate_name));
100 set_emit_obj(&mut args, &object_path);
101 // cargo's captured args include `--json=artifacts,…` +
102 // `--error-format=json` so its build pipeline can parse rustc's
103 // structured output. We just spawn rustc and inspect exit status;
104 // leaving the JSON flags in produces a noisy
105 // `{"$message_type":"artifact",…}` line on stdout for every
106 // patch, which clutters the dev terminal. Strip them so rustc
107 // falls back to human-readable output (errors still surface on
108 // stderr in plain text).
109 strip_json_flags(&mut args);
110 // Reuse rustc's incremental cache across thin rebuilds. rustc
111 // fingerprints source files + query results into this dir; the
112 // next invocation skips re-typechecking + re-codegenning anything
113 // the fingerprints prove unchanged. For a single-function edit
114 // in a small user crate this can cut Stage 1 (~200 ms cold) to
115 // 50–100 ms warm. The cache lives under our patch dir so it's
116 // wiped together with the thin object on a fresh `whisker run`.
117 set_incremental(&mut args, &output_dir.join("incremental"));
118 // The fat (release) build captured `-Copt-level=3`. For Tier 1
119 // hot patches we strip the LLVM optimization pipeline entirely —
120 // `time-passes` confirms LLVM passes account for ~88% of rustc's
121 // wall time on opt-level=3 user crates. The patched code only
122 // has to *run*, not run fast: hot reload is a dev affordance, so
123 // a 3-4x runtime slowdown of just the patched function bodies is
124 // an excellent trade for a 3× compile-time win. opt-level=0 also
125 // disables intra-crate inlining, which keeps the patch dylib's
126 // call graph well-aligned with the host's exported symbols (no
127 // mystery UND references from functions the host inlined away).
128 override_opt_level(&mut args, "0");
129 ObjBuildPlan {
130 args,
131 output_dir: output_dir.to_path_buf(),
132 expected_object: object_path,
133 }
134}
135
/// Make `-C opt-level=<level>` the only optimisation-level directive.
///
/// Every existing spelling is dropped — the split forms
/// `-C opt-level=N` / `--codegen opt-level=N`, the joined forms
/// `-Copt-level=N` / `--codegen=opt-level=N`, and the `-O` shorthand —
/// and a fresh `-C`, `opt-level=<level>` pair is appended at the end.
/// A `-C` / `--codegen` whose value is some other codegen option is
/// left alone.
fn override_opt_level(args: &mut Vec<String>, level: &str) {
    // Rebuild the list: move the old arguments out, then push back only
    // the ones that survive. Relative order of survivors is unchanged.
    let mut remaining = std::mem::take(args).into_iter().peekable();
    while let Some(arg) = remaining.next() {
        let split_flag = matches!(arg.as_str(), "-C" | "--codegen");
        let value_is_opt_level =
            matches!(remaining.peek(), Some(value) if value.starts_with("opt-level="));
        if split_flag && value_is_opt_level {
            // Swallow the value that belongs to the flag as well.
            remaining.next();
            continue;
        }
        let joined_flag =
            arg.starts_with("-Copt-level=") || arg.starts_with("--codegen=opt-level=");
        if joined_flag || arg == "-O" {
            continue;
        }
        args.push(arg);
    }
    args.push(String::from("-C"));
    args.push(format!("opt-level={level}"));
}
162
/// Point rustc's incremental cache at `incremental_dir`.
///
/// Any `incremental=` codegen option already present is removed first
/// — split (`-C incremental=…`, `--codegen incremental=…`) or joined
/// (`-Cincremental=…`, `--codegen=incremental=…`) — so the captured
/// invocation can never override ours. A single `-C`,
/// `incremental=<dir>` pair is then appended.
fn set_incremental(args: &mut Vec<String>, incremental_dir: &Path) {
    let mut remaining = std::mem::take(args).into_iter().peekable();
    while let Some(arg) = remaining.next() {
        // Split form: the flag and its value are two separate arguments.
        if matches!(arg.as_str(), "-C" | "--codegen")
            && matches!(remaining.peek(), Some(value) if value.starts_with("incremental="))
        {
            remaining.next();
            continue;
        }
        // Joined form: flag and value share one argument.
        if arg.starts_with("-Cincremental=") || arg.starts_with("--codegen=incremental=") {
            continue;
        }
        args.push(arg);
    }
    args.push(String::from("-C"));
    args.push(format!("incremental={}", incremental_dir.display()));
}
187
/// Drop the structured-output flags cargo adds for its own benefit so
/// the thin rebuild prints plain human-readable diagnostics.
///
/// Every `--json` and every `--error-format` argument is removed,
/// whatever its value, in both the separated (`--json <list>`) and the
/// `=` (`--json=<list>`) spelling. Nothing is added back: rustc's
/// default is already human output. A trailing flag that has no value
/// after it is left in place.
fn strip_json_flags(args: &mut Vec<String>) {
    let mut remaining = std::mem::take(args).into_iter();
    while let Some(arg) = remaining.next() {
        // Separated form: consuming the next argument here also discards
        // the flag's value. If nothing follows, the flag is kept as-is.
        if matches!(arg.as_str(), "--json" | "--error-format") && remaining.next().is_some() {
            continue;
        }
        let equals_form = arg.starts_with("--json=") || arg.starts_with("--error-format=");
        if !equals_form {
            args.push(arg);
        }
    }
}
210
/// Collapse all `--emit` directives into exactly one: `obj=<path>`.
///
/// Every existing `--emit` is removed — separated (`--emit link`) or
/// `=` form, including comma-separated lists such as cargo's usual
/// `--emit=dep-info,metadata,link` — and one `--emit`, `obj=<path>`
/// pair is appended. Giving rustc a single output kind with an explicit
/// destination avoids any leftover request for `link`, `dep-info`, etc.
/// A trailing `--emit` with no value after it is left in place.
pub fn set_emit_obj(args: &mut Vec<String>, object_path: &Path) {
    let mut remaining = std::mem::take(args).into_iter();
    while let Some(arg) = remaining.next() {
        // Separated form: pulling the next argument discards the value.
        if arg == "--emit" && remaining.next().is_some() {
            continue;
        }
        if !arg.starts_with("--emit=") {
            args.push(arg);
        }
    }
    args.push(String::from("--emit"));
    args.push(format!("obj={}", object_path.to_string_lossy()));
}
238
/// Replace every `--crate-type` with a single `--crate-type <new_kind>`.
///
/// rustc lets the flag repeat (one invocation can build several crate
/// types); a hot-patch build wants exactly one — [`build_obj_plan`]
/// passes `rlib`. The behaviour is "strip, then append":
///
/// - every existing `--crate-type X` / `--crate-type=X` is removed;
/// - one fresh `--crate-type`, `<new_kind>` pair is pushed at the end.
///
/// Compared with rewriting in place, the outcome is always one
/// contiguous pair no matter how many the input carried, which makes
/// repeated application idempotent. A trailing `--crate-type` with no
/// value after it is left in place.
pub fn set_crate_type(args: &mut Vec<String>, new_kind: &str) {
    let mut remaining = std::mem::take(args).into_iter();
    while let Some(arg) = remaining.next() {
        // Separated form: pulling the next argument discards the value.
        if arg == "--crate-type" && remaining.next().is_some() {
            continue;
        }
        if !arg.starts_with("--crate-type=") {
            args.push(arg);
        }
    }
    args.push(String::from("--crate-type"));
    args.push(new_kind.to_owned());
}
269
/// Shared-library file name for the OS this code was **compiled for**
/// (the dev host), mirroring rustc's `--crate-type cdylib` output:
///
/// - macOS / iOS → `lib<crate>.dylib`
/// - Windows → `<crate>.dll`
/// - anything else → `lib<crate>.so` (Linux, and Android's convention)
///
/// Hyphens in the crate name become underscores, as rustc does. When
/// the patch target's OS differs from the host (e.g. building for
/// Android on macOS) use [`library_filename_for_os`] instead.
pub fn library_filename(crate_name: &str) -> String {
    let stem = crate_name.replace('-', "_");
    let (prefix, extension) = if cfg!(any(target_os = "macos", target_os = "ios")) {
        ("lib", "dylib")
    } else if cfg!(target_os = "windows") {
        ("", "dll")
    } else {
        ("lib", "so")
    };
    format!("{prefix}{stem}.{extension}")
}
289
290/// Cross-platform variant: produce the cdylib filename for the
291/// **patch target** OS (which may differ from the host). The hot-
292/// patch dylib has to match the on-device shared-library naming
293/// convention, not the host's — Android wants `lib<crate>.so` even
294/// when the dev session is on macOS.
295pub fn library_filename_for_os(crate_name: &str, os: super::link_plan::LinkerOs) -> String {
296 use super::link_plan::LinkerOs;
297 let stem = crate_name.replace('-', "_");
298 match os {
299 LinkerOs::Macos => format!("lib{stem}.dylib"),
300 LinkerOs::Linux => format!("lib{stem}.so"),
301 LinkerOs::Other => format!("{stem}.dll"),
302 }
303}
304
/// Send rustc's output to `dir`.
///
/// Uses the same strip-then-append approach as [`set_crate_type`]:
/// every `--out-dir <DIR>` and `--out-dir=<DIR>` is removed and one
/// fresh `--out-dir`, `<dir>` pair is appended. The short `-o <PATH>`
/// form (rare in cargo invocations) is removed as well, so an explicit
/// output file from the captured build cannot compete with the
/// redirected directory. A trailing `--out-dir` / `-o` with no value
/// after it is left in place.
pub fn set_out_dir(args: &mut Vec<String>, dir: &Path) {
    let mut remaining = std::mem::take(args).into_iter();
    while let Some(arg) = remaining.next() {
        // Separated form: pulling the next argument discards the value.
        if matches!(arg.as_str(), "--out-dir" | "-o") && remaining.next().is_some() {
            continue;
        }
        if !arg.starts_with("--out-dir=") {
            args.push(arg);
        }
    }
    args.push(String::from("--out-dir"));
    args.push(dir.to_string_lossy().into_owned());
}
329
330// ============================================================================
331// Tests
332// ============================================================================
333
#[cfg(test)]
mod tests {
    //! Unit tests for the argument-editing helpers and for
    //! [`build_obj_plan`]. Paths are written Unix-style throughout.

    use super::*;

    /// Build an owned argument vector from string literals.
    fn s(v: &[&str]) -> Vec<String> {
        v.iter().map(|s| s.to_string()).collect()
    }

    /// A captured invocation for a crate called `demo` with the given args.
    fn captured_with(args: Vec<String>) -> CapturedRustcInvocation {
        CapturedRustcInvocation {
            crate_name: "demo".into(),
            args,
            timestamp_micros: 0,
        }
    }

    // ----- set_crate_type ----------------------------------------------

    #[test]
    fn set_crate_type_replaces_a_single_existing_separated_pair() {
        let mut args = s(&["--edition=2021", "--crate-type", "rlib", "src/lib.rs"]);
        set_crate_type(&mut args, "cdylib");
        assert_eq!(
            args,
            s(&["--edition=2021", "src/lib.rs", "--crate-type", "cdylib"]),
        );
    }

    #[test]
    fn set_crate_type_replaces_the_equals_form() {
        let mut args = s(&["--crate-type=rlib", "--edition=2021"]);
        set_crate_type(&mut args, "cdylib");
        assert_eq!(args, s(&["--edition=2021", "--crate-type", "cdylib"]));
    }

    #[test]
    fn set_crate_type_collapses_multiple_existing_into_one() {
        // rustc allows `--crate-type rlib --crate-type cdylib` to
        // produce both at once. For a hot-patch we want exactly
        // one, regardless of how many came in.
        let mut args = s(&[
            "--crate-type",
            "rlib",
            "--crate-type",
            "dylib",
            "--crate-type=staticlib",
            "src/lib.rs",
        ]);
        set_crate_type(&mut args, "cdylib");
        assert_eq!(args, s(&["src/lib.rs", "--crate-type", "cdylib"]));
    }

    #[test]
    fn set_crate_type_appends_when_no_existing() {
        let mut args = s(&["--edition=2021", "src/lib.rs"]);
        set_crate_type(&mut args, "cdylib");
        assert_eq!(
            args,
            s(&["--edition=2021", "src/lib.rs", "--crate-type", "cdylib"]),
        );
    }

    // ----- set_out_dir -------------------------------------------------

    #[test]
    fn set_out_dir_replaces_separated_form() {
        let mut args = s(&["--out-dir", "/old/path", "src/lib.rs"]);
        set_out_dir(&mut args, Path::new("/new/path"));
        assert_eq!(args, s(&["src/lib.rs", "--out-dir", "/new/path"]));
    }

    #[test]
    fn set_out_dir_replaces_equals_form() {
        let mut args = s(&["--out-dir=/old/path", "src/lib.rs"]);
        set_out_dir(&mut args, Path::new("/new/path"));
        assert_eq!(args, s(&["src/lib.rs", "--out-dir", "/new/path"]));
    }

    #[test]
    fn set_out_dir_replaces_the_short_o_form() {
        let mut args = s(&["-o", "/old/file.rlib", "src/lib.rs"]);
        set_out_dir(&mut args, Path::new("/new/path"));
        assert_eq!(args, s(&["src/lib.rs", "--out-dir", "/new/path"]));
    }

    #[test]
    fn set_out_dir_appends_when_no_existing() {
        let mut args = s(&["src/lib.rs"]);
        set_out_dir(&mut args, Path::new("/new/path"));
        assert_eq!(args, s(&["src/lib.rs", "--out-dir", "/new/path"]));
    }

    // ----- set_emit_obj ------------------------------------------------

    #[test]
    fn set_emit_obj_replaces_separated_form() {
        let mut args = s(&["--emit", "link", "src/lib.rs"]);
        set_emit_obj(&mut args, Path::new("/p/demo.o"));
        assert_eq!(args, s(&["src/lib.rs", "--emit", "obj=/p/demo.o"]));
    }

    #[test]
    fn set_emit_obj_replaces_equals_form_including_comma_lists() {
        // cargo always passes `--emit=dep-info,metadata,link`; the
        // whole comma-separated lot collapses to a single `obj=…`.
        let mut args = s(&["--emit=dep-info,metadata,link", "src/lib.rs"]);
        set_emit_obj(&mut args, Path::new("/p/demo.o"));
        assert_eq!(args, s(&["src/lib.rs", "--emit", "obj=/p/demo.o"]));
    }

    #[test]
    fn set_emit_obj_collapses_multiple_existing_into_one() {
        let mut args = s(&[
            "--emit",
            "link",
            "--emit=dep-info,metadata",
            "--emit",
            "metadata",
            "src/lib.rs",
        ]);
        set_emit_obj(&mut args, Path::new("/p/demo.o"));
        assert_eq!(args, s(&["src/lib.rs", "--emit", "obj=/p/demo.o"]));
    }

    #[test]
    fn set_emit_obj_appends_when_no_existing() {
        let mut args = s(&["src/lib.rs"]);
        set_emit_obj(&mut args, Path::new("/p/demo.o"));
        assert_eq!(args, s(&["src/lib.rs", "--emit", "obj=/p/demo.o"]));
    }

    // ----- strip_json_flags --------------------------------------------

    #[test]
    fn strip_json_flags_removes_equals_and_separated_forms() {
        let mut args = s(&[
            "--json=artifacts,diagnostic-rendered-ansi",
            "--error-format=json",
            "--json",
            "future-incompat",
            "--error-format",
            "json",
            "src/lib.rs",
        ]);
        strip_json_flags(&mut args);
        assert_eq!(args, s(&["src/lib.rs"]));
    }

    #[test]
    fn strip_json_flags_leaves_unrelated_args_alone() {
        let original = s(&["--edition=2021", "--cfg", "json", "src/lib.rs"]);
        let mut args = original.clone();
        strip_json_flags(&mut args);
        assert_eq!(args, original);
    }

    // ----- set_incremental ---------------------------------------------

    #[test]
    fn set_incremental_replaces_split_and_combined_forms() {
        let mut args = s(&[
            "-C",
            "incremental=/old",
            "-Cincremental=/older",
            "--codegen=incremental=/x",
            "--codegen",
            "incremental=/y",
            "-C",
            "debuginfo=2",
            "src/lib.rs",
        ]);
        set_incremental(&mut args, Path::new("/new/inc"));
        assert_eq!(
            args,
            s(&["-C", "debuginfo=2", "src/lib.rs", "-C", "incremental=/new/inc"]),
        );
    }

    #[test]
    fn set_incremental_appends_when_no_existing() {
        let mut args = s(&["src/lib.rs"]);
        set_incremental(&mut args, Path::new("/new/inc"));
        assert_eq!(args, s(&["src/lib.rs", "-C", "incremental=/new/inc"]));
    }

    // ----- override_opt_level ------------------------------------------

    #[test]
    fn override_opt_level_strips_every_existing_spelling() {
        let mut args = s(&[
            "-C",
            "opt-level=3",
            "-Copt-level=2",
            "--codegen=opt-level=s",
            "--codegen",
            "opt-level=z",
            "-O",
            "-C",
            "debuginfo=2",
            "src/lib.rs",
        ]);
        override_opt_level(&mut args, "0");
        assert_eq!(
            args,
            s(&["-C", "debuginfo=2", "src/lib.rs", "-C", "opt-level=0"]),
        );
    }

    #[test]
    fn override_opt_level_appends_when_no_existing() {
        let mut args = s(&["src/lib.rs"]);
        override_opt_level(&mut args, "1");
        assert_eq!(args, s(&["src/lib.rs", "-C", "opt-level=1"]));
    }

    // ----- object_filename ---------------------------------------------

    #[test]
    fn object_filename_is_crate_dot_o_with_underscores() {
        assert_eq!(object_filename("demo"), "demo.o");
        assert_eq!(object_filename("hello-world"), "hello_world.o");
        assert_eq!(object_filename("a-b-c"), "a_b_c.o");
    }

    // ----- library_filename / library_filename_for_os ------------------

    #[test]
    fn library_filename_normalises_hyphens() {
        let name = library_filename("hello-world");
        assert!(name.contains("hello_world"), "name: {name:?}");
        assert!(!name.contains('-'), "name: {name:?}");
    }

    #[test]
    fn library_filename_for_os_follows_target_convention() {
        use super::super::link_plan::LinkerOs;
        assert_eq!(
            library_filename_for_os("hello-world", LinkerOs::Macos),
            "libhello_world.dylib",
        );
        assert_eq!(
            library_filename_for_os("hello-world", LinkerOs::Linux),
            "libhello_world.so",
        );
        assert_eq!(
            library_filename_for_os("hello-world", LinkerOs::Other),
            "hello_world.dll",
        );
    }

    // ----- build_obj_plan ----------------------------------------------

    #[test]
    fn obj_plan_forces_rlib_and_obj_emit_and_redirects_out_dir() {
        let captured = captured_with(s(&[
            "--edition=2021",
            "--crate-name",
            "demo",
            "--crate-type",
            "lib",
            "--emit=dep-info,metadata,link",
            "--out-dir",
            "/cargo/target/debug/deps",
            "-C",
            "opt-level=3",
            "src/lib.rs",
        ]));
        let plan = build_obj_plan(&captured, Path::new("/whisker/objs/x"));
        assert_eq!(
            plan.args,
            s(&[
                "--edition=2021",
                "--crate-name",
                "demo",
                "src/lib.rs",
                "--crate-type",
                "rlib",
                "--out-dir",
                "/whisker/objs/x",
                "--emit",
                "obj=/whisker/objs/x/demo.o",
                "-C",
                "incremental=/whisker/objs/x/incremental",
                "-C",
                "opt-level=0",
            ]),
        );
        assert_eq!(plan.output_dir, Path::new("/whisker/objs/x"));
        assert_eq!(plan.expected_object, Path::new("/whisker/objs/x/demo.o"));
    }

    #[test]
    fn obj_plan_picks_object_filename_from_captured_crate_name() {
        // crate_name comes from CapturedRustcInvocation.crate_name,
        // *not* from the --crate-name arg — they're typically equal,
        // but the captured field is what we use, so test that.
        let captured = CapturedRustcInvocation {
            crate_name: "thin-build-fixture".into(),
            args: s(&["src/lib.rs"]),
            timestamp_micros: 0,
        };
        let plan = build_obj_plan(&captured, Path::new("/o"));
        assert_eq!(plan.expected_object, Path::new("/o/thin_build_fixture.o"));
        assert!(
            plan.args.contains(&"obj=/o/thin_build_fixture.o".into()),
            "args: {:?}",
            plan.args,
        );
    }

    #[test]
    fn obj_plan_strips_cargo_json_flags() {
        let captured = captured_with(s(&[
            "--error-format=json",
            "--json=artifacts,future-incompat",
            "src/lib.rs",
        ]));
        let plan = build_obj_plan(&captured, Path::new("/o"));
        assert!(
            !plan
                .args
                .iter()
                .any(|a| a.starts_with("--json") || a.starts_with("--error-format")),
            "args: {:?}",
            plan.args,
        );
    }

    #[test]
    fn obj_plan_is_idempotent_on_re_run() {
        let captured = captured_with(s(&["src/lib.rs"]));
        let plan1 = build_obj_plan(&captured, Path::new("/o"));
        let plan2 = build_obj_plan(
            &CapturedRustcInvocation {
                crate_name: captured.crate_name.clone(),
                args: plan1.args.clone(),
                timestamp_micros: 0,
            },
            Path::new("/o"),
        );
        assert_eq!(plan1.args, plan2.args);
    }

    #[test]
    fn obj_plan_preserves_target_triple_and_sysroot_args() {
        // The whole point of "minimal edit" is that target-triple,
        // sysroot, link-args, etc. survive untouched. Regression
        // guard: these specific flags must come through verbatim.
        let captured = captured_with(s(&[
            "--target",
            "aarch64-linux-android",
            "--sysroot",
            "/some/ndk/sysroot",
            "-Clinker=lld",
            "-Clink-arg=-fuse-ld=lld",
            "-L",
            "native=/some/lib",
            "-l",
            "log",
            "src/lib.rs",
        ]));
        let plan = build_obj_plan(&captured, Path::new("/o"));
        for needle in [
            "--target",
            "aarch64-linux-android",
            "--sysroot",
            "/some/ndk/sysroot",
            "-Clinker=lld",
            "-Clink-arg=-fuse-ld=lld",
            "-L",
            "native=/some/lib",
            "-l",
            "log",
        ] {
            assert!(
                plan.args.iter().any(|a| a == needle),
                "missing {needle:?} from {:?}",
                plan.args,
            );
        }
    }
}
587}