Skip to main content

whisker_dev_server/hotpatch/
cache.rs

1//! Cached snapshot of the *original* binary's symbol table.
2//!
3//! Why a cache: the binary an app boots with is "hundreds of MB" on
4//! a real-world build (subsecond's own comment). We pay that I/O +
5//! parse cost once at the start of a `whisker run` session and re-use
6//! it for every subsequent hot patch. Each save then only has to
7//! parse the small patch dylib and diff it against the cached
8//! original — closer to the sub-second budget.
9//!
10//! Apart from the symbols, we also capture the static virtual
11//! address of `whisker_aslr_anchor` (emitted by `#[whisker::main]`)
12//! in the host binary. That becomes `JumpTable::aslr_reference`,
13//! and our vendored subsecond's `apply_patch` uses it as
14//!
15//! ```ignore
16//! old_offset = aslr_reference()       // dlsym(RTLD_DEFAULT,
17//!                                     //   "whisker_aslr_anchor")
18//!            - table.aslr_reference    // static anchor addr (us)
//!            = ASLR slide (== runtime image base when the static base is 0, e.g. ELF PIE).
20//! ```
21//!
22//! so the JumpTable's static keys can be adjusted to live runtime
23//! addresses. Two earlier bugs led us here:
24//!
25//! 1. Setting this to `file.relative_address_base()` (always 0 for
26//!    ELF PIE) shifted the keys by `runtime_main_addr` rather than
27//!    the image base; `call_as_ptr`'s map lookup always missed.
28//! 2. Anchoring on `main` instead of `whisker_aslr_anchor` — on
29//!    Android, `dlsym(RTLD_DEFAULT, "main")` resolves to
30//!    `app_process64`'s `main`, not the user's `.so`, so the slide
31//!    math computed garbage. The unique anchor name fixes that.
32
33use anyhow::{Context, Result};
34use std::path::{Path, PathBuf};
35
36use super::symbol_table::{parse_symbol_table_from_bytes, SymbolTable};
37
38/// Pre-parsed snapshot of the original (== "fat") binary. Built once
39/// per dev-server run; subsequent JumpTable construction reads from
40/// here without touching the disk again.
41#[derive(Debug, Clone)]
42pub struct HotpatchModuleCache {
43    /// Original binary path on the host. Useful in error messages.
44    pub lib: PathBuf,
45    /// All symbols projected through `parse_symbol_table_from_bytes`.
46    pub symbols: SymbolTable,
47    /// Static virtual address of `whisker_aslr_anchor` in the host
48    /// binary. Goes straight into
49    /// [`subsecond_types::JumpTable::aslr_reference`]. See module
50    /// docs for why this is the anchor symbol's address rather than
51    /// the file's image base, and why we use a dedicated anchor
52    /// rather than upstream subsecond's `main`.
53    pub aslr_reference: u64,
54}
55
56impl HotpatchModuleCache {
57    /// Read the binary at `path` and capture everything we'll need
58    /// for hot-patching. Errors out (rather than partially populating
59    /// the struct) on read / parse failure — there's no useful cache
60    /// to keep around if the original binary is malformed.
61    pub fn from_path(path: impl Into<PathBuf>) -> Result<Self> {
62        let path = path.into();
63        let bytes = std::fs::read(&path).with_context(|| format!("read {}", path.display()))?;
64        let symbols = parse_symbol_table_from_bytes(&bytes)
65            .with_context(|| format!("parse {} symbols", path.display()))?;
66        // Mach-O symbol tables keep the legacy underscore prefix
67        // (`_whisker_aslr_anchor`); ELF strips it. Try both so the
68        // cache works uniformly across host and Android. When
69        // neither is present (test fixtures with no
70        // `#[whisker::main]`) fall back to 0 — subsecond's
71        // `aslr_reference()` math will be off-by-`runtime_anchor`
72        // in that case, so device hot patches won't dispatch, but
73        // the cache still parses and unit tests that just want
74        // symbol-table access still work.
75        let aslr_reference = symbols
76            .by_name
77            .get("whisker_aslr_anchor")
78            .or_else(|| symbols.by_name.get("_whisker_aslr_anchor"))
79            .map(|s| s.address)
80            .unwrap_or(0);
81        Ok(Self {
82            lib: path,
83            symbols,
84            aslr_reference,
85        })
86    }
87
88    /// Convenience accessor (saves the caller a `cache.symbols.by_name`
89    /// where they really just want a lookup).
90    pub fn symbol_count(&self) -> usize {
91        self.symbols.by_name.len()
92    }
93
94    /// Convenience: borrow path back as a `&Path`.
95    pub fn lib_path(&self) -> &Path {
96        &self.lib
97    }
98}
99
100// ============================================================================
101// Tests
102// ============================================================================
103
#[cfg(test)]
mod tests {
    use super::*;
    use std::process::Command;
    use std::time::Instant;

    /// Return the path of the workspace's debug `whisker` binary,
    /// building it with cargo first when it is not already on disk.
    /// Mirrors the bin discovery used by the symbol-table tests.
    fn ensure_whisker_binary() -> PathBuf {
        // The workspace root sits two directory levels above this
        // crate's manifest directory.
        let workspace_root = Path::new(env!("CARGO_MANIFEST_DIR"))
            .ancestors()
            .nth(2)
            .unwrap()
            .to_path_buf();
        let bin = workspace_root.join("target/debug/whisker");
        if bin.is_file() {
            return bin;
        }

        let status = Command::new("cargo")
            .arg("build")
            .args(["-p", "whisker-cli"])
            .args(["--bin", "whisker"])
            .current_dir(&workspace_root)
            .status()
            .expect("spawn cargo");
        assert!(status.success());
        bin
    }

    /// Loading a real binary yields the same path back, a populated
    /// symbol table, and a readable `aslr_reference`.
    #[test]
    fn from_path_loads_symbols_and_aslr_reference() {
        let bin = ensure_whisker_binary();
        let cache = HotpatchModuleCache::from_path(&bin).expect("cache");
        assert_eq!(cache.lib, bin);

        let count = cache.symbol_count();
        assert!(
            count > 100,
            "expected hundreds of symbols in a debug build, got {}",
            count,
        );

        // The concrete value varies with the host's object format
        // and whether the anchor symbol exists in this binary, so no
        // particular number is asserted; reading the field without
        // panicking is the whole check.
        let _ = cache.aslr_reference;
    }

    /// The cache exists so the expensive parse happens once: a
    /// thousand map probes afterwards must cost far less than the
    /// single construction did.
    #[test]
    fn cached_symbol_lookup_is_cheap_after_construction() {
        let bin = ensure_whisker_binary();

        let construct_started = Instant::now();
        let cache = HotpatchModuleCache::from_path(&bin).expect("cache");
        let parse_time = construct_started.elapsed();

        // Probe repeatedly so the measured interval is not trivially
        // small. The key is deliberately absent from the table.
        let probe_started = Instant::now();
        let found = (0..1_000)
            .filter(|_| cache.symbols.by_name.contains_key("nonexistent_xyz"))
            .count();
        let lookup_time = probe_started.elapsed();
        assert_eq!(found, 0);

        // Conservative margin: the lookups must be at least 50x
        // cheaper than the one parse.
        assert!(
            lookup_time * 50 < parse_time,
            "1000 lookups ({lookup_time:?}) should be much faster than \
             one parse ({parse_time:?}); cache benefit unclear",
        );
    }

    /// A path that does not exist surfaces as an error whose message
    /// mentions the read step or the path itself.
    #[test]
    fn missing_path_errors_out() {
        let err = HotpatchModuleCache::from_path("/no/such/binary/exists").unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("read") || message.contains("/no/such"),
            "{err}",
        );
    }

    /// `lib_path` hands back exactly the path the cache was built from.
    #[test]
    fn lib_path_round_trips_the_original_argument() {
        let bin = ensure_whisker_binary();
        let cache = HotpatchModuleCache::from_path(&bin).expect("cache");
        let expected: &Path = bin.as_path();
        assert_eq!(cache.lib_path(), expected);
    }
}