Skip to main content

whisker_dev_server/hotpatch/
symbol_table.rs

//! ELF / Mach-O symbol-table parser.
//!
//! [`parse_symbol_table`] hides a single piece of logic: open a
//! binary file, hand it to the `object` crate, and project the rich
//! `object::SymbolTable` API down to the small [`SymbolTable`] view
//! Whisker's hot-reload pipeline actually needs (name → address +
//! kind/size/visibility flags).
//!
//! Why a projection rather than re-using `object::Symbol` directly:
//! the `object` types are bound by lifetimes to the file they came
//! out of. Storing them across an async boundary would force the
//! file bytes to live for the whole dev-loop run. Copying the small
//! pieces we need (name + 4 numbers per symbol) into owned data is
//! cheaper than those lifetime gymnastics.
15
16use anyhow::{Context, Result};
17use object::{Object, ObjectSymbol, SymbolKind};
18use std::collections::HashMap;
19use std::path::Path;
20
/// What we keep about each symbol — enough to drive subsecond's
/// JumpTable construction (I4g-2) and no more.
///
/// This is an owned projection: every field is a plain value copied
/// out of the parsed object file, so a `SymbolInfo` holds no borrow
/// of the file bytes. The symbol's name is not stored here; it is the
/// key under which the entry lives in [`SymbolTable::by_name`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymbolInfo {
    /// Address relative to the binary's base (= the value we
    /// eventually feed into a JumpTable's `map` entry on the host
    /// side; ASLR is corrected by the receiver). Taken verbatim from
    /// the `object` crate's `address()` for the symbol.
    pub address: u64,
    /// Symbol kind: function vs data vs other. Only `Text` is a
    /// hot-patch candidate.
    pub kind: SymbolKind,
    /// Symbol size in bytes (0 if unknown / undefined). Taken
    /// verbatim from the `object` crate's `size()` for the symbol.
    pub size: u64,
    /// True for `extern "C"` declarations the binary refers to but
    /// doesn't define. JumpTable diffing must skip these.
    pub is_undefined: bool,
    /// Weak linkage. Hot-patching weak symbols is unreliable
    /// (linker chooses winners non-deterministically); we tag them
    /// so callers can decide.
    pub is_weak: bool,
}
42
/// Owned name → info map. `BTreeMap` would give deterministic
/// iteration order but we want O(1) lookup by symbol name in the
/// JumpTable construction step, so HashMap it is.
///
/// `Default` yields an empty table.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SymbolTable {
    /// Symbols keyed by name.
    ///
    /// When the table is produced by [`parse_symbol_table_from_bytes`]
    /// the keys are *normalized* names (everything from the first
    /// `.llvm.` onward is dropped), symbols without a usable name are
    /// omitted, and if several symbols end up with the same key the
    /// one encountered last replaces the earlier ones.
    pub by_name: HashMap<String, SymbolInfo>,
}
50
51impl SymbolTable {
52    /// Number of symbols of the given kind. Mostly a test convenience.
53    pub fn count_kind(&self, kind: SymbolKind) -> usize {
54        self.by_name.values().filter(|s| s.kind == kind).count()
55    }
56}
57
58/// Open `path` and project its symbol table.
59pub fn parse_symbol_table(path: &Path) -> Result<SymbolTable> {
60    let bytes = std::fs::read(path).with_context(|| format!("read {}", path.display()))?;
61    parse_symbol_table_from_bytes(&bytes).with_context(|| format!("parse {}", path.display()))
62}
63
64/// Same as [`parse_symbol_table`] but takes the bytes directly. Used
65/// by tests so we don't need a fixture file on disk.
66pub fn parse_symbol_table_from_bytes(bytes: &[u8]) -> Result<SymbolTable> {
67    let file = object::File::parse(bytes).context("object::File::parse")?;
68    let mut by_name = HashMap::new();
69    for sym in file.symbols() {
70        let name = match sym.name() {
71            Ok(n) if !n.is_empty() => normalize_symbol_name(n),
72            _ => continue, // unnamed (anonymous local) — useless to us
73        };
74        by_name.insert(
75            name,
76            SymbolInfo {
77                address: sym.address(),
78                kind: sym.kind(),
79                size: sym.size(),
80                is_undefined: sym.is_undefined(),
81                is_weak: sym.is_weak(),
82            },
83        );
84    }
85    Ok(SymbolTable { by_name })
86}
87
/// Drop LLVM's ThinLTO internalization suffix (`.llvm.<digits>`) from
/// a symbol name, returning an owned copy of what remains.
///
/// During LTO, LLVM tacks this suffix onto symbols it promotes from
/// external to module-local — which is what happens to most Rust
/// functions in a release dylib build. The host (a full fat build
/// with LTO) therefore emits
/// `_ZN..hello_world..app..h<rust>E.llvm.<lto>`, whereas the patch (a
/// single-crate thin rebuild with no LTO) emits the bare
/// `_ZN..hello_world..app..h<rust>E`. Unless both are reduced to the
/// same form the JumpTable would map nothing: every patched function
/// would look like an "added" symbol.
///
/// The cut happens at the *first* `.llvm.`; whatever follows is
/// treated as opaque LLVM data and discarded without inspection.
/// Names that do not contain `.llvm.` come back unchanged.
fn normalize_symbol_name(name: &str) -> String {
    // `find` yields the byte offset where the marker starts; with no
    // marker present the whole name is the stem.
    let stem_len = name.find(".llvm.").unwrap_or(name.len());
    name[..stem_len].to_owned()
}
108
109// ============================================================================
110// Tests
111// ============================================================================
112
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::path::PathBuf;
    use std::process::Command;

    /// Return the path of the workspace's debug `whisker` binary,
    /// building it first if it is not there yet.
    ///
    /// The parsing tests need a real object file, and the CLI binary
    /// is the most convenient one. A workspace-wide `cargo test` does
    /// not necessarily produce that binary, so when it is missing we
    /// run `cargo build -p whisker-cli --bin whisker` ourselves; on
    /// later runs the file already exists and this is a cheap check.
    ///
    /// Assumptions baked in here: the workspace root is two levels
    /// above this crate's manifest directory, and the default
    /// `target/debug` output directory is in use.
    ///
    /// # Panics
    ///
    /// Panics if the workspace root cannot be derived, if cargo
    /// cannot be spawned or fails, or if the binary is still missing
    /// after a successful build.
    fn ensure_whisker_binary() -> PathBuf {
        let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .ancestors()
            .nth(2)
            .expect("crate directory must sit two levels below the workspace root")
            .to_path_buf();

        // Executables carry a platform suffix (`.exe` on Windows, none
        // elsewhere); without it the existence check can never pass
        // on Windows.
        let bin = workspace_root
            .join("target")
            .join("debug")
            .join(format!("whisker{}", std::env::consts::EXE_SUFFIX));

        if !bin.is_file() {
            // Prefer the cargo that is driving this test run so the
            // nested build uses the same toolchain; fall back to PATH
            // when the variable is not set.
            let cargo = std::env::var_os("CARGO").unwrap_or_else(|| OsString::from("cargo"));
            let status = Command::new(cargo)
                .args(["build", "-p", "whisker-cli", "--bin", "whisker"])
                .current_dir(&workspace_root)
                .status()
                .expect("spawn cargo");
            assert!(status.success(), "cargo build failed");
            assert!(
                bin.is_file(),
                "cargo build succeeded but {} is missing (non-default target dir?)",
                bin.display(),
            );
        }
        bin
    }

    #[test]
    fn parses_a_real_host_binary_and_finds_known_function_symbols() {
        let bin = ensure_whisker_binary();
        let table = parse_symbol_table(&bin).expect("parse");

        // The symbol table from a debug build of whisker has hundreds
        // of entries; we just need to confirm we loaded SOMETHING
        // reasonable.
        assert!(
            !table.by_name.is_empty(),
            "expected symbols in {}",
            bin.display(),
        );
        let text_symbols = table.count_kind(SymbolKind::Text);
        assert!(
            text_symbols > 10,
            "expected dozens of function symbols, got {}",
            text_symbols,
        );
    }

    #[test]
    fn parses_a_real_host_binary_with_at_least_one_named_function() {
        let bin = ensure_whisker_binary();
        let table = parse_symbol_table(&bin).expect("parse");
        let any_named_text = table
            .by_name
            .values()
            .any(|s| s.kind == SymbolKind::Text && !s.is_undefined);
        assert!(any_named_text, "no defined function symbols");
    }

    #[test]
    fn normalize_strips_llvm_internalization_suffix() {
        // Host with ThinLTO internalization → patch without LTO.
        // Both must collide on the normalized form so JumpTable
        // construction sees them as the same symbol.
        let host_form = "_ZN11hello_world3app17h04c91e1b6c02c8b4E.llvm.9162950015328890148";
        let patch_form = "_ZN11hello_world3app17h04c91e1b6c02c8b4E";
        assert_eq!(normalize_symbol_name(host_form), patch_form);
        assert_eq!(normalize_symbol_name(patch_form), patch_form);
    }

    #[test]
    fn normalize_leaves_unrelated_dots_alone() {
        // `.llvm.X` is specifically LLVM's internalization marker;
        // we shouldn't truncate at arbitrary dots (e.g. `.cold`,
        // unmangled C++ symbols, or just dots inside string-encoded
        // sections).
        assert_eq!(normalize_symbol_name("foo.cold.0"), "foo.cold.0");
        assert_eq!(normalize_symbol_name("plain_C_symbol"), "plain_C_symbol");
    }

    #[test]
    fn rejects_non_object_bytes_with_an_error() {
        // We don't pin on the exact message — `object` crate may
        // word it differently across releases — only that garbage
        // input takes the error path instead of yielding a table.
        assert!(
            parse_symbol_table_from_bytes(b"not an object file at all").is_err(),
            "garbage bytes must not parse as an object file",
        );
    }

    #[test]
    fn count_kind_is_a_simple_filter() {
        let mut t = SymbolTable::default();
        t.by_name.insert(
            "f".into(),
            SymbolInfo {
                address: 0x1000,
                kind: SymbolKind::Text,
                size: 32,
                is_undefined: false,
                is_weak: false,
            },
        );
        t.by_name.insert(
            "g".into(),
            SymbolInfo {
                address: 0x2000,
                kind: SymbolKind::Data,
                size: 8,
                is_undefined: false,
                is_weak: false,
            },
        );
        assert_eq!(t.count_kind(SymbolKind::Text), 1);
        assert_eq!(t.count_kind(SymbolKind::Data), 1);
        assert_eq!(t.count_kind(SymbolKind::Tls), 0);
    }
}