whisker_dev_server/hotpatch/symbol_table.rs
1//! ELF / Mach-O symbol-table parser.
2//!
3//! The single piece of logic [`parse_symbol_table`] hides: open a
4//! binary file, hand it to the `object` crate, and project the rich
5//! `object::SymbolTable` API down to the small [`SymbolTable`] view
6//! Whisker's hot-reload pipeline actually needs (name → address +
7//! kind/size/visibility flags).
8//!
9//! Why a projection rather than re-using `object::Symbol` directly:
10//! the `object` types are bound by lifetimes to the file they came
11//! out of. Storing them across an async boundary would force the
//! file bytes to live for the whole dev-loop run. Copying the small
//! pieces we need (name + five scalar fields per symbol) into owned
//! data is cheaper than those lifetime gymnastics.
15
16use anyhow::{Context, Result};
17use object::{Object, ObjectSymbol, SymbolKind};
18use std::collections::HashMap;
19use std::path::Path;
20
21/// What we keep about each symbol — enough to drive subsecond's
22/// JumpTable construction (I4g-2) and no more.
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct SymbolInfo {
25 /// Address relative to the binary's base (= the value we
26 /// eventually feed into a JumpTable's `map` entry on the host
27 /// side; ASLR is corrected by the receiver).
28 pub address: u64,
29 /// Symbol kind: function vs data vs other. Only `Text` is a
30 /// hot-patch candidate.
31 pub kind: SymbolKind,
32 /// Symbol size in bytes (0 if unknown / undefined).
33 pub size: u64,
34 /// True for `extern "C"` declarations the binary refers to but
35 /// doesn't define. JumpTable diffing must skip these.
36 pub is_undefined: bool,
37 /// Weak linkage. Hot-patching weak symbols is unreliable
38 /// (linker chooses winners non-deterministically); we tag them
39 /// so callers can decide.
40 pub is_weak: bool,
41}
42
43/// Owned name → info map. `BTreeMap` would give deterministic
44/// iteration order but we want O(1) lookup by symbol name in the
45/// JumpTable construction step, so HashMap it is.
46#[derive(Debug, Clone, Default, PartialEq, Eq)]
47pub struct SymbolTable {
48 pub by_name: HashMap<String, SymbolInfo>,
49}
50
51impl SymbolTable {
52 /// Number of symbols of the given kind. Mostly a test convenience.
53 pub fn count_kind(&self, kind: SymbolKind) -> usize {
54 self.by_name.values().filter(|s| s.kind == kind).count()
55 }
56}
57
58/// Open `path` and project its symbol table.
59pub fn parse_symbol_table(path: &Path) -> Result<SymbolTable> {
60 let bytes = std::fs::read(path).with_context(|| format!("read {}", path.display()))?;
61 parse_symbol_table_from_bytes(&bytes).with_context(|| format!("parse {}", path.display()))
62}
63
64/// Same as [`parse_symbol_table`] but takes the bytes directly. Used
65/// by tests so we don't need a fixture file on disk.
66pub fn parse_symbol_table_from_bytes(bytes: &[u8]) -> Result<SymbolTable> {
67 let file = object::File::parse(bytes).context("object::File::parse")?;
68 let mut by_name = HashMap::new();
69 for sym in file.symbols() {
70 let name = match sym.name() {
71 Ok(n) if !n.is_empty() => normalize_symbol_name(n),
72 _ => continue, // unnamed (anonymous local) — useless to us
73 };
74 by_name.insert(
75 name,
76 SymbolInfo {
77 address: sym.address(),
78 kind: sym.kind(),
79 size: sym.size(),
80 is_undefined: sym.is_undefined(),
81 is_weak: sym.is_weak(),
82 },
83 );
84 }
85 Ok(SymbolTable { by_name })
86}
87
/// Drop LLVM's ThinLTO internalization suffix (`.llvm.<digits>`) from
/// a symbol name.
///
/// LLVM appends this suffix during LTO when it promotes a previously
/// external symbol to module-local — which is what happens to most
/// Rust functions in a release dylib build. The host (a full fat
/// build with LTO) emits `_ZN..hello_world..app..h<rust>E.llvm.<lto>`
/// while the patch (a single-crate thin rebuild with no LTO) emits
/// `_ZN..hello_world..app..h<rust>E` with no suffix. Without this
/// normalization the JumpTable would map nothing: every patched
/// function would look like an "added" symbol.
///
/// The name is cut at the first `.llvm.`; whatever follows is opaque
/// LLVM data with no semantic value to us. Names without the marker
/// are returned unchanged.
fn normalize_symbol_name(name: &str) -> String {
    const LTO_MARKER: &str = ".llvm.";
    // `find` returns a byte offset on a char boundary, so slicing is safe.
    let stem_len = name.find(LTO_MARKER).unwrap_or(name.len());
    name[..stem_len].to_owned()
}
108
109// ============================================================================
110// Tests
111// ============================================================================
112
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use std::process::Command;

    /// Return the path of the `whisker` debug binary, building it
    /// first when it is not on disk yet.
    ///
    /// The binary is expected at `<target dir>/debug/whisker`. The
    /// target directory is `CARGO_TARGET_DIR` when that variable is
    /// set and non-empty (resolved against the workspace root if it is
    /// relative), otherwise `<workspace root>/target`. If no file is
    /// there, `cargo build -p whisker-cli --bin whisker` is run from
    /// the workspace root and must succeed; on later runs the file
    /// already exists and the build is skipped.
    fn ensure_whisker_binary() -> PathBuf {
        // CARGO_MANIFEST_DIR is this crate's directory; the workspace
        // root is taken to be two levels above it.
        let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .parent()
            .unwrap()
            .parent()
            .unwrap()
            .to_path_buf();
        // Honour an overridden target directory: the nested `cargo
        // build` inherits the variable and writes there, so looking in
        // a hard-coded `target/` would never find the result.
        let target_dir = std::env::var_os("CARGO_TARGET_DIR")
            .filter(|dir| !dir.is_empty())
            .map(|dir| workspace_root.join(dir))
            .unwrap_or_else(|| workspace_root.join("target"));
        let bin = target_dir.join("debug").join("whisker");
        if !bin.is_file() {
            let status = Command::new("cargo")
                .args(["build", "-p", "whisker-cli", "--bin", "whisker"])
                .current_dir(&workspace_root)
                .status()
                .expect("spawn cargo");
            assert!(status.success(), "cargo build failed");
        }
        bin
    }

    #[test]
    fn parses_a_real_host_binary_and_finds_known_function_symbols() {
        let bin = ensure_whisker_binary();
        let table = parse_symbol_table(&bin).expect("parse");

        // The symbol table from a debug build of whisker has hundreds
        // of entries; we just need to confirm we loaded SOMETHING
        // reasonable.
        assert!(
            !table.by_name.is_empty(),
            "expected symbols in {}",
            bin.display(),
        );
        assert!(
            table.count_kind(SymbolKind::Text) > 10,
            "expected dozens of function symbols, got {}",
            table.count_kind(SymbolKind::Text),
        );
    }

    #[test]
    fn parses_a_real_host_binary_with_at_least_one_named_function() {
        let bin = ensure_whisker_binary();
        let table = parse_symbol_table(&bin).expect("parse");
        let any_named_text = table
            .by_name
            .values()
            .any(|s| s.kind == SymbolKind::Text && !s.is_undefined);
        assert!(any_named_text, "no defined function symbols");
    }

    #[test]
    fn normalize_strips_llvm_internalization_suffix() {
        // Host with ThinLTO internalization → patch without LTO.
        // Both must collide on the normalized form so JumpTable
        // construction sees them as the same symbol.
        let host_form = "_ZN11hello_world3app17h04c91e1b6c02c8b4E.llvm.9162950015328890148";
        let patch_form = "_ZN11hello_world3app17h04c91e1b6c02c8b4E";
        assert_eq!(normalize_symbol_name(host_form), patch_form);
        assert_eq!(normalize_symbol_name(patch_form), patch_form);
    }

    #[test]
    fn normalize_leaves_unrelated_dots_alone() {
        // `.llvm.X` is specifically LLVM's internalization marker;
        // we shouldn't truncate at arbitrary dots (e.g. `.cold`,
        // unmangled C++ symbols, or just dots inside string-encoded
        // sections).
        assert_eq!(normalize_symbol_name("foo.cold.0"), "foo.cold.0");
        assert_eq!(normalize_symbol_name("plain_C_symbol"), "plain_C_symbol");
    }

    #[test]
    fn rejects_non_object_bytes_with_an_error() {
        // We don't pin on the exact message — `object` crate may
        // word it differently across releases — only that an
        // error path exists.
        let result = parse_symbol_table_from_bytes(b"not an object file at all");
        assert!(result.is_err(), "garbage bytes must not parse");
    }

    #[test]
    fn count_kind_is_a_simple_filter() {
        let mut t = SymbolTable::default();
        t.by_name.insert(
            "f".into(),
            SymbolInfo {
                address: 0x1000,
                kind: SymbolKind::Text,
                size: 32,
                is_undefined: false,
                is_weak: false,
            },
        );
        t.by_name.insert(
            "g".into(),
            SymbolInfo {
                address: 0x2000,
                kind: SymbolKind::Data,
                size: 8,
                is_undefined: false,
                is_weak: false,
            },
        );
        assert_eq!(t.count_kind(SymbolKind::Text), 1);
        assert_eq!(t.count_kind(SymbolKind::Data), 1);
        assert_eq!(t.count_kind(SymbolKind::Tls), 0);
    }
}