Skip to main content

dbg_cli/session_db/collectors/
disasm.rs

1//! Three disasm collectors for Phase 1: lldb (native), jitdasm (.NET),
2//! go-objdump (Go).
3//!
4//! Each implements `OnDemandCollector` and produces a `DisasmOutput`
5//! the caller can feed to `persist_disasm`. Collectors do NOT touch
6//! the DB directly — that keeps the shell-out + parse logic testable
7//! in isolation and lets the daemon batch multiple collections.
8
9use std::process::{Command, Stdio};
10use std::sync::OnceLock;
11
12use anyhow::{Context, Result, bail};
13use regex::Regex;
14
15use super::{CollectCtx, DisasmOutput, LiveDebugger, OnDemandCollector};
16use crate::session_db::TargetClass;
17
18// ============================================================
19// lldb `disassemble --name <sym>`
20// ============================================================
21
22pub struct LldbDisassembleCollector;
23
24impl OnDemandCollector for LldbDisassembleCollector {
25    fn kind(&self) -> &'static str {
26        "lldb-disassemble"
27    }
28
29    fn supports(&self, class: TargetClass) -> bool {
30        matches!(class, TargetClass::NativeCpu)
31    }
32
33    fn collect(
34        &self,
35        ctx: &CollectCtx<'_>,
36        live: Option<&dyn LiveDebugger>,
37    ) -> Result<DisasmOutput> {
38        let cmd = format!("disassemble --name {}", ctx.symbol);
39        let raw = match live {
40            Some(l) => l.send(&cmd)?,
41            None => run_oneshot_lldb(ctx.target, &cmd)?,
42        };
43        let asm_text = raw.trim().to_string();
44        if asm_text.is_empty() {
45            bail!("lldb produced no disassembly for {}", ctx.symbol);
46        }
47        let code_bytes = count_instruction_bytes(&asm_text);
48        Ok(DisasmOutput {
49            source: "lldb-disassemble",
50            tier: None,
51            code_bytes,
52            asm_text,
53            asm_lines_json: None,
54        })
55    }
56}
57
58fn run_oneshot_lldb(target: &str, disasm_cmd: &str) -> Result<String> {
59    let bin = std::env::var("LLDB_BIN").unwrap_or_else(|_| "lldb".into());
60    let output = Command::new(&bin)
61        .args([
62            "--batch",
63            "--no-use-colors",
64            "-o",
65            &format!("target create \"{}\"", target.replace('"', "\\\"")),
66            "-o",
67            disasm_cmd,
68            "-o",
69            "quit",
70        ])
71        .stdout(Stdio::piped())
72        .stderr(Stdio::piped())
73        .output()
74        .with_context(|| format!("invoking {bin} for disasm"))?;
75    if !output.status.success() && output.stdout.is_empty() {
76        bail!(
77            "{bin} exited {}: {}",
78            output.status,
79            String::from_utf8_lossy(&output.stderr)
80        );
81    }
82    Ok(String::from_utf8_lossy(&output.stdout).to_string())
83}
84
85/// Very rough upper bound on code size: count `0x` prefixes on the
86/// address column of each disasm line. lldb emits `0x<hex>:` per
87/// instruction — works for both x86-64 and aarch64 dumps.
88fn count_instruction_bytes(asm: &str) -> Option<i64> {
89    let re = lldb_addr_regex();
90    let mut addrs = asm
91        .lines()
92        .filter_map(|l| re.captures(l).and_then(|c| u64::from_str_radix(&c[1], 16).ok()));
93    let first = addrs.next()?;
94    let last = addrs.last()?;
95    Some((last as i64) - (first as i64))
96}
97
98fn lldb_addr_regex() -> &'static Regex {
99    static RE: OnceLock<Regex> = OnceLock::new();
100    RE.get_or_init(|| Regex::new(r"(?:->\s+)?\s*0x([0-9a-fA-F]+)\s*[:<]").unwrap())
101}
102
103// ============================================================
104// .NET jitdasm (via `DOTNET_JitDisasm` env on a fresh process)
105// ============================================================
106
107pub struct JitDasmCollector;
108
109impl OnDemandCollector for JitDasmCollector {
110    fn kind(&self) -> &'static str {
111        "jitdasm"
112    }
113
114    fn supports(&self, class: TargetClass) -> bool {
115        matches!(class, TargetClass::ManagedDotnet)
116    }
117
118    /// Prefers the `jitdasm` backend's pre-captured `capture.asm` (the
119    /// backend ran `DOTNET_JitDisasm='*'` once at session start and
120    /// dumped every method to a file). When that file is missing —
121    /// e.g. the session was started with a different .NET backend —
122    /// falls back to spawning a fresh `dotnet` with `DOTNET_JitDisasm`
123    /// scoped to the requested symbol. The live debug session is
124    /// untouched either way.
125    fn collect(
126        &self,
127        ctx: &CollectCtx<'_>,
128        _live: Option<&dyn LiveDebugger>,
129    ) -> Result<DisasmOutput> {
130        let capture = std::env::var_os("DBG_JITDASM_CAPTURE").map(std::path::PathBuf::from);
131        let (text, source_desc) = match capture.as_ref().filter(|p| p.is_file()) {
132            Some(p) => (
133                std::fs::read_to_string(p)
134                    .with_context(|| format!("reading {}", p.display()))?,
135                p.display().to_string(),
136            ),
137            None => (run_jitdasm_fresh(ctx.target, ctx.symbol)?, "fresh dotnet run".into()),
138        };
139        let asm_text = extract_jitdasm_section(&text, ctx.symbol);
140        if asm_text.is_empty() {
141            bail!(
142                "jitdasm produced no assembly for {} (no matching `; Assembly listing for method ...{}...` header in {})",
143                ctx.symbol,
144                ctx.symbol,
145                source_desc,
146            );
147        }
148        let tier = parse_jitdasm_tier(&asm_text);
149        Ok(DisasmOutput {
150            source: "jitdasm",
151            tier,
152            code_bytes: None,
153            asm_text,
154            asm_lines_json: None,
155        })
156    }
157}
158
159/// Fallback when no pre-captured `capture.asm` is available. Mirrors
160/// the invocation form used by `backend::jitdasm` so a `.csproj`
161/// target works (`dotnet run --project ...`) instead of being passed
162/// to `dotnet` as a positional arg, which fails.
163fn run_jitdasm_fresh(target: &str, symbol: &str) -> Result<String> {
164    let dotnet = std::env::var("DOTNET").unwrap_or_else(|_| "dotnet".into());
165    let mut cmd = Command::new(&dotnet);
166    if target.ends_with(".csproj") || target.ends_with(".fsproj") {
167        cmd.args(["run", "--project", target, "-c", "Release"]);
168    } else {
169        cmd.arg(target);
170    }
171    let output = cmd
172        .env("DOTNET_JitDisasm", symbol)
173        .env("DOTNET_TieredCompilation", "0") // deterministic tier
174        .env("DOTNET_JitDiffableDasm", "1")
175        .stdout(Stdio::piped())
176        .stderr(Stdio::piped())
177        .output()
178        .with_context(|| format!("invoking {dotnet} for jitdasm"))?;
179    // JitDisasm writes to stdout under `dotnet run` (stderr-vs-stdout
180    // varies by host); concatenate both so the parser sees everything.
181    let mut text = String::from_utf8_lossy(&output.stdout).into_owned();
182    text.push('\n');
183    text.push_str(&String::from_utf8_lossy(&output.stderr));
184    if text.trim().is_empty() {
185        bail!("dotnet exited {} with no output", output.status);
186    }
187    Ok(text)
188}
189
/// Pulls the listing for one method out of a jitdasm dump.
///
/// A dump is a sequence of listings, each introduced by a line starting
/// with `; Assembly listing for method`. The result is the first such
/// header line that contains `symbol`, followed by every line up to (but
/// not including) the next header, joined with `\n`. An empty string is
/// returned when no header mentions the symbol.
///
/// .NET headers put a single colon between type and method
/// (`Namespace.Type:Method`); a `Namespace.Type::Method` symbol is
/// accepted too and is collapsed to the single-colon form before
/// matching.
fn extract_jitdasm_section(stderr: &str, symbol: &str) -> String {
    const HEADER: &str = "; Assembly listing for method";
    let needle = symbol.replace("::", ":");

    // Drop everything before the first header that mentions the symbol.
    let mut rest = stderr
        .lines()
        .skip_while(|line| !(line.starts_with(HEADER) && line.contains(&needle)));

    match rest.next() {
        None => String::new(),
        Some(header) => {
            let mut section = vec![header];
            // The listing ends where the next method's header begins.
            section.extend(rest.take_while(|line| !line.starts_with(HEADER)));
            section.join("\n")
        }
    }
}
216
217fn parse_jitdasm_tier(asm: &str) -> Option<String> {
218    static RE: OnceLock<Regex> = OnceLock::new();
219    let re = RE.get_or_init(|| Regex::new(r"\((Tier-?\d|OSR|MinOpts|FullOpts)\)").unwrap());
220    for line in asm.lines().take(3) {
221        if let Some(c) = re.captures(line) {
222            return Some(c[1].to_lowercase().replace('-', ""));
223        }
224    }
225    None
226}
227
228// ============================================================
229// Go: `go tool objdump -s <sym> <target>`
230// ============================================================
231
232pub struct GoDisassCollector;
233
234impl OnDemandCollector for GoDisassCollector {
235    fn kind(&self) -> &'static str {
236        "go-objdump"
237    }
238
239    fn supports(&self, class: TargetClass) -> bool {
240        // Go binaries compile to native, but we expose them as
241        // NativeCpu in the target-class taxonomy. This collector is
242        // safe to register alongside the lldb collector — the
243        // dispatcher (task 8) picks one per invocation.
244        matches!(class, TargetClass::NativeCpu)
245    }
246
247    fn collect(
248        &self,
249        ctx: &CollectCtx<'_>,
250        _live: Option<&dyn LiveDebugger>,
251    ) -> Result<DisasmOutput> {
252        let output = Command::new("go")
253            .args(["tool", "objdump", "-s", ctx.symbol, ctx.target])
254            .stdout(Stdio::piped())
255            .stderr(Stdio::piped())
256            .output()
257            .context("invoking `go tool objdump`")?;
258        if !output.status.success() {
259            bail!(
260                "go tool objdump failed: {}",
261                String::from_utf8_lossy(&output.stderr).trim()
262            );
263        }
264        let asm_text = String::from_utf8_lossy(&output.stdout).trim().to_string();
265        if asm_text.is_empty() {
266            bail!("go tool objdump produced no output for {}", ctx.symbol);
267        }
268        Ok(DisasmOutput {
269            source: "go-objdump",
270            tier: None,
271            code_bytes: None,
272            asm_text,
273            asm_lines_json: None,
274        })
275    }
276}
277
#[cfg(test)]
mod tests {
    use super::*;

    // These tests cover only the text parsers and the collectors' static
    // metadata, so none of lldb / dotnet / go has to be installed.

    /// Only the listing whose header names the symbol is returned.
    #[test]
    fn extract_jitdasm_isolates_target_method() {
        let stderr = "\
Hello from pre-JIT chatter.
; Assembly listing for method MyApp.Foo:Bar() (Tier1)
 mov rax, rbx
 ret
; Assembly listing for method MyApp.Baz:Qux()
 mov rcx, rdx";
        let section = extract_jitdasm_section(stderr, "MyApp.Foo:Bar");
        for expected in ["MyApp.Foo:Bar", "mov rax, rbx"] {
            assert!(section.contains(expected));
        }
        assert!(!section.contains("MyApp.Baz:Qux"));
    }

    /// Callers (and human intuition) often write the C++/docs-style
    /// `Namespace::Type::Method`, while the .NET jitdasm header puts a
    /// single `:` between type and method. Both spellings must match so
    /// that `dbg disasm "Broken.Program::Foo"` works when the header
    /// says `Broken.Program:Foo`.
    #[test]
    fn extract_jitdasm_normalizes_double_colon() {
        let stderr = "\
; Assembly listing for method Broken.Program:SumFast(System.Int32[]):int (Tier1)
 vaddps ymm0, ymm0, ymm1
 ret";
        let got = extract_jitdasm_section(stderr, "Broken.Program::SumFast");
        assert!(
            got.contains("vaddps"),
            "double-colon form did not match single-colon header: got={got:?}"
        );
    }

    /// A symbol that no header mentions yields an empty string.
    #[test]
    fn extract_jitdasm_empty_when_no_match() {
        let stderr = "; Assembly listing for method MyApp.X:Y\n mov rax, rbx";
        assert!(extract_jitdasm_section(stderr, "Other.Method").is_empty());
    }

    /// Tier markers are lower-cased and lose their dash; unknown
    /// parenthesised text is not treated as a tier.
    #[test]
    fn parse_tier_from_header() {
        let cases: [(&str, Option<&str>); 4] = [
            (
                "; Assembly listing for method MyApp.Foo:Bar() (Tier1)\n mov rax, rbx",
                Some("tier1"),
            ),
            (
                "; Assembly listing for method ... (Tier-0)\nnop",
                Some("tier0"),
            ),
            (
                "; Assembly listing for method ... (FullOpts)\nnop",
                Some("fullopts"),
            ),
            (
                "; Assembly listing for method ... (no tier mark)\nnop",
                None,
            ),
        ];
        for (asm, expected) in cases {
            assert_eq!(parse_jitdasm_tier(asm), expected.map(String::from));
        }
    }

    /// The size estimate is the distance between the first and last
    /// addresses in the listing.
    #[test]
    fn count_bytes_from_address_column() {
        let listing = "\
test`main:
    0x100003f80 <+0>:   push   rbp
    0x100003f84 <+4>:   mov    rbp, rsp
    0x100003f88 <+8>:   mov    eax, 0x0
    0x100003f8d <+13>:  pop    rbp
    0x100003f8e <+14>:  ret";
        assert_eq!(
            count_instruction_bytes(listing),
            Some(0x100003f8e - 0x100003f80)
        );
    }

    /// Input without any address column produces no estimate.
    #[test]
    fn count_bytes_none_on_empty() {
        for input in ["", "no addrs here"] {
            assert_eq!(count_instruction_bytes(input), None);
        }
    }

    /// Each collector accepts exactly the target classes it is meant for.
    #[test]
    fn collector_supports_matrix() {
        let lldb = LldbDisassembleCollector;
        assert!(lldb.supports(TargetClass::NativeCpu));
        assert!(!lldb.supports(TargetClass::ManagedDotnet));
        assert!(!lldb.supports(TargetClass::Python));

        let jitdasm = JitDasmCollector;
        assert!(jitdasm.supports(TargetClass::ManagedDotnet));
        assert!(!jitdasm.supports(TargetClass::NativeCpu));

        let go = GoDisassCollector;
        assert!(go.supports(TargetClass::NativeCpu));
        assert!(!go.supports(TargetClass::Python));
    }

    /// `kind()` must equal the `source` string each collector emits.
    #[test]
    fn kinds_match_source_column() {
        assert_eq!(LldbDisassembleCollector.kind(), "lldb-disassemble");
        assert_eq!(JitDasmCollector.kind(), "jitdasm");
        assert_eq!(GoDisassCollector.kind(), "go-objdump");
    }
}