Skip to main content

dbg_cli/session_db/collectors/
disasm.rs

1//! Three disasm collectors for Phase 1: lldb (native), jitdasm (.NET),
2//! go-objdump (Go).
3//!
4//! Each implements `OnDemandCollector` and produces a `DisasmOutput`
5//! the caller can feed to `persist_disasm`. Collectors do NOT touch
6//! the DB directly — that keeps the shell-out + parse logic testable
7//! in isolation and lets the daemon batch multiple collections.
8
9use std::process::{Command, Stdio};
10use std::sync::OnceLock;
11
12use anyhow::{Context, Result, bail};
13use regex::Regex;
14
15use super::{CollectCtx, DisasmOutput, LiveDebugger, OnDemandCollector};
16use crate::jitdasm::JitIndex;
17use crate::session_db::TargetClass;
18
19// ============================================================
20// lldb `disassemble --name <sym>`
21// ============================================================
22
23pub struct LldbDisassembleCollector;
24
25impl OnDemandCollector for LldbDisassembleCollector {
26    fn kind(&self) -> &'static str {
27        "lldb-disassemble"
28    }
29
30    fn supports(&self, class: TargetClass) -> bool {
31        matches!(class, TargetClass::NativeCpu)
32    }
33
34    fn collect(
35        &self,
36        ctx: &CollectCtx<'_>,
37        live: Option<&dyn LiveDebugger>,
38    ) -> Result<DisasmOutput> {
39        let cmd = format!("disassemble --name {}", ctx.symbol);
40        let raw = match live {
41            Some(l) => l.send(&cmd)?,
42            None => run_oneshot_lldb(ctx.target, &cmd)?,
43        };
44        let asm_text = raw.trim().to_string();
45        if asm_text.is_empty() {
46            bail!("lldb produced no disassembly for {}", ctx.symbol);
47        }
48        let code_bytes = count_instruction_bytes(&asm_text);
49        Ok(DisasmOutput {
50            source: "lldb-disassemble",
51            tier: None,
52            code_bytes,
53            asm_text,
54            asm_lines_json: None,
55        })
56    }
57}
58
59fn run_oneshot_lldb(target: &str, disasm_cmd: &str) -> Result<String> {
60    let bin = std::env::var("LLDB_BIN").unwrap_or_else(|_| "lldb".into());
61    let output = Command::new(&bin)
62        .args([
63            "--batch",
64            "--no-use-colors",
65            "-o",
66            &format!("target create \"{}\"", target.replace('"', "\\\"")),
67            "-o",
68            disasm_cmd,
69            "-o",
70            "quit",
71        ])
72        .stdout(Stdio::piped())
73        .stderr(Stdio::piped())
74        .output()
75        .with_context(|| format!("invoking {bin} for disasm"))?;
76    if !output.status.success() && output.stdout.is_empty() {
77        bail!(
78            "{bin} exited {}: {}",
79            output.status,
80            String::from_utf8_lossy(&output.stderr)
81        );
82    }
83    Ok(String::from_utf8_lossy(&output.stdout).to_string())
84}
85
86/// Very rough upper bound on code size: count `0x` prefixes on the
87/// address column of each disasm line. lldb emits `0x<hex>:` per
88/// instruction — works for both x86-64 and aarch64 dumps.
89fn count_instruction_bytes(asm: &str) -> Option<i64> {
90    let re = lldb_addr_regex();
91    let mut addrs = asm
92        .lines()
93        .filter_map(|l| re.captures(l).and_then(|c| u64::from_str_radix(&c[1], 16).ok()));
94    let first = addrs.next()?;
95    let last = addrs.last()?;
96    Some((last as i64) - (first as i64))
97}
98
99fn lldb_addr_regex() -> &'static Regex {
100    static RE: OnceLock<Regex> = OnceLock::new();
101    RE.get_or_init(|| Regex::new(r"(?:->\s+)?\s*0x([0-9a-fA-F]+)\s*[:<]").unwrap())
102}
103
104// ============================================================
105// .NET jitdasm (via `DOTNET_JitDisasm` env on a fresh process)
106// ============================================================
107
108pub struct JitDasmCollector;
109
110impl OnDemandCollector for JitDasmCollector {
111    fn kind(&self) -> &'static str {
112        "jitdasm"
113    }
114
115    fn supports(&self, class: TargetClass) -> bool {
116        matches!(class, TargetClass::ManagedDotnet)
117    }
118
119    /// Prefers the `jitdasm` backend's pre-captured `capture.asm` (the
120    /// backend ran `DOTNET_JitDisasm='*'` once at session start and
121    /// dumped every method to a file). When that file is missing —
122    /// e.g. the session was started with a different .NET backend —
123    /// falls back to spawning a fresh `dotnet` with `DOTNET_JitDisasm`
124    /// scoped to the requested symbol. The live debug session is
125    /// untouched either way.
126    fn collect(
127        &self,
128        ctx: &CollectCtx<'_>,
129        _live: Option<&dyn LiveDebugger>,
130    ) -> Result<DisasmOutput> {
131        let capture = std::env::var_os("DBG_JITDASM_CAPTURE").map(std::path::PathBuf::from);
132        let (text, source_desc) = match capture.as_ref().filter(|p| p.is_file()) {
133            Some(p) => (
134                std::fs::read_to_string(p)
135                    .with_context(|| format!("reading {}", p.display()))?,
136                p.display().to_string(),
137            ),
138            None => (run_jitdasm_fresh(ctx.target, ctx.symbol)?, "fresh dotnet run".into()),
139        };
140
141        // `::` is a C++/docs-style separator; .NET uses a single `:`.
142        // Normalise so the same symbol works in either shape.
143        let needle = ctx.symbol.replace("::", ":");
144
145        // Use the shared `JitIndex` fallback path so `dbg disasm` from
146        // a shell behaves exactly like the interactive REPL: when the
147        // target has no standalone body but the call graph knows
148        // callers, return the callers' bodies banner-separated (the
149        // inlined code lives embedded inside them). A plain header
150        // scan would bail with "no matching header" and leave the
151        // agent stuck.
152        let index = JitIndex::parse(&text);
153        let asm_text = index.disasm_with_parent_fallback(&needle).ok_or_else(|| {
154            anyhow::anyhow!(
155                "jitdasm produced no assembly for {} — no standalone body and no caller \
156                 references the name (so the method is either truly absent from this capture, \
157                 the pattern is misspelled, or every caller was also inlined away). Searched {}",
158                ctx.symbol,
159                source_desc,
160            )
161        })?;
162
163        let tier = parse_jitdasm_tier(&asm_text);
164        Ok(DisasmOutput {
165            source: "jitdasm",
166            tier,
167            code_bytes: None,
168            asm_text,
169            asm_lines_json: None,
170        })
171    }
172}
173
174/// Fallback when no pre-captured `capture.asm` is available.
175///
176/// `dotnet run --project <csproj>` appears to drop `DOTNET_JitDisasm`
177/// (and friends) somewhere between the `dotnet` CLI host and the user
178/// process — an empty capture with zero `; Assembly listing` headers
179/// is the observable symptom. `dotnet exec <dll>` bypasses the
180/// intermediate host and the env vars reach the JIT. So: for a
181/// project target, first `dotnet build` and then resolve the output
182/// dll via the `runtimeconfig.json` sibling (only executable outputs
183/// emit one); for an already-built dll/exe target, exec it directly.
184fn run_jitdasm_fresh(target: &str, symbol: &str) -> Result<String> {
185    let dotnet = std::env::var("DOTNET").unwrap_or_else(|_| "dotnet".into());
186
187    let dll_path: std::path::PathBuf = if target.ends_with(".csproj") || target.ends_with(".fsproj")
188    {
189        // Build the project so bin/Release/net*/ is populated.
190        let build = Command::new(&dotnet)
191            .args(["build", target, "-c", "Release", "--nologo", "-v", "q"])
192            .stdout(Stdio::piped())
193            .stderr(Stdio::piped())
194            .output()
195            .with_context(|| format!("invoking {dotnet} build for {target}"))?;
196        if !build.status.success() {
197            bail!(
198                "dotnet build {} failed:\n{}",
199                target,
200                String::from_utf8_lossy(&build.stderr)
201            );
202        }
203        let proj_dir = std::path::Path::new(target)
204            .parent()
205            .unwrap_or_else(|| std::path::Path::new("."));
206        locate_executable_dll(proj_dir).with_context(|| {
207            format!(
208                "locating built dll under {}/bin/Release/net*/",
209                proj_dir.display()
210            )
211        })?
212    } else {
213        std::path::PathBuf::from(target)
214    };
215
216    let output = Command::new(&dotnet)
217        .arg("exec")
218        .arg(&dll_path)
219        .env("DOTNET_JitDisasm", symbol)
220        .env("DOTNET_TieredCompilation", "0") // deterministic tier
221        .env("DOTNET_JitDiffableDasm", "1")
222        .stdout(Stdio::piped())
223        .stderr(Stdio::piped())
224        .output()
225        .with_context(|| format!("invoking {dotnet} exec {}", dll_path.display()))?;
226    // JitDisasm writes to stdout under `dotnet exec` (stderr-vs-stdout
227    // varies by host); concatenate both so the parser sees everything.
228    let mut text = String::from_utf8_lossy(&output.stdout).into_owned();
229    text.push('\n');
230    text.push_str(&String::from_utf8_lossy(&output.stderr));
231    if text.trim().is_empty() {
232        bail!("dotnet exited {} with no output", output.status);
233    }
234    Ok(text)
235}
236
237/// Find the executable dll in a project's Release output directory.
238///
239/// Executable projects emit a `<AssemblyName>.runtimeconfig.json` next
240/// to the dll — library projects do not. Using its presence as a
241/// marker is more reliable than globbing *.dll and guessing which one
242/// is the entry point when dependencies are side-by-side.
243fn locate_executable_dll(proj_dir: &std::path::Path) -> Result<std::path::PathBuf> {
244    let release_root = proj_dir.join("bin").join("Release");
245    let tfm_dirs = std::fs::read_dir(&release_root)
246        .with_context(|| format!("reading {}", release_root.display()))?;
247    // Multiple `net*` TFMs may coexist (e.g. net6.0 + net8.0); prefer
248    // the most recently modified so a fresh build wins.
249    let mut candidates: Vec<(std::time::SystemTime, std::path::PathBuf)> = Vec::new();
250    for tfm in tfm_dirs.flatten() {
251        let tfm_path = tfm.path();
252        if !tfm_path.is_dir() {
253            continue;
254        }
255        let Ok(entries) = std::fs::read_dir(&tfm_path) else { continue };
256        for entry in entries.flatten() {
257            let p = entry.path();
258            if p.extension().and_then(|e| e.to_str()) == Some("json")
259                && p.file_name()
260                    .and_then(|n| n.to_str())
261                    .is_some_and(|n| n.ends_with(".runtimeconfig.json"))
262            {
263                let dll = p.with_extension("").with_extension("dll");
264                if dll.is_file() {
265                    let mtime = entry
266                        .metadata()
267                        .and_then(|m| m.modified())
268                        .unwrap_or(std::time::UNIX_EPOCH);
269                    candidates.push((mtime, dll));
270                }
271            }
272        }
273    }
274    candidates.sort_by(|a, b| b.0.cmp(&a.0));
275    candidates
276        .into_iter()
277        .next()
278        .map(|(_, p)| p)
279        .ok_or_else(|| anyhow::anyhow!("no <name>.runtimeconfig.json found under {}/net*/", release_root.display()))
280}
281
282fn parse_jitdasm_tier(asm: &str) -> Option<String> {
283    static RE: OnceLock<Regex> = OnceLock::new();
284    let re = RE.get_or_init(|| Regex::new(r"\((Tier-?\d|OSR|MinOpts|FullOpts)\)").unwrap());
285    for line in asm.lines().take(3) {
286        if let Some(c) = re.captures(line) {
287            return Some(c[1].to_lowercase().replace('-', ""));
288        }
289    }
290    None
291}
292
293// ============================================================
294// Go: `go tool objdump -s <sym> <target>`
295// ============================================================
296
297pub struct GoDisassCollector;
298
299impl OnDemandCollector for GoDisassCollector {
300    fn kind(&self) -> &'static str {
301        "go-objdump"
302    }
303
304    fn supports(&self, class: TargetClass) -> bool {
305        // Go binaries compile to native, but we expose them as
306        // NativeCpu in the target-class taxonomy. This collector is
307        // safe to register alongside the lldb collector — the
308        // dispatcher (task 8) picks one per invocation.
309        matches!(class, TargetClass::NativeCpu)
310    }
311
312    fn collect(
313        &self,
314        ctx: &CollectCtx<'_>,
315        _live: Option<&dyn LiveDebugger>,
316    ) -> Result<DisasmOutput> {
317        let output = Command::new("go")
318            .args(["tool", "objdump", "-s", ctx.symbol, ctx.target])
319            .stdout(Stdio::piped())
320            .stderr(Stdio::piped())
321            .output()
322            .context("invoking `go tool objdump`")?;
323        if !output.status.success() {
324            bail!(
325                "go tool objdump failed: {}",
326                String::from_utf8_lossy(&output.stderr).trim()
327            );
328        }
329        let asm_text = String::from_utf8_lossy(&output.stdout).trim().to_string();
330        if asm_text.is_empty() {
331            bail!("go tool objdump produced no output for {}", ctx.symbol);
332        }
333        Ok(DisasmOutput {
334            source: "go-objdump",
335            tier: None,
336            code_bytes: None,
337            asm_text,
338            asm_lines_json: None,
339        })
340    }
341}
342
// Unit tests for the parsing helpers and the collectors' static
// metadata. None of them spawn lldb, dotnet or go.
#[cfg(test)]
mod tests {
    use super::*;

    // Parser-level tests — don't require the tool to be installed.

    // A capture with two method listings: looking up the first must
    // return its body and must not leak the second method.
    #[test]
    fn jitdasm_collector_isolates_target_method() {
        let text = "\
Hello from pre-JIT chatter.
; Assembly listing for method MyApp.Foo:Bar() (Tier1)
 mov rax, rbx
 ret
; Assembly listing for method MyApp.Baz:Qux()
 mov rcx, rdx";
        let got = JitIndex::parse(text)
            .disasm_with_parent_fallback("MyApp.Foo:Bar")
            .expect("expected match");
        assert!(got.contains("MyApp.Foo:Bar"));
        assert!(got.contains("mov rax, rbx"));
        assert!(!got.contains("MyApp.Baz:Qux"));
    }

    #[test]
    fn jitdasm_collector_normalizes_double_colon() {
        // Callers (and human intuition) often write C++/docs-style
        // `Namespace::Type::Method`; the .NET jitdasm header uses a
        // single `:` between type and method. The collector
        // normalises `::` → `:` before lookup so `dbg disasm
        // "Broken.Program::SumFast"` still works.
        let text = "\
; Assembly listing for method Broken.Program:SumFast(System.Int32[]):int (Tier1)
 vaddps ymm0, ymm0, ymm1
 ret";
        // Same normalisation the collector applies before the lookup.
        let needle = "Broken.Program::SumFast".replace("::", ":");
        let got = JitIndex::parse(text)
            .disasm_with_parent_fallback(&needle)
            .expect("expected match");
        assert!(
            got.contains("vaddps"),
            "double-colon form did not match single-colon header: got={got:?}"
        );
    }

    // Neither a standalone body nor any caller mentions the name, so
    // the lookup must report "nothing found" rather than a wrong body.
    #[test]
    fn jitdasm_collector_empty_when_no_match_and_no_callers() {
        let text = "; Assembly listing for method MyApp.X:Y\n mov rax, rbx";
        let got = JitIndex::parse(text).disasm_with_parent_fallback("Other.Method");
        assert!(
            got.is_none(),
            "no standalone body and no caller references should return None: {got:?}"
        );
    }

    #[test]
    fn jitdasm_collector_falls_back_to_parent_on_inlined_target() {
        // Inlined target: no standalone `Helper:Probe` header, but
        // `Outer:Run` contains a call to it. The collector should
        // surface `Outer:Run`'s body with the banner.
        let text = "\
; Assembly listing for method MyNs.Outer:Run():int (FullOpts)
       push     rbp
       call     [MyNs.Helper:Probe(long):int]
       pop      rbp
       ret
; Total bytes of code 9";
        let got = JitIndex::parse(text)
            .disasm_with_parent_fallback("Helper:Probe")
            .expect("expected fallback-to-parent hit");
        assert!(
            got.contains("no standalone body") && got.contains("parent:"),
            "fallback output should carry the banner+parent header: {got}"
        );
        assert!(
            got.contains("MyNs.Outer:Run"),
            "parent method name should be shown: {got}"
        );
        assert!(
            got.contains("call     [MyNs.Helper:Probe"),
            "parent body should be embedded verbatim: {got}"
        );
    }

    // Tier markers are lowercased and lose their dash; a parenthesised
    // phrase that is not a known tier yields `None`.
    #[test]
    fn parse_tier_from_header() {
        let asm = "; Assembly listing for method MyApp.Foo:Bar() (Tier1)\n mov rax, rbx";
        assert_eq!(parse_jitdasm_tier(asm), Some("tier1".into()));
        let asm = "; Assembly listing for method ... (Tier-0)\nnop";
        assert_eq!(parse_jitdasm_tier(asm), Some("tier0".into()));
        let asm = "; Assembly listing for method ... (FullOpts)\nnop";
        assert_eq!(parse_jitdasm_tier(asm), Some("fullopts".into()));
        let asm = "; Assembly listing for method ... (no tier mark)\nnop";
        assert_eq!(parse_jitdasm_tier(asm), None);
    }

    // The estimate is the distance between the first and the last
    // instruction address; the symbol header line has no address and
    // is ignored.
    #[test]
    fn count_bytes_from_address_column() {
        let asm = "\
test`main:
    0x100003f80 <+0>:   push   rbp
    0x100003f84 <+4>:   mov    rbp, rsp
    0x100003f88 <+8>:   mov    eax, 0x0
    0x100003f8d <+13>:  pop    rbp
    0x100003f8e <+14>:  ret";
        let bytes = count_instruction_bytes(asm);
        assert_eq!(bytes, Some(0x100003f8e - 0x100003f80));
    }

    // No parsable address column means no estimate at all.
    #[test]
    fn count_bytes_none_on_empty() {
        assert_eq!(count_instruction_bytes(""), None);
        assert_eq!(count_instruction_bytes("no addrs here"), None);
    }

    // Each collector must accept exactly its own target class; lldb
    // and Go both claim NativeCpu.
    #[test]
    fn collector_supports_matrix() {
        let l = LldbDisassembleCollector;
        assert!(l.supports(TargetClass::NativeCpu));
        assert!(!l.supports(TargetClass::ManagedDotnet));
        assert!(!l.supports(TargetClass::Python));

        let j = JitDasmCollector;
        assert!(j.supports(TargetClass::ManagedDotnet));
        assert!(!j.supports(TargetClass::NativeCpu));

        let g = GoDisassCollector;
        assert!(g.supports(TargetClass::NativeCpu));
        assert!(!g.supports(TargetClass::Python));
    }

    // `kind()` must stay equal to the `source` string each collector
    // writes into `DisasmOutput`.
    #[test]
    fn kinds_match_source_column() {
        assert_eq!(LldbDisassembleCollector.kind(), "lldb-disassemble");
        assert_eq!(JitDasmCollector.kind(), "jitdasm");
        assert_eq!(GoDisassCollector.kind(), "go-objdump");
    }
}