Skip to main content

dbg_cli/
jitdasm.rs

1//! JIT disassembly parser and interactive REPL.
2//!
3//! Parses .NET JIT disassembly output (DOTNET_JitDisasm) into structured
4//! method records, then provides a command-line interface for querying them.
5
6use std::io::{self, BufRead, Write};
7
/// One method's listing, as carved out of a JIT disassembly capture.
#[derive(Debug)]
pub struct JitMethod {
    /// Signature as printed in the listing header with the trailing
    /// parenthesised suffix removed, e.g.
    /// "MyNamespace.SimdOps:DotProduct(...):float".
    pub name: String,
    /// Size of the generated code in bytes, read from the listing's
    /// `; Total bytes of code` line (0 when that line is absent or
    /// cannot be parsed).
    pub code_bytes: u32,
    /// Verbatim listing text: every line from this method's header up to
    /// (but not including) the next method's header, each terminated by
    /// a newline.
    pub body: String,
}
18
/// Build the message shown when a `methods`/`disasm`/etc. pattern matches
/// no standalone listing.
///
/// The common trap: the JIT only emits a standalone body (with a
/// `; Assembly listing for method …` header) for methods it compiled on
/// their own. Small/hot methods get inlined at every call site and leave
/// no trace under their own name, so a bare "no methods found" is correct
/// but misleading.
///
/// Three shapes of output:
/// * `pattern` is empty or consists only of whitespace/`*` — the plain
///   `"no methods found\n"` (nothing specific was asked for, so there is
///   nothing to steer towards).
/// * `callers` is non-empty — a message that lists up to eight of the
///   given callers and suggests `disasm <first caller>`.
/// * otherwise — a generic step-by-step workflow for locating an inlined
///   method via `search` and `disasm <parent-method>`.
///
/// `callers` is used exactly as passed in (order included); its first
/// entry is the one advertised in the ready-to-run `disasm` suggestion.
fn empty_match_hint(pattern: &str, callers: &[&str]) -> String {
    /// Callers listed by name before collapsing the rest into "… and N more".
    const MAX_LISTED: usize = 8;

    let p = pattern.trim().trim_matches('*');
    if p.is_empty() {
        return "no methods found\n".into();
    }

    // Preferred path: concrete callers are known, so advertise them and
    // hand over a command that can be run as-is.
    if let Some(first) = callers.first() {
        let list: Vec<String> = callers
            .iter()
            .take(MAX_LISTED)
            .map(|c| format!("    • {c}"))
            .collect();
        let more = if callers.len() > MAX_LISTED {
            format!("\n    … and {} more", callers.len() - MAX_LISTED)
        } else {
            String::new()
        };
        return format!(
            "no standalone body for `{pattern}` — it was inlined at every call\n\
             site. The inlined code runs, but lives embedded inside the callers'\n\
             listings, not under its own header.\n\
             \n\
             Known callers (from the call graph of this capture):\n\
             {}{more}\n\
             \n\
             Try `disasm {}` — the inlined body appears right after that\n\
             call-site's argument setup. `search <tell-tale-instruction>` still\n\
             works if you want to narrow down which caller actually hit a\n\
             specific codegen variant.\n",
            list.join("\n"),
            first,
        );
    }

    // Fallback: no caller info (everything up the chain was inlined too,
    // or the pattern is misspelled). Give the generic workflow.
    //
    // Each output line is its own literal. A `\`-newline continuation
    // would swallow the leading spaces of the following source line and
    // flatten the indented list below, so the indentation is spelled out
    // inside the literals instead. `concat!` hides the literal from
    // implicit argument capture, hence the explicit `pattern =` / `p =`.
    format!(
        concat!(
            "no methods found matching `{pattern}`.\n",
            "If the method is small/hot it was probably inlined — the JIT emits no\n",
            "standalone body for inlined methods, so it won't appear here. The\n",
            "inlined code still executes: it lives inside the caller's disasm.\n",
            "\n",
            "Workflow:\n",
            "  1. Find a caller (the \"parent\" method that invokes it):\n",
            "       `search <tell-tale-instruction-or-helper>`\n",
            "     Pick an op the callee is likely to emit (a distinctive mask,\n",
            "     shift, compare, or helper call). Every method whose body\n",
            "     contains it is a caller that inlined the target.\n",
            "  2. `disasm <parent-method>` — the inlined body appears embedded\n",
            "     in the caller's listing, usually right after the call-site's\n",
            "     argument setup.\n",
            "  3. `methods *{p}*` — double-check the name isn't just qualified\n",
            "     differently (generics, overloads, nested types).\n",
            "  4. Only as a last resort: add `[MethodImpl(MethodImplOptions.NoInlining)]`\n",
            "     for a one-off run to force standalone codegen. Revert after —\n",
            "     it distorts everything else's disasm.\n",
        ),
        pattern = pattern,
        p = p,
    )
}
95
/// Report whether `line` is a register-zeroing xor rather than real SIMD
/// work.
///
/// The .NET JIT emits `vxorps reg, reg, reg` (and `vpxor`, `xorps`, …)
/// purely to zero a register before scalar work. Counting it as a SIMD
/// hit in `simd` output turned scalar methods into false positives,
/// defeating the point of the command. The check is intentionally
/// narrow: only the listed xor mnemonics qualify, and only when every
/// operand is the same text (case-insensitively).
///
/// A trailing `; comment` is discarded before the operands are split, so
/// commas inside the comment cannot masquerade as extra operands.
fn is_zero_init_xor(line: &str) -> bool {
    let lower = line.to_ascii_lowercase();
    // Everything from the first `;` onwards is commentary, not operands.
    let code = lower.split(';').next().unwrap_or("").trim();

    let Some((mnemonic, operands)) = code.split_once(|c: char| c.is_ascii_whitespace()) else {
        return false;
    };
    if !matches!(mnemonic, "vxorps" | "vxorpd" | "vpxor" | "xorps" | "pxor") {
        return false;
    }

    let mut regs = operands.split(',').map(str::trim).filter(|t| !t.is_empty());
    match regs.next() {
        Some(first) => regs.all(|r| r == first),
        None => false,
    }
}
120
121/// Parsed index of all methods in a JIT disassembly file.
122pub struct JitIndex {
123    pub methods: Vec<JitMethod>,
124    /// Inverted call graph: callee-name-substring → caller method names.
125    ///
126    /// Built from `call     [Namespace.Class:Method(args):ret]` operands
127    /// in every method body. When `disasm <X>` matches no standalone
128    /// listing (because `X` was inlined at every site), we look up `X`
129    /// here and advertise the callers by name — the user can jump
130    /// straight to `disasm <caller>` and find the inlined body embedded
131    /// there, instead of having to discover the caller via `search`
132    /// first.
133    ///
134    /// Keyed by both the fully-qualified `Namespace.Class:Method` form
135    /// and the short `Class:Method` form, so a pattern in either shape
136    /// resolves.
137    pub call_graph: std::collections::HashMap<String, Vec<String>>,
138}
139
/// Pull the callee name out of a single disassembly line of the form
/// `       call     [Namespace.Class:Method(args):ret]`.
///
/// Returns `(fq_name, short_name)`. `fq_name` is the bracketed operand up
/// to (not including) the first `(`; `short_name` is the same text with
/// the leading `Namespace.` qualification of the class removed. Returns
/// `None` when the line is not a `call`, when the operand is not
/// bracketed (e.g. `CORINFO_HELP_*` helpers, `qword ptr [...]`), or when
/// the bracketed text names no `Class:Method` (e.g. `[rax+0x10]`).
///
/// The closing bracket is located by bracket *matching*, not by taking
/// the first `]`: generic instantiations such as
/// ``[System.Collections.Generic.List`1[int]:Add(int):this]`` carry
/// nested brackets before the `:`, and cutting at the first `]` would
/// truncate the name and lose the call.
fn extract_call_target(line: &str) -> Option<(String, String)> {
    let rest = line.trim_start().strip_prefix("call")?;
    // Require a word boundary after the mnemonic — don't match `callq` or
    // stray identifiers that merely begin with "call".
    if !rest.starts_with(|c: char| c.is_ascii_whitespace()) {
        return None;
    }
    let operand = rest.trim_start().strip_prefix('[')?;

    // Walk to the `]` that balances the `[` just stripped.
    let mut depth = 1usize;
    let mut matching_close = None;
    for (idx, ch) in operand.char_indices() {
        match ch {
            '[' => depth += 1,
            ']' => {
                depth -= 1;
                if depth == 0 {
                    matching_close = Some(idx);
                    break;
                }
            }
            _ => {}
        }
    }
    // Unbalanced text (e.g. a truncated line): fall back to the first `]`
    // so such lines are still accepted the way a plain split would.
    let close = matching_close.or_else(|| operand.find(']'))?;
    let inside = &operand[..close];

    // Take up to the first `(` — the signature is noise for graph purposes.
    let name = inside.split('(').next().unwrap_or(inside).trim();
    if !name.contains(':') {
        return None;
    }

    // The namespace is whatever precedes the last `.` of the class path.
    // Only look at the text before the first `[` or `:` so dots inside
    // generic arguments or in the method part (`.ctor`, explicit
    // interface names) are not mistaken for the namespace separator.
    let head_end = name
        .find(|c: char| matches!(c, '[' | ':'))
        .unwrap_or(name.len());
    let short = match name[..head_end].rfind('.') {
        Some(dot) => &name[dot + 1..],
        None => name,
    };
    Some((name.to_string(), short.to_string()))
}
167
168impl JitIndex {
169    /// Parse raw JIT disassembly text into an indexed structure.
170    pub fn parse(text: &str) -> Self {
171        let mut methods = Vec::new();
172        let mut current_name: Option<String> = None;
173        let mut current_body = String::new();
174        let mut current_bytes: u32 = 0;
175
176        for line in text.lines() {
177            if let Some(rest) = line.strip_prefix("; Assembly listing for method ") {
178                // Flush previous method
179                if let Some(name) = current_name.take() {
180                    methods.push(JitMethod {
181                        name,
182                        code_bytes: current_bytes,
183                        body: std::mem::take(&mut current_body),
184                    });
185                }
186                // Parse method name (strip trailing "(FullOpts)" etc.)
187                let name = rest
188                    .rsplit_once(" (")
189                    .map(|(n, _)| n)
190                    .unwrap_or(rest)
191                    .to_string();
192                current_name = Some(name);
193                current_body.clear();
194                current_bytes = 0;
195            }
196
197            if let Some(rest) = line.strip_prefix("; Total bytes of code") {
198                // Format: "; Total bytes of code 42" or "; Total bytes of code = 42"
199                current_bytes = rest
200                    .trim()
201                    .trim_start_matches('=')
202                    .trim()
203                    .parse()
204                    .unwrap_or(0);
205            }
206
207            if current_name.is_some() {
208                current_body.push_str(line);
209                current_body.push('\n');
210            }
211        }
212
213        // Flush last method
214        if let Some(name) = current_name {
215            methods.push(JitMethod {
216                name,
217                code_bytes: current_bytes,
218                body: current_body,
219            });
220        }
221
222        // Build the inverted call graph. See struct doc for rationale.
223        let mut call_graph: std::collections::HashMap<String, Vec<String>> =
224            std::collections::HashMap::new();
225        for m in &methods {
226            let mut already: std::collections::HashSet<(String, String)> =
227                std::collections::HashSet::new();
228            for line in m.body.lines() {
229                if let Some((fq, short)) = extract_call_target(line) {
230                    // Dedup per-caller so a method calling the same target
231                    // twice doesn't list itself twice in the advert.
232                    if already.insert((fq.clone(), short.clone())) {
233                        call_graph.entry(fq).or_default().push(m.name.clone());
234                        if !call_graph.get(&short).is_some_and(|v| v.contains(&m.name)) {
235                            call_graph.entry(short).or_default().push(m.name.clone());
236                        }
237                    }
238                }
239            }
240        }
241
242        JitIndex { methods, call_graph }
243    }
244
245    /// Callers that reference `pattern` in a `call` instruction. Used
246    /// by the empty-match hint to advertise where an inlined target's
247    /// body can actually be inspected (inside the caller's listing).
248    ///
249    /// Substring match so `MathUtils:Length`, `:Length`, and the full
250    /// `MyNamespace.MathUtils:Length` all resolve.
251    pub fn callers_of(&self, pattern: &str) -> Vec<&str> {
252        let needle = pattern.trim().trim_matches('*');
253        if needle.is_empty() {
254            return Vec::new();
255        }
256        let mut seen = std::collections::HashSet::new();
257        let mut out: Vec<&str> = Vec::new();
258        for (callee, callers) in &self.call_graph {
259            if callee.contains(needle) {
260                for c in callers {
261                    if seen.insert(c.as_str()) {
262                        out.push(c.as_str());
263                    }
264                }
265            }
266        }
267        out.sort_unstable();
268        out
269    }
270
271    /// Filter methods whose name matches a substring (case-sensitive).
272    fn filter(&self, pattern: &str) -> Vec<&JitMethod> {
273        if pattern.is_empty() || pattern == "." {
274            self.methods.iter().collect()
275        } else {
276            self.methods
277                .iter()
278                .filter(|m| m.name.contains(pattern))
279                .collect()
280        }
281    }
282
283    /// `methods [pattern]` — list methods with code sizes, sorted largest first.
284    pub fn cmd_methods(&self, pattern: &str) -> String {
285        let mut matched = self.filter(pattern);
286        matched.sort_by(|a, b| b.code_bytes.cmp(&a.code_bytes));
287        let mut out = String::new();
288        for m in &matched {
289            out.push_str(&format!("{:<60} {} bytes\n", m.name, m.code_bytes));
290        }
291        if out.is_empty() {
292            let callers = self.callers_of(pattern);
293            out.push_str(&empty_match_hint(pattern, &callers));
294        }
295        out
296    }
297
298    /// Produce real disassembly text for `pattern` — the method's own
299    /// body if it was emitted standalone, or the callers' bodies
300    /// (banner-separated) when it was inlined away. Returns `None`
301    /// when neither path has anything: no standalone match, no
302    /// caller references in the capture. Callers decide whether to
303    /// render a hint, bail, etc.
304    ///
305    /// Shared between the interactive REPL and the on-demand
306    /// disasm collector so both surfaces benefit from the
307    /// inlined-parent fallback — previously the collector did a
308    /// plain header scan and bailed with "no matching header",
309    /// making `dbg disasm` from a shell strictly worse than the
310    /// REPL for inlined targets.
311    pub fn disasm_with_parent_fallback(&self, pattern: &str) -> Option<String> {
312        let matched = self.filter(pattern);
313        if !matched.is_empty() {
314            let mut out = String::new();
315            for m in &matched {
316                out.push_str(&m.body);
317                out.push('\n');
318            }
319            return Some(out);
320        }
321
322        let callers = self.callers_of(pattern);
323        if callers.is_empty() {
324            return None;
325        }
326
327        // Cap at the 6 largest caller bodies to keep the output
328        // navigable. Larger methods are more likely to contain the
329        // inlined body in a recognisable form; if the agent wants all
330        // of them, they can ask for each by name.
331        const MAX: usize = 6;
332        let mut caller_methods: Vec<&JitMethod> = callers
333            .iter()
334            .filter_map(|c| self.methods.iter().find(|m| m.name == *c))
335            .collect();
336        caller_methods.sort_by(|a, b| b.code_bytes.cmp(&a.code_bytes));
337        let truncated = caller_methods.len() > MAX;
338        caller_methods.truncate(MAX);
339
340        let mut out = format!(
341            "── `{pattern}` has no standalone body — inlined at every call site. \
342             Showing {} caller listing(s); the inlined body is embedded in each. ──\n\n",
343            caller_methods.len()
344        );
345        for m in &caller_methods {
346            out.push_str(&format!(
347                "════════ parent: {} ════════\n",
348                m.name
349            ));
350            out.push_str(&m.body);
351            out.push('\n');
352        }
353        if truncated {
354            out.push_str(&format!(
355                "\n(… {} more caller(s) omitted; request by name if needed.)\n",
356                callers.len() - MAX
357            ));
358        }
359        Some(out)
360    }
361
362    /// `disasm <pattern>` — REPL command. Thin wrapper over
363    /// `disasm_with_parent_fallback` that renders the generic
364    /// inlining hint when there's nothing useful to show.
365    pub fn cmd_disasm(&self, pattern: &str) -> String {
366        self.disasm_with_parent_fallback(pattern)
367            .unwrap_or_else(|| empty_match_hint(pattern, &self.callers_of(pattern)))
368    }
369
370    /// `search <instruction>` — find methods containing a specific instruction.
371    pub fn cmd_search(&self, pattern: &str) -> String {
372        let mut out = String::new();
373        for m in &self.methods {
374            let hits: Vec<&str> = m
375                .body
376                .lines()
377                .filter(|l| !l.starts_with(';') && l.contains(pattern))
378                .collect();
379            if !hits.is_empty() {
380                out.push_str(&format!("{} ({} hits):\n", m.name, hits.len()));
381                for h in &hits {
382                    out.push_str(&format!("  {}\n", h.trim()));
383                }
384            }
385        }
386        if out.is_empty() {
387            out.push_str("no matches\n");
388        }
389        out
390    }
391
392    /// Extract call targets from a method body.
393    fn extract_calls(body: &str) -> Vec<String> {
394        let mut targets = Vec::new();
395        for line in body.lines() {
396            let trimmed = line.trim();
397            if trimmed.starts_with(';') {
398                continue;
399            }
400            // Match: call [Target] or call Target
401            if let Some(rest) = trimmed.strip_prefix("call") {
402                let rest = rest.trim();
403                // Strip brackets: [Foo:Bar(...)] → Foo:Bar(...)
404                let target = rest
405                    .strip_prefix('[')
406                    .and_then(|s| s.strip_suffix(']'))
407                    .unwrap_or(rest);
408                if !target.is_empty() {
409                    targets.push(target.to_string());
410                }
411            }
412        }
413        targets
414    }
415
416    /// `calls <pattern>` — what does this method call?
417    pub fn cmd_calls(&self, pattern: &str) -> String {
418        let matched = self.filter(pattern);
419        let mut out = String::new();
420        for m in &matched {
421            let targets = Self::extract_calls(&m.body);
422            if targets.is_empty() {
423                out.push_str(&format!("{}: no calls\n", m.name));
424            } else {
425                out.push_str(&format!("{} ({} calls):\n", m.name, targets.len()));
426                for t in &targets {
427                    out.push_str(&format!("  → {}\n", t));
428                }
429            }
430        }
431        if out.is_empty() {
432            out.push_str("no methods found\n");
433        }
434        out
435    }
436
437    /// `callers <pattern>` — who calls this method?
438    pub fn cmd_callers(&self, pattern: &str) -> String {
439        let mut out = String::new();
440        for m in &self.methods {
441            let targets = Self::extract_calls(&m.body);
442            let hits: Vec<&String> = targets.iter().filter(|t| t.contains(pattern)).collect();
443            if !hits.is_empty() {
444                out.push_str(&format!("{} calls it {} time(s):\n", m.name, hits.len()));
445                for t in &hits {
446                    out.push_str(&format!("  → {}\n", t));
447                }
448            }
449        }
450        if out.is_empty() {
451            out.push_str(&format!("no callers found for '{}'\n", pattern));
452        }
453        out
454    }
455
456    /// `stats [pattern]` — summary statistics.
457    pub fn cmd_stats(&self, pattern: &str) -> String {
458        let matched = self.filter(pattern);
459        if matched.is_empty() {
460            return "no methods found\n".into();
461        }
462
463        let total_bytes: u32 = matched.iter().map(|m| m.code_bytes).sum();
464
465        // Collect all non-comment instruction lines
466        let instructions: Vec<&str> = matched
467            .iter()
468            .flat_map(|m| m.body.lines())
469            .filter(|l| !l.starts_with(';') && !l.is_empty())
470            .collect();
471
472        let count = |pats: &[&str]| -> usize {
473            instructions.iter().filter(|l| pats.iter().any(|p| l.contains(p))).count()
474        };
475
476        let avx512 = count(&["zmm"]);
477        let avx2 = count(&["ymm"]);
478        let sse = count(&["xmm"]);
479        let fma = count(&["vfmadd", "vfmsub", "vfnmadd", "vfnmsub"]);
480        let neon = instructions.iter().filter(|l| l.contains("{v") && (l.contains("ld1") || l.contains("st1") || l.contains("fmla") || l.contains("fmul"))).count();
481        let sve = instructions.iter().filter(|l| (l.contains("ld1w") || l.contains("st1w")) && l.contains("z")).count();
482        let bounds = count(&["RNGCHKFAIL"]);
483        let spills = instructions.iter().filter(|l| l.contains("mov") && l.contains("[rsp")).count();
484
485        let label = if pattern.is_empty() || pattern == "." {
486            "--- all methods ---".to_string()
487        } else {
488            format!("--- filter: {} ---", pattern)
489        };
490
491        let mut out = format!("{}\n", label);
492        out.push_str(&format!("Methods:       {}\n", matched.len()));
493        out.push_str(&format!("Total code:    {} bytes\n", total_bytes));
494
495        if avx512 > 0 || avx2 > 0 || sse > 0 {
496            out.push_str(&format!("AVX-512 (zmm): {} instructions\n", avx512));
497            out.push_str(&format!("AVX2 (ymm):    {} instructions\n", avx2));
498            out.push_str(&format!("SSE (xmm):     {} instructions\n", sse));
499        }
500        if neon > 0 || sve > 0 {
501            out.push_str(&format!("NEON:          {} instructions\n", neon));
502            out.push_str(&format!("SVE:           {} instructions\n", sve));
503        }
504        // If no SIMD detected at all, show zeros
505        if avx512 == 0 && avx2 == 0 && sse == 0 && neon == 0 && sve == 0 {
506            out.push_str("SIMD:          none detected\n");
507        }
508        out.push_str(&format!("FMA:           {} instructions\n", fma));
509        out.push_str(&format!("Bounds checks: {}\n", bounds));
510        out.push_str(&format!("Stack spills:  {}\n", spills));
511        out
512    }
513
514    /// `hotspots [N] [pattern]` — top N methods by code size.
515    pub fn cmd_hotspots(&self, n: usize, pattern: &str) -> String {
516        let mut matched = self.filter(pattern);
517        matched.sort_by(|a, b| b.code_bytes.cmp(&a.code_bytes));
518        let mut out = String::new();
519        for m in matched.iter().take(n) {
520            out.push_str(&format!("{:<60} {} bytes\n", m.name, m.code_bytes));
521        }
522        if out.is_empty() {
523            out.push_str("no methods found\n");
524        }
525        out
526    }
527
528    /// `simd [pattern]` — find methods using SIMD instructions,
529    /// optionally scoped to a name-substring filter.
530    pub fn cmd_simd_filtered(&self, pattern: &str) -> String {
531        const SIMD_PATTERNS: &[&str] = &[
532            "vmovups", "vmovaps", "vmulps", "vaddps", "vfmadd", "vdpps",
533            "vxorps", "vperm", "vbroadcast",
534            // ARM NEON
535            "ld1", "st1", "fmla", "fmul.v", "fadd.v",
536        ];
537
538        let methods = self.filter(pattern);
539        let mut out = String::new();
540        for m in &methods {
541            let hits: Vec<&str> = m
542                .body
543                .lines()
544                .filter(|l| {
545                    !l.starts_with(';')
546                        && SIMD_PATTERNS.iter().any(|p| l.contains(p))
547                        && !is_zero_init_xor(l)
548                })
549                .collect();
550            if !hits.is_empty() {
551                out.push_str(&format!("{} ({} hits):\n", m.name, hits.len()));
552                for h in &hits {
553                    out.push_str(&format!("  {}\n", h.trim()));
554                }
555            }
556        }
557        if out.is_empty() {
558            out.push_str("no SIMD instructions found\n");
559        }
560        out
561    }
562}
563
/// Map a user-typed verb onto the name the REPL dispatches on.
///
/// `jitdasm` is accepted as a synonym for `disasm`: the scenario
/// instructions and the top-level `dbg jitdasm <pattern>` command line
/// use that name, and users carry it into the REPL. Every other verb is
/// returned unchanged (the comparison is exact and case-sensitive).
pub(crate) fn canonical_verb(cmd: &str) -> &str {
    if cmd == "jitdasm" {
        "disasm"
    } else {
        cmd
    }
}
575
576/// Run the interactive REPL. Reads commands from stdin, writes results to stdout.
577pub fn run_repl(asm_path: &str, default_pattern: &str) -> io::Result<()> {
578    let text = std::fs::read_to_string(asm_path)?;
579    let index = JitIndex::parse(&text);
580
581    eprintln!(
582        "--- ready: {} methods captured ---",
583        index.methods.len()
584    );
585    if !default_pattern.is_empty() {
586        eprintln!(
587            "--- default filter: `{}` (stats/simd/hotspots narrow to this) ---",
588            default_pattern
589        );
590    }
591    eprintln!("Type: help");
592
593    let stdin = io::stdin();
594    let mut stdout = io::stdout();
595
596    loop {
597        print!("jitdasm> ");
598        stdout.flush()?;
599
600        let mut line = String::new();
601        if stdin.lock().read_line(&mut line)? == 0 {
602            break; // EOF
603        }
604        let line = line.trim();
605        if line.is_empty() {
606            continue;
607        }
608
609        let parts: Vec<&str> = line.splitn(3, ' ').collect();
610        let cmd = parts[0];
611        let arg1 = parts.get(1).copied().unwrap_or("");
612        let arg2 = parts.get(2).copied().unwrap_or("");
613
614        let pat = if arg2.is_empty() { arg1.to_string() } else { format!("{arg1} {arg2}") };
615
616        // Default summary-style commands to the session's pattern
617        // when the user didn't pass one. Explicit args always win.
618        let stats_arg = if arg1.is_empty() { default_pattern } else { arg1 };
619        let methods_arg = if arg1.is_empty() { default_pattern } else { arg1 };
620        let hotspots_arg = if arg2.is_empty() { default_pattern } else { arg2 };
621
622        let cmd = canonical_verb(cmd);
623
624        let result = match cmd {
625            "methods" => index.cmd_methods(methods_arg),
626            "disasm" if arg1.is_empty() && !default_pattern.is_empty() => {
627                index.cmd_disasm(default_pattern)
628            }
629            "disasm" if arg1.is_empty() => "usage: disasm <pattern>\n".into(),
630            "disasm" => index.cmd_disasm(&pat),
631            "search" if arg1.is_empty() => "usage: search <instruction>\n".into(),
632            "search" => index.cmd_search(arg1),
633            "stats" => index.cmd_stats(stats_arg),
634            "calls" if arg1.is_empty() => "usage: calls <pattern>\n".into(),
635            "calls" => index.cmd_calls(arg1),
636            "callers" if arg1.is_empty() => "usage: callers <pattern>\n".into(),
637            "callers" => index.cmd_callers(arg1),
638            "hotspots" => {
639                let n: usize = arg1.parse().unwrap_or(10);
640                index.cmd_hotspots(n, hotspots_arg)
641            }
642            "simd" => index.cmd_simd_filtered(default_pattern),
643            "help" => {
644                "jitdasm commands:\n  \
645                 methods [pattern]    list methods with code sizes (sorted by size)\n  \
646                 disasm <pattern>     show full disassembly for matching methods\n  \
647                 search <instruction> find methods containing an instruction\n  \
648                 stats [pattern]      summary stats — scope to method, class, or namespace\n  \
649                 calls <pattern>      what does this method call?\n  \
650                 callers <pattern>    who calls this method?\n  \
651                 hotspots [N] [pat]   top N methods by code size (default 10)\n  \
652                 simd                 find all methods using SIMD instructions\n  \
653                 help                 show this help\n  \
654                 exit                 quit\n"
655                    .into()
656            }
657            "exit" | "quit" => {
658                // Leaving the REPL does NOT kill the daemon; the
659                // session keeps the capture file and any child
660                // subprocesses alive. Agents (and humans) routinely
661                // forget this and move on, leaving leaked state that
662                // confuses the next `dbg start`. Surface the reminder
663                // at the exit boundary — it's the last thing they see.
664                println!(
665                    "\nREPL closed. The dbg session is still running in the background.\n\
666                     Run `dbg kill` now to release the capture file and any subprocesses."
667                );
668                break;
669            }
670            _ => format!("unknown command: {}. Type 'help' for available commands.\n", cmd),
671        };
672
673        print!("{}", result);
674        stdout.flush()?;
675    }
676
677    Ok(())
678}
679
680#[cfg(test)]
681mod tests {
682    use super::*;
683
684    const SAMPLE: &str = include_str!("../tests/fixtures/jitdasm_sample.asm");
685
    /// A wildcard-only or blank pattern gets the plain "no methods
    /// found" message, not the inlining hint.
691    #[test]
692    fn empty_match_hint_plain_for_wildcard() {
693        // Bare `*` or empty means "whole capture is empty" — a
694        // different problem (no methods compiled at all), so no
695        // inlining hint.
696        assert_eq!(empty_match_hint("*", &[]), "no methods found\n");
697        assert_eq!(empty_match_hint("", &[]), "no methods found\n");
698        assert_eq!(empty_match_hint("  ", &[]), "no methods found\n");
699    }
700
701    #[test]
702    fn empty_match_hint_generic_when_no_callers_known() {
703        let msg = empty_match_hint("FlatLongIntMap:TryGetValue", &[]);
704        assert!(msg.contains("inlined"), "hint should mention inlining: {msg}");
705        assert!(msg.contains("search"), "hint should suggest `search`: {msg}");
706        assert!(
707            msg.contains("caller"),
708            "hint should tell the agent to look at the caller: {msg}"
709        );
710        assert!(
711            msg.contains("disasm <parent-method>"),
712            "hint should spell out the parent-disasm step: {msg}"
713        );
714        assert!(
715            msg.contains("NoInlining"),
716            "hint should mention NoInlining escape hatch: {msg}"
717        );
718        assert!(
719            msg.contains("FlatLongIntMap:TryGetValue"),
720            "hint should echo the pattern: {msg}"
721        );
722    }
723
724    #[test]
725    fn empty_match_hint_advertises_known_callers() {
726        let msg = empty_match_hint(
727            "MathUtils:Length",
728            &["MyNamespace.MathUtils:Normalize(float[]):float[]"],
729        );
730        assert!(
731            msg.contains("Known callers"),
732            "hint should label the caller list: {msg}"
733        );
734        assert!(
735            msg.contains("MathUtils:Normalize"),
736            "hint should name the actual caller: {msg}"
737        );
738        assert!(
739            msg.contains("disasm MyNamespace.MathUtils:Normalize"),
740            "hint should include a ready-to-run `disasm <caller>` command: {msg}"
741        );
742        // Preemptive-mode hint shouldn't belabour the NoInlining escape
743        // hatch — the agent has a concrete next step.
744        assert!(
745            !msg.contains("NoInlining"),
746            "preemptive hint should skip the last-resort escape hatch: {msg}"
747        );
748    }
749
750    #[test]
751    fn extract_call_target_parses_managed_call() {
752        let (fq, short) = extract_call_target(
753            "       call     [MyNamespace.SimdOps:DotProduct(System.ReadOnlySpan`1[float],System.ReadOnlySpan`1[float]):float]",
754        )
755        .unwrap();
756        assert_eq!(fq, "MyNamespace.SimdOps:DotProduct");
757        assert_eq!(short, "SimdOps:DotProduct");
758    }
759
760    #[test]
761    fn extract_call_target_ignores_runtime_helpers() {
762        assert!(extract_call_target("       call     CORINFO_HELP_RNGCHKFAIL").is_none());
763        assert!(extract_call_target("       call     qword ptr [rax+0x10]").is_none());
764        // Must not match lines that merely contain "call" as a substring.
765        assert!(extract_call_target("       mov      rax, callable_ptr").is_none());
766    }
767
768    #[test]
769    fn call_graph_maps_inlined_callees_to_their_callers() {
770        // Fixture scenario: `MyNamespace.MathUtils:Length` is called
771        // inside `MathUtils:Normalize` but has no standalone listing.
772        // Exactly the inlinee → parent case we want to advertise.
773        let idx = JitIndex::parse(SAMPLE);
774        let callers = idx.callers_of("MathUtils:Length");
775        assert!(
776            callers.iter().any(|c| c.contains("MathUtils:Normalize")),
777            "MathUtils:Length should be advertised as called-by MathUtils:Normalize, got {callers:?}"
778        );
779    }
780
781    #[test]
782    fn cmd_disasm_of_inlined_method_emits_parent_body() {
783        // The central behavior: asking for an inlined method's disasm
784        // should transparently show the parent's disassembly (where
785        // the inlined body actually lives), not just name the parent
786        // and make the agent run a second command.
787        let idx = JitIndex::parse(SAMPLE);
788        let out = idx.cmd_disasm("MathUtils:Length");
789
790        // Banner makes the substitution explicit.
791        assert!(
792            out.contains("no standalone body"),
793            "output should flag that the target is inlined: {out}"
794        );
795        assert!(
796            out.contains("parent:") && out.contains("MathUtils:Normalize"),
797            "output should label the parent being shown: {out}"
798        );
799
800        // The parent's actual disassembly must be present, not just
801        // its name. `Normalize`'s body in the fixture contains this
802        // call instruction.
803        assert!(
804            out.contains("call     [MyNamespace.MathUtils:Length"),
805            "output should embed the parent's real disasm body, not a summary: {out}"
806        );
807    }
808
809    #[test]
810    fn cmd_disasm_falls_back_to_generic_hint_when_no_callers() {
811        let idx = JitIndex::parse(SAMPLE);
812        // A name that is neither a standalone method nor referenced by
813        // any call instruction → no call-graph entry, generic hint.
814        let out = idx.cmd_disasm("GhostMethod:Nope");
815        assert!(
816            out.contains("no methods found") || out.contains("NoInlining"),
817            "should fall back to the generic hint: {out}"
818        );
819        assert!(
820            !out.contains("parent:"),
821            "must not fabricate parent listings: {out}"
822        );
823    }
824
825    #[test]
826    fn canonical_verb_maps_jitdasm_to_disasm() {
827        assert_eq!(canonical_verb("jitdasm"), "disasm");
828        assert_eq!(canonical_verb("disasm"), "disasm");
829        assert_eq!(canonical_verb("methods"), "methods");
830        assert_eq!(canonical_verb("garbage"), "garbage");
831    }
832
833    #[test]
834    fn parse_finds_all_methods() {
835        let idx = JitIndex::parse(SAMPLE);
836        assert_eq!(idx.methods.len(), 4);
837    }
838
839    #[test]
840    fn parse_method_names() {
841        let idx = JitIndex::parse(SAMPLE);
842        let names: Vec<&str> = idx.methods.iter().map(|m| m.name.as_str()).collect();
843        assert!(names.iter().any(|n| n.contains("DotProduct") && !n.contains("Scalar")));
844        assert!(names.iter().any(|n| n.contains("ScalarDotProduct")));
845        assert!(names.iter().any(|n| n.contains("Normalize")));
846        assert!(names.iter().any(|n| n.contains("Pipeline:Run")));
847    }
848
849    #[test]
850    fn parse_code_bytes() {
851        let idx = JitIndex::parse(SAMPLE);
852        let dot = idx.methods.iter().find(|m| m.name.contains("DotProduct") && !m.name.contains("Scalar")).unwrap();
853        assert_eq!(dot.code_bytes, 250);
854        let scalar = idx.methods.iter().find(|m| m.name.contains("ScalarDotProduct")).unwrap();
855        assert_eq!(scalar.code_bytes, 96);
856        let norm = idx.methods.iter().find(|m| m.name.contains("Normalize")).unwrap();
857        assert_eq!(norm.code_bytes, 64);
858        let pipeline = idx.methods.iter().find(|m| m.name.contains("Pipeline")).unwrap();
859        assert_eq!(pipeline.code_bytes, 48);
860    }
861
862    #[test]
863    fn cmd_methods_lists_all() {
864        let idx = JitIndex::parse(SAMPLE);
865        let out = idx.cmd_methods("");
866        assert!(out.contains("DotProduct"));
867        assert!(out.contains("ScalarDotProduct"));
868        assert!(out.contains("Normalize"));
869        assert!(out.contains("Pipeline:Run"));
870        assert!(out.contains("250 bytes"));
871        assert!(out.contains("96 bytes"));
872        assert!(out.contains("64 bytes"));
873        assert!(out.contains("48 bytes"));
874    }
875
876    #[test]
877    fn cmd_simd_filtered_narrows_to_pattern() {
878        // Regression: `simd` used to scan every captured method,
879        // so scoping it (via the REPL default pattern) was impossible.
880        let idx = JitIndex::parse(SAMPLE);
881        let narrow = idx.cmd_simd_filtered("DotProduct");
882        let wide = idx.cmd_simd_filtered("");
883        // Narrowed output must be a strict subset of the wide output.
884        assert!(wide.len() >= narrow.len(), "wide should be >= narrow");
885        // Narrow must not mention methods outside the filter.
886        assert!(!narrow.contains("Normalize"), "narrow leaked Normalize:\n{narrow}");
887        assert!(!narrow.contains("Pipeline:Run"), "narrow leaked Pipeline:\n{narrow}");
888    }
889
890    #[test]
891    fn cmd_stats_narrows_by_method_token() {
892        // `:DotProduct` should match only the SimdOps:DotProduct
893        // method listing, not ScalarDotProduct.
894        let idx = JitIndex::parse(SAMPLE);
895        let out = idx.cmd_stats(":DotProduct");
896        assert!(out.contains("filter:"), "expected filter label: {out}");
897        assert!(out.contains("Methods:       1"), "expected 1 method:\n{out}");
898    }
899
900    #[test]
901    fn cmd_methods_filtered_by_class() {
902        let idx = JitIndex::parse(SAMPLE);
903        let out = idx.cmd_methods("SimdOps");
904        assert!(out.contains("DotProduct"));
905        assert!(out.contains("ScalarDotProduct"));
906        assert!(!out.contains("Normalize"));
907        assert!(!out.contains("Pipeline"));
908    }
909
910    #[test]
911    fn cmd_stats_all() {
912        let idx = JitIndex::parse(SAMPLE);
913        let out = idx.cmd_stats("");
914        assert!(out.contains("Methods:       4"));
915        assert!(out.contains("Total code:    458 bytes"));
916        assert!(out.contains("Bounds checks: 2"));
917    }
918
919    #[test]
920    fn cmd_stats_filtered_by_class() {
921        let idx = JitIndex::parse(SAMPLE);
922        let out = idx.cmd_stats("SimdOps");
923        assert!(out.contains("Methods:       2"));
924        assert!(out.contains("Total code:    346 bytes"));
925    }
926
927    #[test]
928    fn cmd_stats_filtered_by_method() {
929        let idx = JitIndex::parse(SAMPLE);
930        let out = idx.cmd_stats("Normalize");
931        assert!(out.contains("Methods:       1"));
932        assert!(out.contains("Total code:    64 bytes"));
933    }
934
935    #[test]
936    fn cmd_stats_filtered_by_namespace() {
937        let idx = JitIndex::parse(SAMPLE);
938        let out = idx.cmd_stats("MyNamespace");
939        assert!(out.contains("Methods:       4"));
940    }
941
942    #[test]
943    fn cmd_disasm_specific_method() {
944        let idx = JitIndex::parse(SAMPLE);
945        let out = idx.cmd_disasm("ScalarDotProduct");
946        assert!(out.contains("ScalarDotProduct"));
947        assert!(out.contains("vxorps   xmm0"));
948        assert!(!out.contains("vmovups")); // from DotProduct only
949    }
950
951    #[test]
952    fn cmd_search_instruction() {
953        let idx = JitIndex::parse(SAMPLE);
954        let out = idx.cmd_search("RNGCHKFAIL");
955        assert!(out.contains("DotProduct"));
956        assert!(out.contains("ScalarDotProduct"));
957        assert!(!out.contains("Normalize"));
958    }
959
960    #[test]
961    fn cmd_search_spills() {
962        let idx = JitIndex::parse(SAMPLE);
963        let out = idx.cmd_search("[rsp");
964        assert!(out.contains("Normalize"));
965    }
966
967    #[test]
968    fn cmd_hotspots_returns_sorted() {
969        let idx = JitIndex::parse(SAMPLE);
970        let out = idx.cmd_hotspots(10, "");
971        let lines: Vec<&str> = out.lines().collect();
972        assert!(lines[0].contains("250")); // DotProduct first (largest)
973    }
974
975    #[test]
976    fn cmd_simd_finds_vectorized() {
977        let idx = JitIndex::parse(SAMPLE);
978        let out = idx.cmd_simd_filtered("");
979        assert!(out.contains("DotProduct"));
980        assert!(out.contains("vmovups"));
981        assert!(out.contains("vmulps"));
982    }
983
984    #[test]
985    fn cmd_simd_ignores_vxorps_zero_init_idiom() {
986        // Regression: `vxorps xmm0, xmm0, xmm0` is the .NET JIT's
987        // standard zero-initialization preamble — it doesn't imply the
988        // method is vectorized. Counting it as a SIMD hit made scalar
989        // methods look indistinguishable from AVX hot loops in `simd`
990        // output, which was the opposite of the command's purpose.
991        let asm = "\
992; Assembly listing for method Broken.Program:SumSlow(System.Int32[]):int (Tier1)
993G_M1_IG01:
994            vxorps   xmm0, xmm0, xmm0
995            xor      eax, eax
996            mov      edx, dword ptr [rcx+0x08]
997            test     edx, edx
998            jle      SHORT G_M1_IG03
999G_M1_IG02:
1000            add      eax, dword ptr [rcx+4*r8+0x10]
1001            inc      r8d
1002            cmp      r9d, r8d
1003            jl       SHORT G_M1_IG02
1004G_M1_IG03:
1005            ret
1006
1007; Total bytes of code: 40
1008";
1009        let idx = JitIndex::parse(asm);
1010        let out = idx.cmd_simd_filtered("");
1011        assert!(
1012            out.contains("no SIMD instructions found"),
1013            "vxorps zero-init should not count as SIMD, got:\n{out}"
1014        );
1015        // The generic xmm-register counter on `stats` is still fine
1016        // showing xmm usage, but the top-level SIMD hit list must be
1017        // limited to *real* vector compute/IO.
1018    }
1019
1020    // --- calls / callers ---
1021
1022    #[test]
1023    fn cmd_calls_shows_targets() {
1024        let idx = JitIndex::parse(SAMPLE);
1025        let out = idx.cmd_calls("Pipeline");
1026        assert!(out.contains("Pipeline:Run"));
1027        assert!(out.contains("→ MyNamespace.MathUtils:Normalize"));
1028        assert!(out.contains("→ MyNamespace.SimdOps:DotProduct"));
1029        assert!(out.contains("→ MyNamespace.SimdOps:ScalarDotProduct"));
1030        assert!(out.contains("3 calls"));
1031    }
1032
1033    #[test]
1034    fn cmd_calls_normalize() {
1035        let idx = JitIndex::parse(SAMPLE);
1036        let out = idx.cmd_calls("Normalize");
1037        assert!(out.contains("→ MyNamespace.SimdOps:DotProduct"));
1038        assert!(out.contains("→ MyNamespace.MathUtils:Length"));
1039        assert!(out.contains("2 calls"));
1040    }
1041
1042    #[test]
1043    fn cmd_calls_no_calls() {
1044        let idx = JitIndex::parse(SAMPLE);
1045        // ScalarDotProduct only calls CORINFO_HELP_RNGCHKFAIL (a JIT helper, not a method)
1046        let out = idx.cmd_calls("ScalarDotProduct");
1047        assert!(out.contains("1 call")); // RNGCHKFAIL
1048    }
1049
1050    #[test]
1051    fn cmd_callers_dotproduct() {
1052        let idx = JitIndex::parse(SAMPLE);
1053        let out = idx.cmd_callers("DotProduct");
1054        // Normalize and Pipeline both call DotProduct
1055        assert!(out.contains("Normalize"));
1056        assert!(out.contains("Pipeline:Run"));
1057    }
1058
1059    #[test]
1060    fn cmd_callers_normalize() {
1061        let idx = JitIndex::parse(SAMPLE);
1062        let out = idx.cmd_callers("Normalize");
1063        // Only Pipeline calls Normalize
1064        assert!(out.contains("Pipeline:Run"));
1065        assert!(!out.contains("DotProduct"));
1066    }
1067
1068    #[test]
1069    fn cmd_callers_nobody() {
1070        let idx = JitIndex::parse(SAMPLE);
1071        let out = idx.cmd_callers("Pipeline");
1072        assert!(out.contains("no callers found"));
1073    }
1074
1075    #[test]
1076    fn extract_calls_strips_brackets() {
1077        let body = "       call     [Foo:Bar(int):void]\n       call     CORINFO_HELP_RNGCHKFAIL\n";
1078        let calls = JitIndex::extract_calls(body);
1079        assert_eq!(calls.len(), 2);
1080        assert_eq!(calls[0], "Foo:Bar(int):void");
1081        assert_eq!(calls[1], "CORINFO_HELP_RNGCHKFAIL");
1082    }
1083
1084    #[test]
1085    fn extract_calls_skips_comments() {
1086        let body = "; call this is a comment\n       call     [Real:Call()]\n";
1087        let calls = JitIndex::extract_calls(body);
1088        assert_eq!(calls.len(), 1);
1089        assert_eq!(calls[0], "Real:Call()");
1090    }
1091
1092    #[test]
1093    fn parse_code_bytes_with_equals_format() {
1094        // Some .NET versions emit "; Total bytes of code = 42"
1095        let asm = "; Assembly listing for method Foo:Bar()\n\
1096                    ; Emitting BLENDED_CODE for X64\n\n\
1097                    push rbp\n\
1098                    ret\n\
1099                    ; Total bytes of code = 42\n";
1100        let idx = JitIndex::parse(asm);
1101        assert_eq!(idx.methods.len(), 1);
1102        assert_eq!(idx.methods[0].code_bytes, 42);
1103    }
1104
1105    #[test]
1106    fn parse_code_bytes_plain_format() {
1107        // Standard format: "; Total bytes of code 250"
1108        let asm = "; Assembly listing for method Foo:Baz()\n\n\
1109                    nop\n\
1110                    ; Total bytes of code 250\n";
1111        let idx = JitIndex::parse(asm);
1112        assert_eq!(idx.methods.len(), 1);
1113        assert_eq!(idx.methods[0].code_bytes, 250);
1114    }
1115}