Skip to main content

wasmtime_cli/commands/
hot_blocks.rs

1//! Implementation of the `wasmtime hot-blocks` subcommand.
2
3use crate::common::{RunCommon, RunTarget};
4use capstone::arch::BuildsCapstone;
5use clap::Parser;
6use std::borrow::Cow;
7use std::collections::BTreeMap;
8use std::io::{self, BufWriter, Write};
9use std::path::{Path, PathBuf};
10use std::process::Command;
11use std::str::FromStr;
12use tempfile::tempdir;
13use wasmtime::{
14    CodeBuilder, CodeHint, Engine, FuncIndex, ModuleFunction, Result, StaticModuleIndex, bail,
15    error::Context as _, format_err,
16};
17
18/// Profile a WebAssembly module or component's execution and print the hottest
19/// basic blocks.
20///
21/// This command compiles the given Wasm module/component, runs it under `perf
22/// record`, and then analyzes the resulting profile to find the hottest basic
23/// blocks in the compiled code. Each basic block is printed with its assembly,
24/// CLIF IR, and original Wasm instructions.
25///
26/// This subcommand is only available on Linux.
27#[derive(Parser)]
28#[command(name = "hot-blocks")]
29pub struct HotBlocksCommand {
30    #[command(flatten)]
31    run: RunCommon,
32
33    /// Print the hottest basic blocks that cover at least this percent of
34    /// total execution samples.
35    ///
36    /// Must be a number between 0 and 100 inclusive.
37    #[clap(short, long, default_value = "50")]
38    percent: f64,
39
40    /// The kind of perf event to record.
41    #[clap(short, long, value_enum, default_value = "cpu-cycles")]
42    event: Event,
43
44    /// The sampling frequency to use with `perf record -F`.
45    ///
46    /// Higher values give more samples but may slow execution.
47    #[clap(short = 'F', long)]
48    frequency: Option<u64>,
49
50    /// The file to write the output to. When omitted, output goes to stdout.
51    #[clap(short, long)]
52    output: Option<PathBuf>,
53
54    /// The WebAssembly module or component to profile.
55    #[arg(required = true, value_name = "MODULE")]
56    module: PathBuf,
57
58    /// Arguments to pass to the WebAssembly module.
59    #[arg(trailing_var_arg = true)]
60    module_args: Vec<String>,
61}
62
63/// The kind of perf event to record.
64#[derive(Clone, Debug, clap::ValueEnum)]
65pub enum Event {
66    /// Record instructions retired.
67    ///
68    /// Corresponds to `perf record -e instructions`.
69    Instructions,
70    /// Record CPU cycles.
71    ///
72    /// Corresponds to `perf record -e cpu-cycles`.
73    CpuCycles,
74}
75
76impl Event {
77    fn perf_event(&self) -> &str {
78        match self {
79            Event::Instructions => "instructions",
80            Event::CpuCycles => "cpu-cycles",
81        }
82    }
83}
84
/// Position of a basic block within a function's list of basic blocks,
/// counted from zero.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct BlockIndex(usize);
88
/// Byte offset measured from the start of a function's compiled code.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct FunctionOffset(usize);
92
93impl HotBlocksCommand {
94    /// Executes the command.
95    pub fn execute(mut self) -> Result<()> {
96        self.run.common.init_logging()?;
97
98        if !(0.0..=100.0).contains(&self.percent) {
99            bail!("--percent must be between 0 and 100 inclusive");
100        }
101
102        // Ensure address maps are enabled (error if explicitly disabled).
103        if self.run.common.debug.address_map == Some(false) {
104            bail!(
105                "address maps must be enabled for hot-blocks profiling; do not pass -Daddress-map=n"
106            );
107        }
108        self.run.common.debug.address_map = Some(true);
109
110        let tmp_dir = tempdir().context("failed to create temp directory")?;
111
112        // Compile the input Wasm to a .cwasm, emitting CLIF to the temp dir.
113        let clif_dir = tmp_dir.path().join("clif");
114        std::fs::create_dir(&clif_dir)?;
115        let cwasm_path = tmp_dir.path().join("module.cwasm");
116
117        let wasm_bytes =
118            Cow::Owned(std::fs::read(&self.module).with_context(|| {
119                format!("failed to read Wasm module: {}", self.module.display())
120            })?);
121        #[cfg(feature = "wat")]
122        let wasm_bytes = wat::parse_bytes(&wasm_bytes).map_err(|mut e| {
123            e.set_path(&self.module);
124            e
125        })?;
126
127        let engine = self.compile_to_cwasm(&clif_dir, &cwasm_path, &wasm_bytes)?;
128
129        // Run perf record.
130        let perf_data_path = tmp_dir.path().join("perf.data");
131        self.run_perf_record(&cwasm_path, &perf_data_path)?;
132
133        // Run perf script and parse samples.
134        let (samples, total_samples) = self.run_perf_script(&perf_data_path)?;
135
136        let target = match self.run.common.target.as_deref() {
137            None => target_lexicon::Triple::host(),
138            Some(t) => target_lexicon::Triple::from_str(t)?,
139        };
140
141        // Build the WAT offset map using wasmprinter.
142        let wat_map = build_wat_offset_map(&wasm_bytes);
143
144        // Deserialize the cwasm to extract functions, text, and address map.
145        self.run.allow_precompiled = true;
146        let run_target = self.run.load_module(&engine, &cwasm_path, None)?;
147        let (functions, text, address_map) = match &run_target {
148            RunTarget::Core(module) => (
149                module.functions().collect::<Vec<_>>(),
150                module.text(),
151                module
152                    .address_map()
153                    .ok_or_else(|| {
154                        format_err!("address maps are not available in the compiled module")
155                    })?
156                    .collect::<Vec<_>>(),
157            ),
158            #[cfg(feature = "component-model")]
159            RunTarget::Component(component) => (
160                component.functions().collect::<Vec<_>>(),
161                component.text(),
162                component
163                    .address_map()
164                    .ok_or_else(|| {
165                        format_err!("address maps are not available in the compiled component")
166                    })?
167                    .collect::<Vec<_>>(),
168            ),
169        };
170
171        let mut output: Box<dyn Write> = match &self.output {
172            Some(path) => {
173                let file = std::fs::File::create(path)
174                    .with_context(|| format!("failed to create output file: {}", path.display()))?;
175                Box::new(BufWriter::new(file))
176            }
177            None => Box::new(io::stdout()),
178        };
179
180        self.format_hot_blocks(
181            &samples,
182            total_samples,
183            &functions,
184            &text,
185            &address_map,
186            &clif_dir,
187            &wat_map,
188            &target,
189            &mut *output,
190        )?;
191
192        Ok(())
193    }
194
195    /// Compile the input Wasm bytes to a `.cwasm` file, emitting CLIF to `clif_dir`.
196    ///
197    /// Returns the engine used for compilation.
198    fn compile_to_cwasm(
199        &mut self,
200        clif_dir: &Path,
201        cwasm_path: &Path,
202        wasm_bytes: &[u8],
203    ) -> Result<Engine> {
204        let mut config = self.run.common.config(None)?;
205        config.emit_clif(clif_dir);
206
207        let engine = Engine::new(&config)?;
208
209        let mut code = CodeBuilder::new(&engine);
210        code.wasm_binary_or_text(wasm_bytes, Some(&self.module))?;
211
212        let serialized = match code.hint() {
213            #[cfg(feature = "component-model")]
214            Some(CodeHint::Component) => code.compile_component_serialized()?,
215            #[cfg(not(feature = "component-model"))]
216            Some(CodeHint::Component) => {
217                bail!("component model support was disabled at compile time")
218            }
219            Some(CodeHint::Module) | None => code.compile_module_serialized()?,
220        };
221        std::fs::write(cwasm_path, &serialized)
222            .with_context(|| format!("failed to write cwasm: {}", cwasm_path.display()))?;
223
224        Ok(engine)
225    }
226
227    /// Run `perf record` on the compiled `.cwasm` file.
228    fn run_perf_record(&self, cwasm_path: &Path, perf_data_path: &Path) -> Result<()> {
229        let current_exe =
230            std::env::current_exe().context("failed to determine current executable")?;
231
232        let mut perf_cmd = Command::new("perf");
233        perf_cmd
234            .arg("record")
235            .arg("-e")
236            .arg(self.event.perf_event())
237            .arg("--no-buildid")
238            .arg("-o")
239            .arg(perf_data_path);
240
241        if let Some(freq) = self.frequency {
242            perf_cmd.arg("-F").arg(freq.to_string());
243        }
244
245        perf_cmd
246            .arg("--")
247            .arg(&current_exe)
248            .arg("run")
249            .arg("--allow-precompiled")
250            .arg("--profile=perfmap");
251
252        // Forward all RunCommon flags to the nested `wasmtime run` subprocess.
253        for arg in self.run.to_string().split_whitespace() {
254            perf_cmd.arg(arg);
255        }
256
257        perf_cmd.arg(cwasm_path.as_os_str());
258        for arg in &self.module_args {
259            perf_cmd.arg(arg);
260        }
261
262        let perf_output = perf_cmd
263            .output()
264            .context("failed to run `perf record`; is `perf` installed?")?;
265        if !perf_output.status.success() {
266            let stderr = String::from_utf8_lossy(&perf_output.stderr);
267            bail!("perf record failed:\n{stderr}");
268        }
269
270        Ok(())
271    }
272
273    /// Run `perf script` and parse the output into samples.
274    fn run_perf_script(&self, perf_data_path: &Path) -> Result<(Vec<PerfSample>, usize)> {
275        let perf_script_output = Command::new("perf")
276            .arg("script")
277            .arg("-i")
278            .arg(perf_data_path)
279            .arg("-F")
280            .arg("ip,sym,symoff,dso")
281            .output()
282            .context("failed to run `perf script`")?;
283        if !perf_script_output.status.success() {
284            let stderr = String::from_utf8_lossy(&perf_script_output.stderr);
285            bail!("perf script failed:\n{stderr}");
286        }
287
288        let script_text = String::from_utf8_lossy(&perf_script_output.stdout);
289        Ok(parse_perf_script(&script_text))
290    }
291
292    /// Format hot blocks output.
293    fn format_hot_blocks(
294        &self,
295        samples: &[PerfSample],
296        total_samples: usize,
297        functions: &[ModuleFunction],
298        text: &[u8],
299        address_map: &[(usize, Option<u32>)],
300        clif_dir: &Path,
301        wat_map: &BTreeMap<u32, String>,
302        target: &target_lexicon::Triple,
303        output: &mut dyn Write,
304    ) -> Result<()> {
305        let wasm_samples = samples.len();
306        writeln!(
307            output,
308            "Collected {total_samples} total samples; {wasm_samples} ({:.2}%) Wasm samples.",
309            wasm_samples as f64 / total_samples as f64 * 100.0,
310        )?;
311        writeln!(output)?;
312
313        if wasm_samples == 0 {
314            writeln!(output, "No samples collected within WebAssembly code.")?;
315            return Ok(());
316        }
317
318        // Build a map from (module, func_index) to &ModuleFunction for fast lookups.
319        let func_map: BTreeMap<(StaticModuleIndex, FuncIndex), &ModuleFunction> =
320            functions.iter().map(|f| ((f.module, f.index), f)).collect();
321
322        // For each function that has samples, build basic blocks lazily.
323        let mut func_blocks: BTreeMap<(StaticModuleIndex, FuncIndex), Vec<BasicBlock>> =
324            BTreeMap::new();
325
326        // Count samples per (module, func_index, block_index).
327        let mut block_samples: BTreeMap<(StaticModuleIndex, FuncIndex, BlockIndex), u64> =
328            BTreeMap::new();
329        // Also count samples per (module, func_index, block_index, offset_in_func).
330        let mut inst_samples: BTreeMap<
331            (StaticModuleIndex, FuncIndex, BlockIndex, FunctionOffset),
332            u64,
333        > = BTreeMap::new();
334
335        for sample in samples {
336            let Some(func) = find_function_for_sample(sample, functions) else {
337                continue;
338            };
339            let key = (func.module, func.index);
340
341            // Lazily build basic blocks for this function.
342            let blocks = func_blocks.entry(key).or_insert_with(|| {
343                let body = &text[func.offset..][..func.len];
344                let clif_lines =
345                    read_clif_file(clif_dir, func.module, func.index, func.name.as_deref());
346                build_basic_blocks(body, func.offset, address_map, &clif_lines, wat_map, target)
347                    .unwrap_or_default()
348            });
349
350            let offset_in_func = FunctionOffset(usize::try_from(sample.offset).unwrap());
351            if let Some(block_idx) = find_block_for_offset(blocks, offset_in_func) {
352                *block_samples.entry((key.0, key.1, block_idx)).or_default() += 1;
353                *inst_samples
354                    .entry((key.0, key.1, block_idx, offset_in_func))
355                    .or_default() += 1;
356            }
357        }
358
359        // Sort by most samples to least.
360        let mut sorted_blocks: Vec<_> = block_samples.into_iter().collect();
361        sorted_blocks.sort_by(|a, b| b.1.cmp(&a.1));
362
363        let total_f64 = total_samples as f64;
364
365        // Print hot blocks until we reach the percent threshold.
366        let mut samples_printed: u64 = 0;
367        for ((mod_idx, func_idx, block_idx), block_sample_count) in &sorted_blocks {
368            let percent_printed = samples_printed as f64 / total_f64 * 100.0;
369            if percent_printed >= self.percent {
370                break;
371            }
372
373            let block_percent = *block_sample_count as f64 / total_f64 * 100.0;
374
375            // Look up the function name from the map.
376            let func_name = func_map
377                .get(&(*mod_idx, *func_idx))
378                .and_then(|f| f.name.clone())
379                .unwrap_or_else(|| {
380                    format!(
381                        "wasm[{}]::function[{}]",
382                        mod_idx.as_u32(),
383                        func_idx.as_u32()
384                    )
385                });
386
387            let blocks = func_blocks.get(&(*mod_idx, *func_idx)).unwrap();
388            let block = &blocks[block_idx.0];
389
390            // Trim leading instructions that have no samples.
391            let first_sampled = block
392                .instructions
393                .iter()
394                .position(|inst| {
395                    inst_samples
396                        .get(&(
397                            *mod_idx,
398                            *func_idx,
399                            *block_idx,
400                            FunctionOffset(inst.offset_in_func),
401                        ))
402                        .copied()
403                        .unwrap_or(0)
404                        > 0
405                })
406                .unwrap_or(0);
407            let visible_instructions = &block.instructions[first_sampled..];
408
409            writeln!(
410                output,
411                "`{func_name}` :: block {:#x} :: {block_percent:.2}% total samples",
412                block.instructions[first_sampled].offset_in_func,
413            )?;
414            writeln!(output)?;
415
416            // Calculate column widths.
417            let max_asm_len = visible_instructions
418                .iter()
419                .map(|i| i.assembly.len())
420                .max()
421                .unwrap_or(10);
422            let max_clif_len = visible_instructions
423                .iter()
424                .map(|i| i.clif.as_ref().map_or(1, |c| c.len()))
425                .max()
426                .unwrap_or(6);
427
428            let asm_width = max_asm_len.clamp(10, 60);
429            let clif_width = max_clif_len.clamp(6, 40);
430
431            writeln!(
432                output,
433                "{:>10}   {:<asm_width$}   {:<clif_width$}   {}",
434                "[Samples]", "[Assembly]", "[CLIF]", "[Wasm]"
435            )?;
436
437            let mut prev_clif: Option<(&str, Option<u32>)> = None;
438            let mut prev_wasm: Option<(&str, Option<u32>)> = None;
439
440            for inst in visible_instructions {
441                let sample_count = inst_samples
442                    .get(&(
443                        *mod_idx,
444                        *func_idx,
445                        *block_idx,
446                        FunctionOffset(inst.offset_in_func),
447                    ))
448                    .copied()
449                    .unwrap_or(0);
450
451                let sample_str = if sample_count > 0 {
452                    format!("{:.2}%", sample_count as f64 / total_f64 * 100.0)
453                } else {
454                    String::new()
455                };
456
457                let asm_str = &inst.assembly[..inst.assembly.len().min(asm_width)];
458
459                // Determine CLIF display, using ditto marks for repeated same-offset instructions.
460                let clif_display = if let Some(ref clif_text) = inst.clif {
461                    let current = (clif_text.as_str(), inst.wasm_offset);
462                    if prev_clif == Some(current) {
463                        "\"".to_string()
464                    } else {
465                        prev_clif = Some((clif_text.as_str(), inst.wasm_offset));
466                        clif_text.clone()
467                    }
468                } else {
469                    prev_clif = None;
470                    "-".to_string()
471                };
472                let clif_display = &clif_display[..clif_display.len().min(clif_width)];
473
474                // Determine Wasm display, using ditto marks for repeated same-offset instructions.
475                let wasm_display = if let Some(ref wasm_text) = inst.wasm {
476                    let current = (wasm_text.as_str(), inst.wasm_offset);
477                    if prev_wasm == Some(current) {
478                        "\"".to_string()
479                    } else {
480                        prev_wasm = Some((wasm_text.as_str(), inst.wasm_offset));
481                        wasm_text.clone()
482                    }
483                } else {
484                    prev_wasm = None;
485                    "-".to_string()
486                };
487                let wasm_display = &wasm_display[..wasm_display.len().min(40)];
488
489                writeln!(
490                    output,
491                    "{sample_str:>10}   {asm_str:<asm_width$}   {clif_display:<clif_width$}   {wasm_display}",
492                )?;
493            }
494            writeln!(output)?;
495
496            samples_printed += block_sample_count;
497        }
498
499        Ok(())
500    }
501}
502
/// One sample extracted from a line of `perf script` output.
#[derive(Clone, Debug)]
struct PerfSample {
    /// Symbol name reported by perf, e.g. `wasm[0]::function[3]`.
    symbol: String,
    /// Offset of the sampled address from the start of `symbol`.
    offset: u64,
}
511
512/// Parse `perf script -F ip,sym,symoff,dso` output to extract samples that
513/// come from a perf map (i.e. compiled WebAssembly code and trampolines).
514fn parse_perf_script(output: &str) -> (Vec<PerfSample>, usize) {
515    let mut samples = Vec::new();
516    let mut total_samples = 0;
517    for line in output.lines() {
518        total_samples += 1;
519        if let Some(sample) = parse_perf_script_line(line.trim()) {
520            samples.push(sample);
521        }
522    }
523    (samples, total_samples)
524}
525
526fn parse_perf_script_line(line: &str) -> Option<PerfSample> {
527    // perf script -F ip,sym,symoff,dso gives lines like:
528    //   7f1234567890 wasm[0]::function[3]+0x10 (/tmp/perf-1234.map)
529    //   7f1234567890 wasm[0]::function[3]+0x10 (/path/to/module.cwasm)
530    // Filter by whether the DSO is a perf map or a cwasm file.
531
532    // Check for a `.map)` or `.cwasm)` suffix.
533    if !line.ends_with(".map)") && !line.ends_with(".cwasm)") {
534        return None;
535    }
536
537    // Skip the instruction pointer prefix.
538    let rest = line.trim_start_matches(|c: char| c.is_ascii_hexdigit() || c == ' ');
539
540    // Find "symbol+0xoffset"
541    let (sym_with_offset, _dso) = rest.split_once(" (").unwrap_or((rest, ""));
542    let sym_with_offset = sym_with_offset.trim();
543
544    let (symbol, offset_str) = sym_with_offset
545        .rsplit_once('+')
546        .unwrap_or((sym_with_offset, "0x0"));
547    let offset_str = offset_str.trim_start_matches("0x").trim_start_matches("0X");
548    let offset = u64::from_str_radix(offset_str, 16).unwrap_or(0);
549
550    Some(PerfSample {
551        symbol: symbol.to_string(),
552        offset,
553    })
554}
555
/// A single disassembled machine instruction together with the CLIF and Wasm
/// text associated with it.
#[derive(Clone, Debug)]
struct BlockInstruction {
    /// Byte offset of the instruction from the start of its function.
    offset_in_func: usize,
    /// Disassembly text, e.g. `movq [rbx], rcx`.
    assembly: String,
    /// CLIF text associated with this instruction, when there is any.
    clif: Option<String>,
    /// Wasm bytecode offset this instruction maps to, when known.
    wasm_offset: Option<u32>,
    /// WAT disassembly associated with this instruction, when there is any.
    wasm: Option<String>,
}
570
571/// A basic block in a compiled function.
572#[derive(Debug, Clone)]
573struct BasicBlock {
574    /// Instructions in this block.
575    instructions: Vec<BlockInstruction>,
576}
577
578/// Build a capstone disassembler for the given target architecture.
579fn build_capstone(target: &target_lexicon::Triple) -> Result<capstone::Capstone> {
580    let mut cs = match target.architecture {
581        target_lexicon::Architecture::Aarch64(_) => capstone::Capstone::new()
582            .arm64()
583            .mode(capstone::arch::arm64::ArchMode::Arm)
584            .detail(true)
585            .build()
586            .map_err(|e| format_err!("{e}"))?,
587        target_lexicon::Architecture::Riscv64(_) => capstone::Capstone::new()
588            .riscv()
589            .mode(capstone::arch::riscv::ArchMode::RiscV64)
590            .detail(true)
591            .build()
592            .map_err(|e| format_err!("{e}"))?,
593        target_lexicon::Architecture::S390x => capstone::Capstone::new()
594            .sysz()
595            .mode(capstone::arch::sysz::ArchMode::Default)
596            .detail(true)
597            .build()
598            .map_err(|e| format_err!("{e}"))?,
599        target_lexicon::Architecture::X86_64 => capstone::Capstone::new()
600            .x86()
601            .mode(capstone::arch::x86::ArchMode::Mode64)
602            .detail(true)
603            .build()
604            .map_err(|e| format_err!("{e}"))?,
605        _ => bail!("unsupported target architecture: {target}"),
606    };
607    // Skip over anything that looks like data (inline constant pools, etc.).
608    cs.set_skipdata(true).unwrap();
609    Ok(cs)
610}
611
612/// Build basic blocks for a function by disassembling its code and splitting
613/// at control flow boundaries.
614fn build_basic_blocks(
615    func_body: &[u8],
616    func_offset: usize,
617    address_map: &[(usize, Option<u32>)],
618    clif_lines: &[(Option<u32>, String)],
619    wat_map: &BTreeMap<u32, String>,
620    target: &target_lexicon::Triple,
621) -> Result<Vec<BasicBlock>> {
622    let cs = build_capstone(target)?;
623    let insts =
624        crate::disas::disas_with_capstone(&cs, func_body, u64::try_from(func_offset).unwrap())?;
625
626    // Build a map from code offset -> wasm offset for instructions in this function.
627    let mut offset_to_wasm: BTreeMap<usize, Option<u32>> = BTreeMap::new();
628    for &(code_offset, wasm_offset) in address_map {
629        if code_offset >= func_offset && code_offset < func_offset + func_body.len() {
630            offset_to_wasm.insert(code_offset, wasm_offset);
631        }
632    }
633
634    // Build a map from wasm offset -> CLIF text.
635    let mut wasm_to_clif: BTreeMap<u32, Vec<&str>> = BTreeMap::new();
636    for (wasm_off, clif_text) in clif_lines {
637        if let Some(off) = wasm_off {
638            wasm_to_clif.entry(*off).or_default().push(clif_text);
639        }
640    }
641
642    // Build annotated instructions and split into basic blocks.
643    let mut blocks = Vec::new();
644    let mut current_block = Vec::new();
645
646    for inst in &insts {
647        let addr = usize::try_from(inst.address).unwrap();
648        let offset_in_func = addr - func_offset;
649
650        // Find wasm offset for this instruction.
651        let wasm_offset = find_wasm_offset_for_address(&offset_to_wasm, addr);
652
653        // Find CLIF text for this wasm offset.
654        let clif = wasm_offset
655            .and_then(|wo| wasm_to_clif.get(&wo))
656            .map(|lines| lines.join("; "));
657
658        // Find Wasm text for this wasm offset from the WAT map.
659        let wasm = wasm_offset.and_then(|wo| wat_map.get(&wo).cloned());
660
661        current_block.push(BlockInstruction {
662            offset_in_func,
663            assembly: inst.disassembly.clone(),
664            clif,
665            wasm_offset,
666            wasm,
667        });
668
669        if inst.is_jump || inst.is_return {
670            blocks.push(BasicBlock {
671                instructions: std::mem::take(&mut current_block),
672            });
673        }
674    }
675    // Don't forget the last block if it didn't end with a branch.
676    if !current_block.is_empty() {
677        blocks.push(BasicBlock {
678            instructions: current_block,
679        });
680    }
681
682    Ok(blocks)
683}
684
/// Looks up the Wasm offset for a code address.
///
/// Uses the entry with the greatest key that is `<= addr`. Returns `None` when
/// there is no such entry or when that entry carries no Wasm offset.
fn find_wasm_offset_for_address(
    offset_to_wasm: &BTreeMap<usize, Option<u32>>,
    addr: usize,
) -> Option<u32> {
    let (_, wasm_off) = offset_to_wasm.range(..=addr).next_back()?;
    *wasm_off
}
696
697/// Build a map from wasm bytecode offset to WAT disassembly text using wasmprinter.
698fn build_wat_offset_map(wasm_bytes: &[u8]) -> BTreeMap<u32, String> {
699    let mut map = BTreeMap::new();
700    let printer = wasmprinter::Config::new();
701    let mut storage = String::new();
702    let Ok(chunks) = printer.offsets_and_lines(wasm_bytes, &mut storage) else {
703        return map;
704    };
705    for (offset, wat_line) in chunks {
706        if let Some(offset) = offset {
707            let trimmed = wat_line.trim();
708            if !trimmed.is_empty() && !trimmed.starts_with('(') && !trimmed.starts_with(')') {
709                map.insert(u32::try_from(offset).unwrap(), trimmed.to_string());
710            }
711        }
712    }
713    map
714}
715
716/// Read CLIF file for a given function, returning pairs of
717/// (wasm_offset, clif_line).
718fn read_clif_file(
719    clif_dir: &Path,
720    mod_idx: StaticModuleIndex,
721    func_index: FuncIndex,
722    func_name: Option<&str>,
723) -> Vec<(Option<u32>, String)> {
724    let contents = find_and_read_clif(clif_dir, mod_idx, func_index, func_name);
725    let Some(contents) = contents else {
726        return Vec::new();
727    };
728
729    let mut result = Vec::new();
730    for line in contents.lines() {
731        if line.is_empty() {
732            continue;
733        }
734        // CLIF lines come in these formats:
735        //   "@0042                           v12 = load.i64 v10+8"  (with wasm offset)
736        //   "                                v12 = ..."            (no wasm offset, 32-char indent)
737        //   "block0(v0: i64, ...):"                                (block headers, etc.)
738        let trimmed = line.trim_start();
739        if trimmed.starts_with('@') {
740            let offset = u32::from_str_radix(&trimmed[1..5], 16).ok();
741            // Find the instruction text after the padding.
742            let text = trimmed[5..].trim().to_string();
743            result.push((offset, text));
744        } else if line.starts_with(' ') {
745            let text = trimmed.to_string();
746            result.push((None, text));
747        } else {
748            result.push((None, trimmed.to_string()));
749        }
750    }
751    result
752}
753
754/// Find and read a CLIF file for a function, using the naming convention from
755/// `finish_with_info` in `crates/cranelift/src/compiler.rs`.
756fn find_and_read_clif(
757    clif_dir: &Path,
758    mod_idx: StaticModuleIndex,
759    func_index: FuncIndex,
760    func_name: Option<&str>,
761) -> Option<String> {
762    let mod_idx = mod_idx.as_u32();
763    let func_idx = func_index.as_u32();
764
765    // Try with name: "wasm[N]--function[M]--name.clif"
766    if let Some(name) = func_name {
767        let short_name = name.rsplit("::").next().unwrap_or(name);
768        let path = clif_dir.join(format!(
769            "wasm[{mod_idx}]--function[{func_idx}]--{short_name}.clif"
770        ));
771        if let Ok(contents) = std::fs::read_to_string(&path) {
772            return Some(contents);
773        }
774    }
775
776    // Try without name: "wasm[N]--function[M].clif"
777    let path = clif_dir.join(format!("wasm[{mod_idx}]--function[{func_idx}].clif"));
778    if let Ok(contents) = std::fs::read_to_string(&path) {
779        return Some(contents);
780    }
781
782    None
783}
784
785/// Parse a perfmap-style function name like "wasm[0]::function[3]" to extract
786/// the module and function indices.
787fn parse_wasm_func_name(name: &str) -> Option<(StaticModuleIndex, FuncIndex)> {
788    // Pattern: "wasm[<module>]::function[<func>]"
789    let rest = name.strip_prefix("wasm[")?;
790    let (mod_idx_str, rest) = rest.split_once(']')?;
791    let rest = rest.strip_prefix("::function[")?;
792    let (func_idx_str, _) = rest.split_once(']')?;
793    let mod_idx: u32 = mod_idx_str.parse().ok()?;
794    let func_idx: u32 = func_idx_str.parse().ok()?;
795    Some((
796        StaticModuleIndex::from_u32(mod_idx),
797        FuncIndex::from_u32(func_idx),
798    ))
799}
800
801/// Match a perf sample's symbol to a ModuleFunction using binary search by
802/// (module, func_index).
803///
804/// `functions` must be sorted by `(module, index)` (ascending), which is the
805/// natural order since module and function indices increase monotonically.
806fn find_function_for_sample<'a>(
807    sample: &PerfSample,
808    functions: &'a [ModuleFunction],
809) -> Option<&'a ModuleFunction> {
810    let (mod_idx, func_idx) = parse_wasm_func_name(&sample.symbol)?;
811    functions
812        .binary_search_by_key(&(mod_idx, func_idx), |f| (f.module, f.index))
813        .ok()
814        .map(|i| &functions[i])
815}
816
817/// Find which basic block an offset falls into, using binary search.
818fn find_block_for_offset(
819    blocks: &[BasicBlock],
820    offset_in_func: FunctionOffset,
821) -> Option<BlockIndex> {
822    let idx = blocks
823        .binary_search_by_key(&offset_in_func.0, |b| b.instructions[0].offset_in_func)
824        .unwrap_or_else(|i| i.saturating_sub(1));
825    let block = blocks.get(idx)?;
826    let last_offset = block.instructions.last()?.offset_in_func;
827    if offset_in_func.0 >= block.instructions[0].offset_in_func && offset_in_func.0 <= last_offset {
828        Some(BlockIndex(idx))
829    } else {
830        None
831    }
832}
833
#[cfg(all(
    test,
    // Nothing here exercises unsafe code, so running these tests under MIRI
    // is generally uninteresting.
    not(miri)
))]
mod test {
    use super::*;

    /// A perf-map DSO line (with leading whitespace) yields symbol and offset.
    #[test]
    fn test_parse_perf_script_line_map_dso() {
        let parsed =
            parse_perf_script_line(" 7f1234567890 wasm[0]::function[3]+0x10 (/tmp/perf-1234.map)")
                .unwrap();
        assert_eq!(parsed.symbol, "wasm[0]::function[3]");
        assert_eq!(parsed.offset, 0x10);
    }

    /// A `+0x0` suffix (and no leading whitespace) parses to offset zero.
    #[test]
    fn test_parse_perf_script_line_no_offset() {
        let parsed =
            parse_perf_script_line("7f1234567890 wasm[0]::function[0]+0x0 (/tmp/perf-123.map)")
                .unwrap();
        assert_eq!(parsed.symbol, "wasm[0]::function[0]");
        assert_eq!(parsed.offset, 0);
    }

    /// Lines whose DSO is neither a `.map` nor a `.cwasm` file are dropped.
    #[test]
    fn test_parse_perf_script_line_non_map_dso() {
        let native = "7f1234567890 main+0x10 (/usr/bin/wasmtime)";
        assert!(parse_perf_script_line(native).is_none());
    }

    /// A `.cwasm` DSO line is accepted just like a perf-map one.
    #[test]
    fn test_parse_perf_script_line_cwasm_dso() {
        let parsed = parse_perf_script_line(
            " 7f1234567890 wasm[0]::function[1]+0x20 (/tmp/.tmpABC123/module.cwasm)",
        )
        .unwrap();
        assert_eq!(parsed.symbol, "wasm[0]::function[1]");
        assert_eq!(parsed.offset, 0x20);
    }

    /// Trampoline symbols that appear in perf maps are captured too.
    #[test]
    fn test_parse_perf_script_line_trampoline() {
        let parsed =
            parse_perf_script_line("7f1234567890 trampoline+0x5 (/tmp/perf-1234.map)").unwrap();
        assert_eq!(parsed.symbol, "trampoline");
        assert_eq!(parsed.offset, 0x5);
    }

    /// Well-formed names decode to their indices; anything else is `None`.
    #[test]
    fn test_parse_wasm_func_name() {
        let accepted = [
            ("wasm[0]::function[3]", 0, 3),
            ("wasm[1]::function[42]", 1, 42),
        ];
        for (name, module, func) in accepted {
            assert_eq!(
                parse_wasm_func_name(name),
                Some((StaticModuleIndex::from_u32(module), FuncIndex::from_u32(func)))
            );
        }
        assert_eq!(parse_wasm_func_name("main"), None);
    }

    /// End-to-end check of block construction, annotation, and offset lookup
    /// on a hand-assembled two-block function.
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_mocked_hot_blocks() {
        // Machine code for a tiny x86_64 function made of two blocks:
        //   block 0: nop; nop; jmp +0   (the jump closes the block)
        //   block 1: nop; ret
        let func_body: &[u8] = &[
            0x90, // nop     at offset 0
            0x90, // nop     at offset 1
            0xeb, 0x00, // jmp +0  at offset 2 (2 bytes), last of block 0
            0x90, // nop     at offset 4
            0xc3, // ret     at offset 5, last of block 1
        ];
        let func_offset = 0x1000usize;

        // One row per instruction: (offset in function, Wasm offset, CLIF text).
        let rows = [
            (0usize, 0x0010u32, "v1 = iconst.i32 0"),
            (1, 0x0011, "v2 = iconst.i32 1"),
            (2, 0x0012, "jump block1"),
            (4, 0x0013, "v3 = iadd v1, v2"),
            (5, 0x0014, "return v3"),
        ];
        let address_map: Vec<_> = rows
            .iter()
            .map(|&(in_func, wasm, _)| (func_offset + in_func, Some(wasm)))
            .collect();
        let clif_lines: Vec<_> = rows
            .iter()
            .map(|&(_, wasm, clif)| (Some(wasm), clif.to_string()))
            .collect();

        let mut wat_map = BTreeMap::new();
        for (wasm_offset, text) in [
            (0x0010, "i32.const 0"),
            (0x0011, "i32.const 1"),
            (0x0012, "br 0"),
            (0x0013, "i32.add"),
            (0x0014, "return"),
        ] {
            wat_map.insert(wasm_offset, text.to_string());
        }

        let target = target_lexicon::Triple::host();
        let blocks = build_basic_blocks(
            func_body,
            func_offset,
            &address_map,
            &clif_lines,
            &wat_map,
            &target,
        )
        .unwrap();

        assert_eq!(blocks.len(), 2, "expected 2 basic blocks");
        let (entry, exit) = (&blocks[0], &blocks[1]);
        assert_eq!(entry.instructions.len(), 3, "block 0: nop, nop, jmp");
        assert_eq!(exit.instructions.len(), 2, "block 1: nop, ret");

        // CLIF annotations are attached to the instructions.
        assert_eq!(
            entry.instructions[0].clif.as_deref(),
            Some("v1 = iconst.i32 0")
        );
        assert_eq!(exit.instructions[1].clif.as_deref(), Some("return v3"));

        // Wasm annotations are taken from the WAT map.
        assert_eq!(entry.instructions[0].wasm.as_deref(), Some("i32.const 0"));
        assert_eq!(exit.instructions[1].wasm.as_deref(), Some("return"));

        // Every instruction offset resolves to the block that holds it.
        for (offset, expected_block) in [(0, 0), (1, 0), (2, 0), (4, 1), (5, 1)] {
            assert_eq!(
                find_block_for_offset(&blocks, FunctionOffset(offset)),
                Some(BlockIndex(expected_block))
            );
        }

        // The disassembly text is populated.
        assert!(entry.instructions[0].assembly.contains("nop"));
        assert!(entry.instructions[2].assembly.contains("jmp"));
    }

    /// Multi-line `perf script` output: non-matching DSOs are dropped from the
    /// sample list but still counted in the total.
    #[test]
    fn test_parse_perf_script() {
        let input = "\
 7f0001001000 wasm[0]::function[3]+0x0 (/tmp/perf-1234.map)
 7f0001001005 wasm[0]::function[3]+0x5 (/tmp/perf-1234.map)
 7f0001001000 wasm[0]::function[3]+0x0 (/tmp/perf-1234.map)
 7f0001002000 some_native_func+0x10 (/usr/bin/wasmtime)
 7f0001001010 wasm[0]::function[5]+0x10 (/tmp/perf-1234.map)
";
        let (samples, total) = parse_perf_script(input);

        // The native function line is filtered out (its DSO is not a `.map`).
        assert_eq!(samples.len(), 4);
        assert_eq!(total, 5);

        let expected = [
            ("wasm[0]::function[3]", 0),
            ("wasm[0]::function[3]", 5),
            ("wasm[0]::function[3]", 0),
            ("wasm[0]::function[5]", 0x10),
        ];
        for (sample, (symbol, offset)) in samples.iter().zip(expected) {
            assert_eq!(sample.symbol, symbol);
            assert_eq!(sample.offset, offset);
        }
    }

    /// A `.clif` file without a function name in its file name is found and
    /// split into `(optional Wasm offset, text)` lines.
    #[test]
    fn test_read_clif_file() {
        let clif_dir = tempdir().unwrap();
        let clif_content = "\
@0010                           v1 = iconst.i32 0
@0011                           v2 = iconst.i32 1
                                v3 = iadd v1, v2
@0012                           return v3
";
        let clif_path = clif_dir.path().join("wasm[0]--function[0].clif");
        std::fs::write(clif_path, clif_content).unwrap();

        let parsed = read_clif_file(
            clif_dir.path(),
            StaticModuleIndex::from_u32(0),
            FuncIndex::from_u32(0),
            None,
        );
        assert_eq!(parsed.len(), 4);

        // First line carries an `@0010` source-offset prefix.
        assert_eq!(parsed[0].0, Some(0x0010));
        assert!(parsed[0].1.contains("iconst.i32 0"));

        // Third line has no offset prefix at all.
        assert_eq!(parsed[2].0, None);
        assert!(parsed[2].1.contains("iadd"));
    }

    /// The WAT offset map of a minimal module mentions its `i32.add`.
    #[test]
    fn test_wat_offset_map() {
        // Smallest valid module with one function that contains `i32.add`.
        let wasm = wat::parse_str(
            r#"(module (func (param i32 i32) (result i32) local.get 0 local.get 1 i32.add))"#,
        )
        .unwrap();
        let offsets = build_wat_offset_map(&wasm);

        assert!(
            offsets.values().any(|text| text.contains("i32.add")),
            "expected wat offset map to contain i32.add, got: {offsets:?}"
        );
    }

    /// Consecutive instructions sharing a Wasm offset are rendered with ditto
    /// marks instead of repeating the CLIF/Wasm annotation.
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_ditto_marks() {
        // Four one-byte instructions; pairs of them map to the same Wasm
        // offset so the second of each pair is a repeat.
        let func_body: &[u8] = &[
            0x90, // nop at offset 0 -> wasm @0010
            0x90, // nop at offset 1 -> wasm @0010 (repeat)
            0x90, // nop at offset 2 -> wasm @0011 (new)
            0xc3, // ret at offset 3 -> wasm @0011 (repeat)
        ];
        let func_offset = 0usize;

        let address_map: Vec<_> = [0x0010u32, 0x0010, 0x0011, 0x0011]
            .iter()
            .enumerate()
            .map(|(in_func, &wasm)| (func_offset + in_func, Some(wasm)))
            .collect();

        let clif_lines: Vec<_> = [(0x0010u32, "v1 = iconst.i32 42"), (0x0011u32, "return v1")]
            .iter()
            .map(|&(wasm, clif)| (Some(wasm), clif.to_string()))
            .collect();

        let mut wat_map = BTreeMap::new();
        for (wasm_offset, text) in [(0x0010, "i32.const 42"), (0x0011, "return")] {
            wat_map.insert(wasm_offset, text.to_string());
        }

        let target = target_lexicon::Triple::host();

        // Building the blocks directly must succeed for this input; the
        // result itself is not inspected.
        let _sanity_blocks = build_basic_blocks(
            func_body,
            func_offset,
            &address_map,
            &clif_lines,
            &wat_map,
            &target,
        )
        .unwrap();

        // One sample on each of the four instructions.
        let samples: Vec<_> = (0..4)
            .map(|offset| PerfSample {
                symbol: "wasm[0]::function[0]".to_string(),
                offset,
            })
            .collect();

        let functions = vec![ModuleFunction {
            module: StaticModuleIndex::from_u32(0),
            index: FuncIndex::from_u32(0),
            name: Some("wasm[0]::function[0]::test".to_string()),
            offset: 0,
            len: func_body.len(),
        }];

        let run = RunCommon {
            common: wasmtime_cli_flags::CommonOptions::default(),
            allow_precompiled: false,
            profile: None,
            dirs: Vec::new(),
            vars: Vec::new(),
            #[cfg(feature = "gdbstub")]
            gdbstub: None,
        };
        let cmd = HotBlocksCommand {
            run,
            percent: 100.0,
            event: Event::CpuCycles,
            frequency: None,
            output: None,
            module: PathBuf::from("dummy.wasm"),
            module_args: Vec::new(),
        };

        let mut rendered = Vec::new();
        cmd.format_hot_blocks(
            &samples,
            samples.len(),
            &functions,
            func_body,
            &address_map,
            Path::new("/nonexistent"),
            &wat_map,
            &target,
            &mut rendered,
        )
        .unwrap();

        // The nop at offset 1 shares Wasm offset 0x0010 with the nop before
        // it, so its CLIF and Wasm columns should be ditto marks.
        let output_str = String::from_utf8(rendered).unwrap();
        assert!(
            output_str.contains('"'),
            "expected ditto marks in output, got:\n{output_str}"
        );
    }

    /// Lookup by `(module, index)` finds present functions, including across
    /// a gap in the index sequence, and rejects absent ones.
    #[test]
    fn test_find_function_binary_search() {
        // (function index, code offset, code length); index 2 is deliberately
        // missing.
        let functions: Vec<ModuleFunction> = [(0, 0x100, 0x50), (1, 0x150, 0x30), (3, 0x200, 0x40)]
            .iter()
            .map(|&(index, offset, len)| ModuleFunction {
                module: StaticModuleIndex::from_u32(0),
                index: FuncIndex::from_u32(index),
                name: None,
                offset,
                len,
            })
            .collect();

        for (wanted, offset) in [(1u32, 0x10), (3u32, 0x5)] {
            let sample = PerfSample {
                symbol: format!("wasm[0]::function[{wanted}]"),
                offset,
            };
            let found = find_function_for_sample(&sample, &functions).unwrap();
            assert_eq!(found.index, FuncIndex::from_u32(wanted));
        }

        // A function index that is not in the table.
        let missing = PerfSample {
            symbol: "wasm[0]::function[99]".to_string(),
            offset: 0,
        };
        assert!(find_function_for_sample(&missing, &functions).is_none());
    }
}