crucible_test_context/coverage/dwarf.rs
1//! DWARF debug info parsing for source-level coverage mapping.
2//!
3//! Maps SBF program counter (PC) values to real source file paths and line numbers
4//! using DWARF debug info from an unstripped ELF binary.
5
6use std::collections::{HashMap, HashSet};
7
/// Source location for a single PC.
///
/// Derives `PartialEq`, `Eq` and `Hash` so that locations can be compared
/// directly and stored in hashed collections (e.g. for de-duplication).
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct SourceLocation {
    /// Path of the source file this location refers to.
    pub file: String,
    /// Line number within `file`.
    pub line: u32,
}
14
15/// Pre-computed PC-to-source mapping, built once from a debug binary.
16///
17/// All PCs are eagerly resolved at init time to avoid lifetime issues
18/// with `addr2line::Context` and to provide O(1) lookup during LCOV generation.
19#[derive(Clone, Debug)]
20pub struct DwarfSourceMap {
21 /// PC (instruction index) -> all source locations (full inline chain).
22 /// Multiple locations per PC: innermost (inlined callee) to outermost (call site).
23 /// When a PC is hit, ALL locations in the chain should be counted as hit.
24 pub pc_map: HashMap<usize, Vec<SourceLocation>>,
25 /// Function entry PC -> (name, source location)
26 pub fn_map: HashMap<usize, (String, SourceLocation)>,
27 /// All executable source lines per file (from DWARF).
28 /// Lines in this set had code generated for them by the compiler.
29 /// Includes both direct code and inlined call sites.
30 pub executable_lines: HashMap<String, HashSet<u32>>,
31}
32
33/// Parse DWARF from a debug binary and build a cached PC-to-source map.
34///
35/// Returns `None` if the binary has no `.debug_info` section.
36///
37/// The debug binary must be the unstripped ELF (e.g., from
38/// `target/sbpf-solana-solana/release/<name>.so`), not the stripped
39/// one in `target/deploy/`.
40///
41/// PC-to-ELF address mapping: PCs from register tracing are instruction
42/// indices. Each SBF instruction = 8 bytes. The `.text` section starts
43/// at ELF vaddr (typically `0x120`). So: `elf_addr = text_vaddr + (pc * 8)`.
44pub fn build_dwarf_source_map(debug_binary: &[u8]) -> Option<DwarfSourceMap> {
45 use object::{Object, ObjectSection};
46
47 let object_file = object::File::parse(debug_binary).ok()?;
48
49 // Check for .debug_info section - if missing, no DWARF data
50 if object_file.section_by_name(".debug_info").is_none() {
51 return None;
52 }
53
54 // Find .text section for vaddr and size
55 let text_section = object_file.section_by_name(".text")?;
56 let text_vaddr = text_section.address();
57 let text_size = text_section.size();
58 let max_pc = (text_size / 8) as usize;
59
60 // Load DWARF sections from the object file
61 let load_section = |id: gimli::SectionId| -> Result<gimli::EndianSlice<'_, gimli::LittleEndian>, gimli::Error> {
62 let data = object_file
63 .section_by_name(id.name())
64 .and_then(|s| s.data().ok())
65 .unwrap_or(&[]);
66 Ok(gimli::EndianSlice::new(data, gimli::LittleEndian))
67 };
68 let dwarf = gimli::Dwarf::load(&load_section).ok()?;
69 let context = addr2line::Context::from_dwarf(dwarf).ok()?;
70
71 // Infer workspace root from FUZZ_SYMBOLS for resolving relative DWARF paths.
72 // e.g. "/home/user/project/target/sbpf-.../release/prog.so" → "/home/user/project"
73 let source_root: Option<String> = std::env::var("FUZZ_SYMBOLS")
74 .ok()
75 .and_then(|p| p.find("/target/").map(|idx| p[..idx].to_string()));
76
77 // Resolve a DWARF file path to an absolute path.
78 // Tries canonicalize first (works if CWD matches), then prepends source_root.
79 let resolve_path = |file: &str| -> String {
80 if let Ok(abs) = std::fs::canonicalize(file) {
81 return abs.to_string_lossy().into_owned();
82 }
83 if let Some(ref root) = source_root {
84 let full = format!("{}/{}", root, file);
85 if let Ok(abs) = std::fs::canonicalize(&full) {
86 return abs.to_string_lossy().into_owned();
87 }
88 }
89 file.to_string()
90 };
91
92 let mut pc_map: HashMap<usize, Vec<SourceLocation>> = HashMap::new();
93 let mut fn_map = HashMap::new();
94 let mut executable_lines: HashMap<String, HashSet<u32>> = HashMap::new();
95
96 // Single pass: use find_frames() for every PC to get the full inline chain.
97 // This resolves both source locations and function names in one pass.
98 // Each frame in the chain represents a level of inlining:
99 // frame[0] = innermost (the inlined callee)
100 // frame[N] = outermost (the actual call site in user code)
101 // We record ALL frames' source locations so that inlined call sites get
102 // proper hit counts.
103 for pc in 0..max_pc {
104 let elf_addr = text_vaddr + (pc as u64) * 8;
105
106 let mut frames = match context.find_frames(elf_addr).skip_all_loads() {
107 Ok(frames) => frames,
108 Err(_) => continue,
109 };
110
111 let mut locations = Vec::new();
112 let mut first_function_name: Option<String> = None;
113
114 // Collect all frames (innermost to outermost)
115 loop {
116 match frames.next() {
117 Ok(Some(frame)) => {
118 // Record function name from the innermost frame that has one
119 if first_function_name.is_none() {
120 if let Some(ref function) = frame.function {
121 first_function_name = Some(
122 function
123 .demangle()
124 .map(|d: std::borrow::Cow<'_, str>| d.into_owned())
125 .unwrap_or_else(|_| {
126 function
127 .raw_name()
128 .map(|r: std::borrow::Cow<'_, str>| r.into_owned())
129 .unwrap_or_else(|_| format!("fn_{}", pc))
130 }),
131 );
132 }
133 }
134
135 // Record source location from this frame
136 if let Some(loc) = frame.location {
137 if let (Some(file), Some(line)) = (loc.file, loc.line) {
138 let file_path = resolve_path(file);
139
140 executable_lines
141 .entry(file_path.clone())
142 .or_default()
143 .insert(line);
144
145 locations.push(SourceLocation {
146 file: file_path,
147 line,
148 });
149 }
150 }
151 }
152 Ok(None) => break,
153 Err(_) => break,
154 }
155 }
156
157 if !locations.is_empty() {
158 // Record function name using the outermost location (call site)
159 if let Some(name) = first_function_name {
160 let outermost = locations.last().unwrap();
161 fn_map
162 .entry(pc)
163 .or_insert_with(|| (name, outermost.clone()));
164 }
165
166 pc_map.insert(pc, locations);
167 }
168 }
169
170 // Phase 2: Scan the DWARF line number table for additional source line mappings.
171 //
172 // find_frames() returns one location per inline level at each PC, but the
173 // DWARF line table often has multiple rows at the SAME address for multi-line
174 // expressions (chained method calls like `.foo()\n.bar()`, multi-line let
175 // bindings, match arm patterns, etc.). These "continuation lines" get no
176 // pc_map entry from find_frames() and would appear as blank in genhtml.
177 //
178 // find_location_range() iterates the raw line table and yields each entry,
179 // including multiple entries at the same address. By adding these to pc_map
180 // and executable_lines, continuation lines get proper DA records and inherit
181 // hit counts from their shared PC.
182 if let Ok(locs) = context.find_location_range(text_vaddr, text_vaddr + text_size) {
183 for (addr, _len, loc) in locs {
184 if let (Some(file), Some(line)) = (loc.file, loc.line) {
185 if addr < text_vaddr || addr >= text_vaddr + text_size {
186 continue;
187 }
188 let pc = ((addr - text_vaddr) / 8) as usize;
189
190 let file_path = resolve_path(file);
191
192 executable_lines
193 .entry(file_path.clone())
194 .or_default()
195 .insert(line);
196
197 let locations = pc_map.entry(pc).or_default();
198 if !locations
199 .iter()
200 .any(|l| l.file == file_path && l.line == line)
201 {
202 locations.push(SourceLocation {
203 file: file_path,
204 line,
205 });
206 }
207 }
208 }
209 }
210
211 // Also parse symtab for function names not found via DWARF frames
212 let symbol_map = object_file.symbol_map();
213 for sym in symbol_map.symbols() {
214 let addr = sym.address();
215 if addr >= text_vaddr && addr < text_vaddr + text_size {
216 let pc = ((addr - text_vaddr) / 8) as usize;
217 if !fn_map.contains_key(&pc) {
218 if let Some(locs) = pc_map.get(&pc) {
219 if let Some(loc) = locs.last() {
220 fn_map.insert(pc, (sym.name().to_string(), loc.clone()));
221 }
222 }
223 }
224 }
225 }
226
227 Some(DwarfSourceMap {
228 pc_map,
229 fn_map,
230 executable_lines,
231 })
232}