Skip to main content

crucible_test_context/coverage/
dwarf.rs

1//! DWARF debug info parsing for source-level coverage mapping.
2//!
3//! Maps SBF program counter (PC) values to real source file paths and line numbers
4//! using DWARF debug info from an unstripped ELF binary.
5
6use std::collections::{HashMap, HashSet};
7
/// Source location for a single PC: a source file path plus a line number.
///
/// Derives `PartialEq`/`Eq`/`Hash` so locations can be compared directly
/// (e.g. `Vec::contains`, `assert_eq!`) and used as `HashSet`/`HashMap` keys.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct SourceLocation {
    /// Source file path as resolved by the map builder (absolute when the
    /// file could be found on disk, otherwise the path as stored in DWARF).
    pub file: String,
    /// Line number within `file`, as reported by DWARF.
    pub line: u32,
}

/// Pre-computed PC-to-source mapping, built once from a debug binary.
///
/// All PCs are eagerly resolved at init time to avoid lifetime issues
/// with `addr2line::Context` and to provide O(1) lookup during LCOV generation.
///
/// `Default` yields an empty map; `PartialEq`/`Eq` compare all three tables.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct DwarfSourceMap {
    /// PC (instruction index) -> all source locations (full inline chain).
    /// Multiple locations per PC: innermost (inlined callee) to outermost (call site).
    /// When a PC is hit, ALL locations in the chain should be counted as hit.
    pub pc_map: HashMap<usize, Vec<SourceLocation>>,
    /// PC -> (function name, source location).
    ///
    /// NOTE: `build_dwarf_source_map` records an entry for every PC whose
    /// frames yield a function name (not only function entry PCs), and then
    /// fills remaining gaps from the symbol table at symbol start addresses.
    pub fn_map: HashMap<usize, (String, SourceLocation)>,
    /// All executable source lines per file (from DWARF).
    /// Lines in this set had code generated for them by the compiler.
    /// Includes both direct code and inlined call sites.
    pub executable_lines: HashMap<String, HashSet<u32>>,
}
32
33/// Parse DWARF from a debug binary and build a cached PC-to-source map.
34///
35/// Returns `None` if the binary has no `.debug_info` section.
36///
37/// The debug binary must be the unstripped ELF (e.g., from
38/// `target/sbpf-solana-solana/release/<name>.so`), not the stripped
39/// one in `target/deploy/`.
40///
41/// PC-to-ELF address mapping: PCs from register tracing are instruction
42/// indices. Each SBF instruction = 8 bytes. The `.text` section starts
43/// at ELF vaddr (typically `0x120`). So: `elf_addr = text_vaddr + (pc * 8)`.
44pub fn build_dwarf_source_map(debug_binary: &[u8]) -> Option<DwarfSourceMap> {
45    use object::{Object, ObjectSection};
46
47    let object_file = object::File::parse(debug_binary).ok()?;
48
49    // Check for .debug_info section - if missing, no DWARF data
50    if object_file.section_by_name(".debug_info").is_none() {
51        return None;
52    }
53
54    // Find .text section for vaddr and size
55    let text_section = object_file.section_by_name(".text")?;
56    let text_vaddr = text_section.address();
57    let text_size = text_section.size();
58    let max_pc = (text_size / 8) as usize;
59
60    // Load DWARF sections from the object file
61    let load_section = |id: gimli::SectionId| -> Result<gimli::EndianSlice<'_, gimli::LittleEndian>, gimli::Error> {
62        let data = object_file
63            .section_by_name(id.name())
64            .and_then(|s| s.data().ok())
65            .unwrap_or(&[]);
66        Ok(gimli::EndianSlice::new(data, gimli::LittleEndian))
67    };
68    let dwarf = gimli::Dwarf::load(&load_section).ok()?;
69    let context = addr2line::Context::from_dwarf(dwarf).ok()?;
70
71    // Infer workspace root from FUZZ_SYMBOLS for resolving relative DWARF paths.
72    // e.g. "/home/user/project/target/sbpf-.../release/prog.so" → "/home/user/project"
73    let source_root: Option<String> = std::env::var("FUZZ_SYMBOLS")
74        .ok()
75        .and_then(|p| p.find("/target/").map(|idx| p[..idx].to_string()));
76
77    // Resolve a DWARF file path to an absolute path.
78    // Tries canonicalize first (works if CWD matches), then prepends source_root.
79    let resolve_path = |file: &str| -> String {
80        if let Ok(abs) = std::fs::canonicalize(file) {
81            return abs.to_string_lossy().into_owned();
82        }
83        if let Some(ref root) = source_root {
84            let full = format!("{}/{}", root, file);
85            if let Ok(abs) = std::fs::canonicalize(&full) {
86                return abs.to_string_lossy().into_owned();
87            }
88        }
89        file.to_string()
90    };
91
92    let mut pc_map: HashMap<usize, Vec<SourceLocation>> = HashMap::new();
93    let mut fn_map = HashMap::new();
94    let mut executable_lines: HashMap<String, HashSet<u32>> = HashMap::new();
95
96    // Single pass: use find_frames() for every PC to get the full inline chain.
97    // This resolves both source locations and function names in one pass.
98    // Each frame in the chain represents a level of inlining:
99    //   frame[0] = innermost (the inlined callee)
100    //   frame[N] = outermost (the actual call site in user code)
101    // We record ALL frames' source locations so that inlined call sites get
102    // proper hit counts.
103    for pc in 0..max_pc {
104        let elf_addr = text_vaddr + (pc as u64) * 8;
105
106        let mut frames = match context.find_frames(elf_addr).skip_all_loads() {
107            Ok(frames) => frames,
108            Err(_) => continue,
109        };
110
111        let mut locations = Vec::new();
112        let mut first_function_name: Option<String> = None;
113
114        // Collect all frames (innermost to outermost)
115        loop {
116            match frames.next() {
117                Ok(Some(frame)) => {
118                    // Record function name from the innermost frame that has one
119                    if first_function_name.is_none() {
120                        if let Some(ref function) = frame.function {
121                            first_function_name = Some(
122                                function
123                                    .demangle()
124                                    .map(|d: std::borrow::Cow<'_, str>| d.into_owned())
125                                    .unwrap_or_else(|_| {
126                                        function
127                                            .raw_name()
128                                            .map(|r: std::borrow::Cow<'_, str>| r.into_owned())
129                                            .unwrap_or_else(|_| format!("fn_{}", pc))
130                                    }),
131                            );
132                        }
133                    }
134
135                    // Record source location from this frame
136                    if let Some(loc) = frame.location {
137                        if let (Some(file), Some(line)) = (loc.file, loc.line) {
138                            let file_path = resolve_path(file);
139
140                            executable_lines
141                                .entry(file_path.clone())
142                                .or_default()
143                                .insert(line);
144
145                            locations.push(SourceLocation {
146                                file: file_path,
147                                line,
148                            });
149                        }
150                    }
151                }
152                Ok(None) => break,
153                Err(_) => break,
154            }
155        }
156
157        if !locations.is_empty() {
158            // Record function name using the outermost location (call site)
159            if let Some(name) = first_function_name {
160                let outermost = locations.last().unwrap();
161                fn_map
162                    .entry(pc)
163                    .or_insert_with(|| (name, outermost.clone()));
164            }
165
166            pc_map.insert(pc, locations);
167        }
168    }
169
170    // Phase 2: Scan the DWARF line number table for additional source line mappings.
171    //
172    // find_frames() returns one location per inline level at each PC, but the
173    // DWARF line table often has multiple rows at the SAME address for multi-line
174    // expressions (chained method calls like `.foo()\n.bar()`, multi-line let
175    // bindings, match arm patterns, etc.). These "continuation lines" get no
176    // pc_map entry from find_frames() and would appear as blank in genhtml.
177    //
178    // find_location_range() iterates the raw line table and yields each entry,
179    // including multiple entries at the same address. By adding these to pc_map
180    // and executable_lines, continuation lines get proper DA records and inherit
181    // hit counts from their shared PC.
182    if let Ok(locs) = context.find_location_range(text_vaddr, text_vaddr + text_size) {
183        for (addr, _len, loc) in locs {
184            if let (Some(file), Some(line)) = (loc.file, loc.line) {
185                if addr < text_vaddr || addr >= text_vaddr + text_size {
186                    continue;
187                }
188                let pc = ((addr - text_vaddr) / 8) as usize;
189
190                let file_path = resolve_path(file);
191
192                executable_lines
193                    .entry(file_path.clone())
194                    .or_default()
195                    .insert(line);
196
197                let locations = pc_map.entry(pc).or_default();
198                if !locations
199                    .iter()
200                    .any(|l| l.file == file_path && l.line == line)
201                {
202                    locations.push(SourceLocation {
203                        file: file_path,
204                        line,
205                    });
206                }
207            }
208        }
209    }
210
211    // Also parse symtab for function names not found via DWARF frames
212    let symbol_map = object_file.symbol_map();
213    for sym in symbol_map.symbols() {
214        let addr = sym.address();
215        if addr >= text_vaddr && addr < text_vaddr + text_size {
216            let pc = ((addr - text_vaddr) / 8) as usize;
217            if !fn_map.contains_key(&pc) {
218                if let Some(locs) = pc_map.get(&pc) {
219                    if let Some(loc) = locs.last() {
220                        fn_map.insert(pc, (sym.name().to_string(), loc.clone()));
221                    }
222                }
223            }
224        }
225    }
226
227    Some(DwarfSourceMap {
228        pc_map,
229        fn_map,
230        executable_lines,
231    })
232}