Skip to main content

sbpf_coverage/
lib.rs

1use addr2line::gimli::{DW_AT_language, DW_AT_producer, DW_TAG_compile_unit};
2pub use addr2line::{self, Loader};
3use anyhow::{Result, anyhow, bail};
4use byteorder::{LittleEndian, ReadBytesExt};
5pub use object::{Object, ObjectSection};
6use std::{
7    collections::{BTreeMap, HashSet},
8    fs::{File, OpenOptions, metadata},
9    io::Write,
10    path::{Path, PathBuf},
11};
12
13mod branch;
14mod trace_disassemble;
15
16mod start_address;
17use start_address::start_address;
18
19pub mod toolchain;
20pub mod util;
21use util::StripCurrentDir;
22
23use crate::util::{
24    compute_hash, find_files_with_extension, get_dwarf_attribute, get_section_start_address,
25};
26
27mod vaddr;
28
/// A file that was found to contain DWARF debug sections, together with the
/// compiler metadata that could be read from it.
#[derive(Debug)]
pub struct DebugPath {
    /// Location of the file holding the `.debug_*` sections.
    pub path: PathBuf,
    /// Value of the compile unit's `DW_AT_producer` attribute, if it could be read.
    pub producer: Option<String>,
    /// Value of the compile unit's `DW_AT_language` attribute, if it could be read.
    pub lang: Option<String>,
}
35
/// A source location (file and line) associated with a virtual address.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Entry<'a> {
    /// Source file path as reported by the debug information.
    file: &'a str,
    /// Line number within `file`.
    line: u32,
}
41
/// Everything derived from one debug file that is needed to attribute a
/// register trace to source lines.
struct Dwarf {
    /// The debug file this data was built from.
    debug_path: DebugPath,
    /// The executable (`.so`) that was paired with `debug_path`.
    #[allow(dead_code)]
    so_path: PathBuf,
    /// Hash of the contents of `so_path`; compared against a trace's
    /// `.exec.sha256` file to pick the matching `Dwarf`.
    so_hash: String,
    /// Address the first traced instruction is aligned to.
    start_address: u64,
    /// Start address of the `.text` section as reported by `loader`.
    text_section_offset: u64,
    /// Intentionally leaked loader, so that data borrowed from it can be
    /// stored with a `'static` lifetime.
    #[allow(dead_code, reason = "`vaddr` points into `loader`")]
    loader: &'static Loader,
    /// Source location for every virtual address that has one.
    vaddr_entry_map: BTreeMap<u64, Entry<'static>>,
}
53
/// What processing a single regs file produced.
enum Outcome {
    /// An lcov file was written to the contained path.
    Lcov(PathBuf),
    /// The trace was disassembled instead of producing an lcov file.
    TraceDisassemble,
}
58
/// Virtual addresses, one per record of a regs file.
type Vaddrs = Vec<u64>;
/// Raw 64-bit values read from an `.insns` file.
type Insns = Vec<u64>;
/// Register records; each record holds twelve 64-bit values, the last of
/// which is the program counter.
type Regs = Vec<[u64; 12]>;

/// Maps a virtual address to its source location.
type VaddrEntryMap<'a> = BTreeMap<u64, Entry<'a>>;

/// Maps a source file to its per-line hit counts.
type FileLineCountMap<'a> = BTreeMap<&'a str, BTreeMap<u32, usize>>;
66
67pub fn run(
68    sbf_trace_dir: PathBuf,
69    src_paths: HashSet<PathBuf>,
70    sbf_paths: Vec<PathBuf>,
71    debug: bool,
72    trace_disassemble: bool,
73    no_color: bool,
74) -> Result<()> {
75    let mut lcov_paths = Vec::new();
76
77    let debug_paths = debug_paths(sbf_paths)?;
78
79    let dwarfs = debug_paths
80        .into_iter()
81        .map(|path| build_dwarf(path, &src_paths, trace_disassemble))
82        .collect::<Result<Vec<_>>>()
83        .expect("Can't build dwarf");
84
85    if dwarfs.is_empty() {
86        bail!("Found no .so/.debug/.so.debug files containing debug sections.");
87    }
88
89    if debug {
90        for dwarf in dwarfs {
91            dump_vaddr_entry_map(dwarf.vaddr_entry_map);
92        }
93        eprintln!("Exiting debug mode.");
94        return Ok(());
95    }
96
97    let mut regs_paths = find_files_with_extension(std::slice::from_ref(&sbf_trace_dir), "regs");
98    if regs_paths.is_empty() {
99        bail!(
100            "Found no regs files in: {}
101Are you sure you run your tests with register tracing enabled",
102            sbf_trace_dir.strip_current_dir().display(),
103        );
104    }
105    // Sort paths by modification time.
106    regs_paths.sort_by_key(|p| {
107        std::fs::metadata(p)
108            .and_then(|m| m.modified())
109            .unwrap_or(std::time::UNIX_EPOCH)
110    });
111
112    for regs_path in &regs_paths {
113        match process_regs_path(&dwarfs, regs_path, &src_paths, trace_disassemble, no_color) {
114            Ok(Outcome::Lcov(lcov_path)) => {
115                lcov_paths.push(lcov_path.strip_current_dir().to_path_buf());
116            }
117            Ok(Outcome::TraceDisassemble) => {}
118            _ => {
119                eprintln!(
120                    "Skipping Regs file: {} (no matching executable)",
121                    regs_path.strip_current_dir().display()
122                );
123            }
124        }
125    }
126
127    if !trace_disassemble {
128        eprintln!(
129            "
130Processed {} of {} regs files
131
132Lcov files written: {lcov_paths:#?}
133
134If you are done generating lcov files, try running:
135
136    genhtml --output-directory coverage {}/*.lcov --rc branch_coverage=1 && open coverage/index.html
137",
138            lcov_paths.len(),
139            regs_paths.len(),
140            sbf_trace_dir.as_path().strip_current_dir().display()
141        );
142    }
143
144    Ok(())
145}
146
147fn debug_paths(sbf_paths: Vec<PathBuf>) -> Result<Vec<DebugPath>> {
148    // It's possible that the debug information is in the .so file itself
149    let so_files = find_files_with_extension(&sbf_paths, "so");
150    // It's also possible that it ends with .debug
151    let debug_files = find_files_with_extension(&sbf_paths, "debug");
152
153    let mut maybe_list = so_files;
154    maybe_list.extend(debug_files);
155
156    // Collect only those files that contain debug sections
157    let full_list: Vec<DebugPath> = maybe_list
158        .into_iter()
159        .filter_map(|maybe_path| {
160            let data = std::fs::read(&maybe_path).ok()?;
161            let object = object::read::File::parse(&*data).ok()?;
162            // check it has debug sections
163            let has_debug = object
164                .sections()
165                .any(|section| section.name().is_ok_and(|n| n.starts_with(".debug_")));
166            // get compiler information if any
167            let producer = get_dwarf_attribute(&object, DW_TAG_compile_unit, DW_AT_producer).ok();
168            // get lang information if any
169            let lang = get_dwarf_attribute(&object, DW_TAG_compile_unit, DW_AT_language).ok();
170
171            has_debug.then_some(DebugPath {
172                path: maybe_path,
173                producer,
174                lang,
175            })
176        })
177        .collect();
178
179    eprintln!("Debug symbols found:");
180    for dp in full_list.iter() {
181        eprintln!(
182            "  {} (producer: {}, lang: {})",
183            dp.path.strip_current_dir().display(),
184            dp.producer.as_deref().unwrap_or("unknown"),
185            dp.lang.as_deref().unwrap_or("unknown"),
186        );
187    }
188    Ok(full_list)
189}
190
191fn build_dwarf(
192    debug_path: DebugPath,
193    src_paths: &HashSet<PathBuf>,
194    trace_disassemble: bool,
195) -> Result<Dwarf> {
196    let start_address = start_address(&debug_path.path)?;
197
198    let loader = Loader::new(&debug_path.path).map_err(|error| {
199        anyhow!(
200            "failed to build loader for {}: {}",
201            debug_path.path.display(),
202            error
203        )
204    })?;
205
206    let loader = Box::leak(Box::new(loader));
207
208    let vaddr_entry_map =
209        build_vaddr_entry_map(loader, &debug_path.path, src_paths, trace_disassemble)?;
210
211    // Suppose debug_path is program.debug, swap with .so and try
212    let mut so_path = debug_path.path.with_extension("so");
213    let so_content = match std::fs::read(&so_path) {
214        Err(e) => {
215            if e.kind() == std::io::ErrorKind::NotFound {
216                // We might have program.so.debug - simply cut debug and try
217                so_path = debug_path.path.with_extension("");
218                std::fs::read(&so_path)?
219            } else {
220                return Err(e.into());
221            }
222        }
223        Ok(c) => c,
224    };
225    let so_hash = compute_hash(&so_content);
226    eprintln!(
227        "DWARF: {} -> {} (exec sha256: {})",
228        debug_path.path.strip_current_dir().display(),
229        so_path.strip_current_dir().display(),
230        &so_hash[..16],
231    );
232
233    Ok(Dwarf {
234        debug_path,
235        so_path,
236        so_hash,
237        start_address,
238        loader,
239        vaddr_entry_map,
240        text_section_offset: get_section_start_address(loader, ".text")?,
241    })
242}
243
244fn process_regs_path(
245    dwarfs: &[Dwarf],
246    regs_path: &Path,
247    src_paths: &HashSet<PathBuf>,
248    trace_disassemble: bool,
249    no_color: bool,
250) -> Result<Outcome> {
251    eprintln!();
252    let exec_sha256 = std::fs::read_to_string(regs_path.with_extension("exec.sha256"))?;
253    let (mut vaddrs, regs) = read_vaddrs(regs_path)?;
254    eprintln!(
255        "Regs: {} ({} entries, exec sha256: {})",
256        regs_path.strip_current_dir().display(),
257        vaddrs.len(),
258        &exec_sha256[..16],
259    );
260    let insns = read_insns(&regs_path.with_extension("insns"))?;
261
262    let dwarf = find_applicable_dwarf(dwarfs, regs_path, &exec_sha256, &mut vaddrs)?;
263
264    assert!(
265        vaddrs
266            .first()
267            .is_some_and(|&vaddr| vaddr == dwarf.start_address)
268    );
269
270    if trace_disassemble {
271        return trace_disassemble::trace_disassemble(
272            src_paths, regs_path, &vaddrs, dwarf, !no_color,
273        );
274    }
275
276    // smoelius: If a sequence of Regs refer to the same file and line, treat them as
277    // one hit to that file and line.
278    // vaddrs.dedup_by_key::<_, Option<&Entry>>(|vaddr| dwarf.vaddr_entry_map.get(vaddr));
279
280    if let Ok(branches) = branch::get_branches(&vaddrs, &insns, &regs, dwarf) {
281        let _ = branch::write_branch_coverage(&branches, regs_path, src_paths);
282    }
283
284    // smoelius: A `vaddr` could not have an entry because its file does not exist. Keep only those
285    // `vaddr`s that have entries.
286    let vaddrs = vaddrs
287        .into_iter()
288        .filter(|vaddr| dwarf.vaddr_entry_map.contains_key(vaddr))
289        .collect::<Vec<_>>();
290
291    eprintln!("Line hits: {}", vaddrs.len());
292
293    let file_line_count_map = build_file_line_count_map(&dwarf.vaddr_entry_map, vaddrs);
294
295    write_lcov_file(regs_path, file_line_count_map).map(Outcome::Lcov)
296}
297
298fn build_vaddr_entry_map<'a>(
299    loader: &'a Loader,
300    debug_path: &Path,
301    src_paths: &HashSet<PathBuf>,
302    trace_disassemble: bool,
303) -> Result<VaddrEntryMap<'a>> {
304    let mut vaddr_entry_map = VaddrEntryMap::new();
305    let metadata = metadata(debug_path)?;
306    for vaddr in (0..metadata.len()).step_by(size_of::<u64>()) {
307        let location = loader.find_location(vaddr).map_err(|error| {
308            anyhow!("failed to find location for address 0x{vaddr:x}: {}", error)
309        })?;
310        let Some(location) = location else {
311            continue;
312        };
313        let Some(file) = location.file else {
314            continue;
315        };
316        if !trace_disassemble {
317            // smoelius: Ignore files that do not exist.
318            if !Path::new(file).try_exists()? {
319                continue;
320            }
321            // procdump: ignore files other than what user has provided.
322            if !src_paths
323                .iter()
324                .any(|src_path| file.starts_with(&src_path.to_string_lossy().to_string()))
325            {
326                continue;
327            }
328        }
329        let Some(line) = location.line else {
330            continue;
331        };
332        // smoelius: Even though we ignore columns, fetch them should we ever want to act on them.
333        // let Some(_column) = location.column else {
334        //     continue;
335        // };
336        let entry = vaddr_entry_map.entry(vaddr).or_default();
337        entry.file = file;
338        entry.line = line;
339    }
340    Ok(vaddr_entry_map)
341}
342
343fn dump_vaddr_entry_map(vaddr_entry_map: BTreeMap<u64, Entry<'_>>) {
344    let mut prev = String::new();
345    for (vaddr, Entry { file, line }) in vaddr_entry_map {
346        let curr = format!("{file}:{line}");
347        if prev != curr {
348            eprintln!("0x{vaddr:x}: {curr}");
349            prev = curr;
350        }
351    }
352}
353
354fn read_insns(insns_path: &Path) -> Result<Insns> {
355    let mut insns = Vec::new();
356    let mut insns_file = File::open(insns_path)?;
357    while let Ok(insn) = insns_file.read_u64::<LittleEndian>() {
358        insns.push(insn);
359    }
360    Ok(insns)
361}
362
363fn read_vaddrs(regs_path: &Path) -> Result<(Vaddrs, Regs)> {
364    let mut regs = Regs::new();
365    let mut vaddrs = Vaddrs::new();
366    let mut regs_file = File::open(regs_path)?;
367
368    let mut data_trace = [0u64; 12];
369    'outer: loop {
370        for item in &mut data_trace {
371            match regs_file.read_u64::<LittleEndian>() {
372                Err(_) => break 'outer,
373                Ok(reg) => *item = reg,
374            }
375        }
376
377        // NB: the pc is instruction indexed, not byte indexed, keeps it aligned to 8 bytes - hence << 3 -> *8
378        let vaddr = data_trace[11] << 3;
379
380        vaddrs.push(vaddr);
381        regs.push(data_trace);
382    }
383
384    Ok((vaddrs, regs))
385}
386
387fn find_applicable_dwarf<'a>(
388    dwarfs: &'a [Dwarf],
389    regs_path: &Path,
390    exec_sha256: &str,
391    vaddrs: &mut [u64],
392) -> Result<&'a Dwarf> {
393    let dwarf = dwarfs
394        .iter()
395        .find(|dwarf| dwarf.so_hash == exec_sha256)
396        .ok_or(anyhow!(
397            "Cannot find the shared object that corresponds to: {}",
398            exec_sha256
399        ))?;
400
401    eprintln!(
402        "Matched: {} -> {} (exec sha256: {})",
403        regs_path.strip_current_dir().display(),
404        dwarf.debug_path.path.strip_current_dir().display(),
405        &dwarf.so_hash[..16],
406    );
407    let vaddr_first = *vaddrs.first().ok_or(anyhow!("Vaddrs is empty!"))?;
408    assert!(dwarf.start_address >= vaddr_first);
409    let shift = dwarf.start_address - vaddr_first;
410
411    // smoelius: Make the shift "permanent".
412    for vaddr in vaddrs.iter_mut() {
413        *vaddr += shift;
414    }
415
416    Ok(dwarf)
417}
418
419fn build_file_line_count_map<'a>(
420    vaddr_entry_map: &BTreeMap<u64, Entry<'a>>,
421    vaddrs: Vaddrs,
422) -> FileLineCountMap<'a> {
423    let mut file_line_count_map = FileLineCountMap::new();
424    for Entry { file, line } in vaddr_entry_map.values() {
425        let line_count_map = file_line_count_map.entry(file).or_default();
426        line_count_map.insert(*line, 0);
427    }
428
429    for vaddr in vaddrs {
430        // smoelius: A `vaddr` could not have an entry because its file does not exist.
431        let Some(entry) = vaddr_entry_map.get(&vaddr) else {
432            continue;
433        };
434        let Some(line_count_map) = file_line_count_map.get_mut(entry.file) else {
435            continue;
436        };
437        let Some(count) = line_count_map.get_mut(&entry.line) else {
438            continue;
439        };
440        *count += 1;
441    }
442
443    file_line_count_map
444}
445
446fn write_lcov_file(regs_path: &Path, file_line_count_map: FileLineCountMap<'_>) -> Result<PathBuf> {
447    let lcov_path = regs_path.with_extension("lcov");
448
449    let mut file = OpenOptions::new()
450        .create(true)
451        .truncate(true)
452        .write(true)
453        .open(&lcov_path)?;
454
455    for (source_file, line_count_map) in file_line_count_map {
456        // smoelius: Stripping `current_dir` from `source_file` has not effect on what's displayed.
457        writeln!(file, "SF:{source_file}")?;
458        for (line, count) in line_count_map {
459            writeln!(file, "DA:{line},{count}")?;
460        }
461        writeln!(file, "end_of_record")?;
462    }
463
464    Ok(lcov_path)
465}