// sbpf_coverage/lib.rs

use addr2line::Loader;
use anyhow::{Result, anyhow, bail};
use byteorder::{LittleEndian, ReadBytesExt};
use object::{Object, ObjectSection};
use std::{
    collections::{BTreeMap, HashSet},
    fs::{File, OpenOptions, metadata},
    io::Write,
    path::{Path, PathBuf},
};

mod branch;
mod trace_disassemble;

mod start_address;
use start_address::start_address;

pub mod util;
use util::StripCurrentDir;

use crate::util::{compute_hash, find_files_with_extension};

mod vaddr;
24
/// Source location (file and line) that a virtual address maps to.
///
/// `file` is borrowed; in this file it comes from the `Loader` handed to
/// `build_vaddr_entry_map`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Entry<'a> {
    /// Source file path as reported by the debug information.
    file: &'a str,
    /// Line number within `file`.
    line: u32,
}
30
31struct Dwarf {
32    path: PathBuf,
33    #[allow(dead_code)]
34    so_path: PathBuf,
35    so_hash: String,
36    start_address: u64,
37    #[allow(dead_code, reason = "`vaddr` points into `loader`")]
38    loader: &'static Loader,
39    vaddr_entry_map: BTreeMap<u64, Entry<'static>>,
40}
41
/// Result of successfully processing one regs file.
enum Outcome {
    /// An lcov file was written; the payload is its path.
    Lcov(PathBuf),
    /// The trace was handed to the trace disassembler instead of producing an lcov file.
    TraceDisassemble,
}
46
47type Vaddrs = Vec<u64>;
48type Insns = Vec<u64>;
49type Regs = Vec<[u64; 12]>;
50
51type VaddrEntryMap<'a> = BTreeMap<u64, Entry<'a>>;
52
53type FileLineCountMap<'a> = BTreeMap<&'a str, BTreeMap<u32, usize>>;
54
55pub fn run(
56    sbf_trace_dir: PathBuf,
57    src_paths: HashSet<PathBuf>,
58    sbf_paths: Vec<PathBuf>,
59    debug: bool,
60    trace_disassemble: bool,
61    no_color: bool,
62) -> Result<()> {
63    let mut lcov_paths = Vec::new();
64
65    let debug_paths = debug_paths(sbf_paths)?;
66
67    let dwarfs = debug_paths
68        .into_iter()
69        .map(|path| build_dwarf(&path, &src_paths, trace_disassemble))
70        .collect::<Result<Vec<_>>>()
71        .expect("Can't build dwarf");
72
73    if dwarfs.is_empty() {
74        bail!("Found no .so/.debug/.so.debug files containing debug sections.");
75    }
76
77    if debug {
78        for dwarf in dwarfs {
79            dump_vaddr_entry_map(dwarf.vaddr_entry_map);
80        }
81        eprintln!("Exiting debug mode.");
82        return Ok(());
83    }
84
85    let regs_paths = find_files_with_extension(std::slice::from_ref(&sbf_trace_dir), "regs");
86    if regs_paths.is_empty() {
87        bail!(
88            "Found no regs files in: {}
89Are you sure you run your tests with register tracing enabled",
90            sbf_trace_dir.strip_current_dir().display(),
91        );
92    }
93
94    for regs_path in &regs_paths {
95        match process_regs_path(&dwarfs, regs_path, &src_paths, trace_disassemble, no_color) {
96            Ok(Outcome::Lcov(lcov_path)) => {
97                lcov_paths.push(lcov_path.strip_current_dir().to_path_buf());
98            }
99            Ok(Outcome::TraceDisassemble) => {
100                return Ok(());
101            }
102            _ => {
103                eprintln!(
104                    "Skipping Regs file: {} (no matching executable)",
105                    regs_path.strip_current_dir().display()
106                );
107            }
108        }
109    }
110
111    eprintln!(
112        "
113Processed {} of {} regs files
114
115Lcov files written: {lcov_paths:#?}
116
117If you are done generating lcov files, try running:
118
119    genhtml --output-directory coverage {}/*.lcov --rc branch_coverage=1 && open coverage/index.html
120",
121        lcov_paths.len(),
122        regs_paths.len(),
123        sbf_trace_dir.as_path().strip_current_dir().display()
124    );
125
126    Ok(())
127}
128
129fn debug_paths(sbf_paths: Vec<PathBuf>) -> Result<Vec<PathBuf>> {
130    // It's possible that the debug information is in the .so file itself
131    let so_files = find_files_with_extension(&sbf_paths, "so");
132    // It's also possible that it ends with .debug
133    let debug_files = find_files_with_extension(&sbf_paths, "debug");
134
135    let mut maybe_list = so_files;
136    maybe_list.extend(debug_files);
137
138    // Collect only those files that contain debug sections
139    let full_list = maybe_list
140        .into_iter()
141        .filter(|maybe_path| {
142            let Ok(data) = std::fs::read(maybe_path) else {
143                return false;
144            };
145            let Ok(object) = object::read::File::parse(&*data) else {
146                return false;
147            };
148            // check it has debug sections
149            object
150                .sections()
151                .any(|section| section.name().is_ok_and(|n| n.starts_with(".debug_")))
152        })
153        .collect();
154
155    eprintln!("Files containing debug sections: {:#?}", full_list);
156    Ok(full_list)
157}
158
159fn build_dwarf(
160    debug_path: &Path,
161    src_paths: &HashSet<PathBuf>,
162    trace_disassemble: bool,
163) -> Result<Dwarf> {
164    let start_address = start_address(debug_path)?;
165
166    let loader = Loader::new(debug_path).map_err(|error| {
167        anyhow!(
168            "failed to build loader for {}: {}",
169            debug_path.display(),
170            error
171        )
172    })?;
173
174    let loader = Box::leak(Box::new(loader));
175
176    eprintln!(
177        "Trying to build a DWARF entry with debug path: {}",
178        debug_path.strip_current_dir().display()
179    );
180
181    let vaddr_entry_map = build_vaddr_entry_map(loader, debug_path, src_paths, trace_disassemble)?;
182
183    // Suppose debug_path is program.debug, swap with .so and try
184    let mut so_path = debug_path.with_extension("so");
185    let so_content = match std::fs::read(&so_path) {
186        Err(e) => {
187            if e.kind() == std::io::ErrorKind::NotFound {
188                // We might have program.so.debug - simply cut debug and try
189                so_path = debug_path.with_extension("");
190                std::fs::read(&so_path)?
191            } else {
192                return Err(e.into());
193            }
194        }
195        Ok(c) => c,
196    };
197    let so_hash = compute_hash(&so_content);
198    eprintln!(
199        "Found a match:\n{} to\n{} (SHA-256: {})",
200        debug_path.strip_current_dir().display(),
201        so_path.strip_current_dir().display(),
202        &so_hash[..16],
203    );
204
205    Ok(Dwarf {
206        path: debug_path.to_path_buf(),
207        so_path,
208        so_hash,
209        start_address,
210        loader,
211        vaddr_entry_map,
212    })
213}
214
215fn process_regs_path(
216    dwarfs: &[Dwarf],
217    regs_path: &Path,
218    src_paths: &HashSet<PathBuf>,
219    trace_disassemble: bool,
220    no_color: bool,
221) -> Result<Outcome> {
222    eprintln!();
223    let exec_sha256 = std::fs::read_to_string(regs_path.with_extension("exec.sha256"))?;
224    eprintln!(
225        "Regs file: {} (expecting executable with SHA-256: {})",
226        regs_path.strip_current_dir().display(),
227        &exec_sha256[..16]
228    );
229
230    let (mut vaddrs, regs) = read_vaddrs(regs_path)?;
231    eprintln!("Regs read: {}", vaddrs.len());
232    let insns = read_insns(&regs_path.with_extension("insns"))?;
233
234    let dwarf = find_applicable_dwarf(dwarfs, regs_path, &exec_sha256, &mut vaddrs)?;
235
236    eprintln!(
237        "Applicable dwarf: {}",
238        dwarf.path.strip_current_dir().display()
239    );
240
241    assert!(
242        vaddrs
243            .first()
244            .is_some_and(|&vaddr| vaddr == dwarf.start_address)
245    );
246
247    if trace_disassemble {
248        return trace_disassemble::trace_disassemble(regs_path, &vaddrs, dwarf, !no_color);
249    }
250
251    // smoelius: If a sequence of Regs refer to the same file and line, treat them as
252    // one hit to that file and line.
253    // vaddrs.dedup_by_key::<_, Option<&Entry>>(|vaddr| dwarf.vaddr_entry_map.get(vaddr));
254
255    if let Ok(branches) = branch::get_branches(&vaddrs, &insns, &regs, dwarf) {
256        let _ = branch::write_branch_coverage(&branches, regs_path, src_paths);
257    }
258
259    // smoelius: A `vaddr` could not have an entry because its file does not exist. Keep only those
260    // `vaddr`s that have entries.
261    let vaddrs = vaddrs
262        .into_iter()
263        .filter(|vaddr| dwarf.vaddr_entry_map.contains_key(vaddr))
264        .collect::<Vec<_>>();
265
266    eprintln!("Line hits: {}", vaddrs.len());
267
268    let file_line_count_map = build_file_line_count_map(&dwarf.vaddr_entry_map, vaddrs);
269
270    write_lcov_file(regs_path, file_line_count_map).map(Outcome::Lcov)
271}
272
273fn build_vaddr_entry_map<'a>(
274    loader: &'a Loader,
275    debug_path: &Path,
276    src_paths: &HashSet<PathBuf>,
277    trace_disassemble: bool,
278) -> Result<VaddrEntryMap<'a>> {
279    let mut vaddr_entry_map = VaddrEntryMap::new();
280    let metadata = metadata(debug_path)?;
281    for vaddr in (0..metadata.len()).step_by(size_of::<u64>()) {
282        let location = loader.find_location(vaddr).map_err(|error| {
283            anyhow!("failed to find location for address 0x{vaddr:x}: {}", error)
284        })?;
285        let Some(location) = location else {
286            continue;
287        };
288        let Some(file) = location.file else {
289            continue;
290        };
291        if !trace_disassemble {
292            // smoelius: Ignore files that do not exist.
293            if !Path::new(file).try_exists()? {
294                continue;
295            }
296            // procdump: ignore files other than what user has provided.
297            if !src_paths
298                .iter()
299                .any(|src_path| file.starts_with(&src_path.to_string_lossy().to_string()))
300            {
301                continue;
302            }
303        }
304        let Some(line) = location.line else {
305            continue;
306        };
307        // smoelius: Even though we ignore columns, fetch them should we ever want to act on them.
308        // let Some(_column) = location.column else {
309        //     continue;
310        // };
311        let entry = vaddr_entry_map.entry(vaddr).or_default();
312        entry.file = file;
313        entry.line = line;
314    }
315    Ok(vaddr_entry_map)
316}
317
318fn dump_vaddr_entry_map(vaddr_entry_map: BTreeMap<u64, Entry<'_>>) {
319    let mut prev = String::new();
320    for (vaddr, Entry { file, line }) in vaddr_entry_map {
321        let curr = format!("{file}:{line}");
322        if prev != curr {
323            eprintln!("0x{vaddr:x}: {curr}");
324            prev = curr;
325        }
326    }
327}
328
329fn read_insns(insns_path: &Path) -> Result<Insns> {
330    let mut insns = Vec::new();
331    let mut insns_file = File::open(insns_path)?;
332    while let Ok(insn) = insns_file.read_u64::<LittleEndian>() {
333        insns.push(insn);
334    }
335    Ok(insns)
336}
337
338fn read_vaddrs(regs_path: &Path) -> Result<(Vaddrs, Regs)> {
339    let mut regs = Regs::new();
340    let mut vaddrs = Vaddrs::new();
341    let mut regs_file = File::open(regs_path)?;
342
343    let mut data_trace = [0u64; 12];
344    'outer: loop {
345        for item in &mut data_trace {
346            match regs_file.read_u64::<LittleEndian>() {
347                Err(_) => break 'outer,
348                Ok(reg) => *item = reg,
349            }
350        }
351
352        // NB: the pc is instruction indexed, not byte indexed, keeps it aligned to 8 bytes - hence << 3 -> *8
353        let vaddr = data_trace[11] << 3;
354
355        vaddrs.push(vaddr);
356        regs.push(data_trace);
357    }
358
359    Ok((vaddrs, regs))
360}
361
362fn find_applicable_dwarf<'a>(
363    dwarfs: &'a [Dwarf],
364    regs_path: &Path,
365    exec_sha256: &str,
366    vaddrs: &mut [u64],
367) -> Result<&'a Dwarf> {
368    let dwarf = dwarfs
369        .iter()
370        .find(|dwarf| dwarf.so_hash == exec_sha256)
371        .ok_or(anyhow!(
372            "Cannot find the shared object that corresponds to: {}",
373            exec_sha256
374        ))?;
375
376    eprintln!(
377        "Matching Regs file {} to executable with SHA-256: {}",
378        regs_path.strip_current_dir().display(),
379        &dwarf.so_hash[..16]
380    );
381    let vaddr_first = *vaddrs.first().ok_or(anyhow!("Vaddrs is empty!"))?;
382    assert!(dwarf.start_address >= vaddr_first);
383    let shift = dwarf.start_address - vaddr_first;
384
385    // smoelius: Make the shift "permanent".
386    for vaddr in vaddrs.iter_mut() {
387        *vaddr += shift;
388    }
389
390    Ok(dwarf)
391}
392
393fn build_file_line_count_map<'a>(
394    vaddr_entry_map: &BTreeMap<u64, Entry<'a>>,
395    vaddrs: Vaddrs,
396) -> FileLineCountMap<'a> {
397    let mut file_line_count_map = FileLineCountMap::new();
398    for Entry { file, line } in vaddr_entry_map.values() {
399        let line_count_map = file_line_count_map.entry(file).or_default();
400        line_count_map.insert(*line, 0);
401    }
402
403    for vaddr in vaddrs {
404        // smoelius: A `vaddr` could not have an entry because its file does not exist.
405        let Some(entry) = vaddr_entry_map.get(&vaddr) else {
406            continue;
407        };
408        let Some(line_count_map) = file_line_count_map.get_mut(entry.file) else {
409            continue;
410        };
411        let Some(count) = line_count_map.get_mut(&entry.line) else {
412            continue;
413        };
414        *count += 1;
415    }
416
417    file_line_count_map
418}
419
420fn write_lcov_file(regs_path: &Path, file_line_count_map: FileLineCountMap<'_>) -> Result<PathBuf> {
421    let lcov_path = regs_path.with_extension("lcov");
422
423    let mut file = OpenOptions::new()
424        .create(true)
425        .truncate(true)
426        .write(true)
427        .open(&lcov_path)?;
428
429    for (source_file, line_count_map) in file_line_count_map {
430        // smoelius: Stripping `current_dir` from `source_file` has not effect on what's displayed.
431        writeln!(file, "SF:{source_file}")?;
432        for (line, count) in line_count_map {
433            writeln!(file, "DA:{line},{count}")?;
434        }
435        writeln!(file, "end_of_record")?;
436    }
437
438    Ok(lcov_path)
439}