sbpf-coverage 0.2.12

use addr2line::gimli::{DW_AT_language, DW_AT_producer, DW_TAG_compile_unit};
pub use addr2line::{self, Loader};
use anyhow::{Result, anyhow, bail};
use byteorder::{LittleEndian, ReadBytesExt};
pub use object::{Object, ObjectSection};
use std::{
    collections::{BTreeMap, HashSet},
    fs::{File, OpenOptions, metadata},
    io::Write,
    path::{Path, PathBuf},
};

mod branch;
mod trace_disassemble;

mod start_address;
use start_address::start_address;

pub mod toolchain;
pub mod util;
use util::StripCurrentDir;

use crate::util::{
    compute_hash, find_files_with_extension, get_dwarf_attribute, get_section_start_address,
};

mod vaddr;

#[derive(Debug)]
pub struct DebugPath {
    pub path: PathBuf,
    pub producer: Option<String>,
    pub lang: Option<String>,
}

#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Entry<'a> {
    file: &'a str,
    line: u32,
}

struct Dwarf {
    debug_path: DebugPath,
    #[allow(dead_code)]
    so_path: PathBuf,
    so_hash: String,
    #[allow(dead_code, reason = "might be of use in the future")]
    start_address: u64,
    text_section_offset: u64,
    #[allow(dead_code, reason = "`vaddr` points into `loader`")]
    loader: &'static Loader,
    vaddr_entry_map: BTreeMap<u64, Entry<'static>>,
}

enum Outcome {
    Lcov(PathBuf),
    TraceDisassemble,
}

type Vaddrs = Vec<u64>;
type Insns = Vec<u64>;
type Regs = Vec<[u64; 12]>;

type VaddrEntryMap<'a> = BTreeMap<u64, Entry<'a>>;

type FileLineCountMap<'a> = BTreeMap<&'a str, BTreeMap<u32, usize>>;

pub fn run(
    sbf_trace_dir: PathBuf,
    src_paths: HashSet<PathBuf>,
    sbf_paths: Vec<PathBuf>,
    debug: bool,
    trace_disassemble: bool,
    no_color: bool,
) -> Result<()> {
    let mut lcov_paths = Vec::new();

    let debug_paths = debug_paths(sbf_paths)?;

    let dwarfs = debug_paths
        .into_iter()
        .map(|path| build_dwarf(path, &src_paths, trace_disassemble))
        .collect::<Result<Vec<_>>>()
        .expect("Can't build dwarf");

    if dwarfs.is_empty() {
        bail!("Found no .so/.debug/.so.debug files containing debug sections.");
    }

    if debug {
        for dwarf in dwarfs {
            dump_vaddr_entry_map(dwarf.vaddr_entry_map);
        }
        eprintln!("Exiting debug mode.");
        return Ok(());
    }

    let mut regs_paths = find_files_with_extension(std::slice::from_ref(&sbf_trace_dir), "regs");
    if regs_paths.is_empty() {
        bail!(
            "Found no regs files in: {}
Are you sure you run your tests with register tracing enabled",
            sbf_trace_dir.strip_current_dir().display(),
        );
    }
    // Sort paths by modification time.
    regs_paths.sort_by_key(|p| {
        std::fs::metadata(p)
            .and_then(|m| m.modified())
            .unwrap_or(std::time::UNIX_EPOCH)
    });

    for regs_path in &regs_paths {
        match process_regs_path(&dwarfs, regs_path, &src_paths, trace_disassemble, no_color) {
            Ok(Outcome::Lcov(lcov_path)) => {
                lcov_paths.push(lcov_path.strip_current_dir().to_path_buf());
            }
            Ok(Outcome::TraceDisassemble) => {}
            _ => {
                eprintln!(
                    "Skipping Regs file: {} (no matching executable)",
                    regs_path.strip_current_dir().display()
                );
            }
        }
    }

    if !trace_disassemble {
        eprintln!(
            "
Processed {} of {} regs files

Lcov files written: {lcov_paths:#?}

If you are done generating lcov files, try running:

    genhtml --output-directory coverage {}/*.lcov --rc branch_coverage=1 && open coverage/index.html
",
            lcov_paths.len(),
            regs_paths.len(),
            sbf_trace_dir.as_path().strip_current_dir().display()
        );
    }

    Ok(())
}

fn debug_paths(sbf_paths: Vec<PathBuf>) -> Result<Vec<DebugPath>> {
    // It's possible that the debug information is in the .so file itself
    let so_files = find_files_with_extension(&sbf_paths, "so");
    // It's also possible that it ends with .debug
    let debug_files = find_files_with_extension(&sbf_paths, "debug");

    let mut maybe_list = so_files;
    maybe_list.extend(debug_files);

    // Collect only those files that contain debug sections
    let full_list: Vec<DebugPath> = maybe_list
        .into_iter()
        .filter_map(|maybe_path| {
            let data = std::fs::read(&maybe_path).ok()?;
            let object = object::read::File::parse(&*data).ok()?;
            // check it has debug sections
            let has_debug = object
                .sections()
                .any(|section| section.name().is_ok_and(|n| n.starts_with(".debug_")));
            // get compiler information if any
            let producer = get_dwarf_attribute(&object, DW_TAG_compile_unit, DW_AT_producer).ok();
            // get lang information if any
            let lang = get_dwarf_attribute(&object, DW_TAG_compile_unit, DW_AT_language).ok();

            has_debug.then_some(DebugPath {
                path: maybe_path,
                producer,
                lang,
            })
        })
        .collect();

    eprintln!("Debug symbols found:");
    for dp in full_list.iter() {
        eprintln!(
            "  {} (producer: {}, lang: {})",
            dp.path.strip_current_dir().display(),
            dp.producer.as_deref().unwrap_or("unknown"),
            dp.lang.as_deref().unwrap_or("unknown"),
        );
    }
    Ok(full_list)
}

fn build_dwarf(
    debug_path: DebugPath,
    src_paths: &HashSet<PathBuf>,
    trace_disassemble: bool,
) -> Result<Dwarf> {
    let start_address = start_address(&debug_path.path)?;

    let loader = Loader::new(&debug_path.path).map_err(|error| {
        anyhow!(
            "failed to build loader for {}: {}",
            debug_path.path.display(),
            error
        )
    })?;

    let loader = Box::leak(Box::new(loader));

    let vaddr_entry_map =
        build_vaddr_entry_map(loader, &debug_path.path, src_paths, trace_disassemble)?;

    // Suppose debug_path is program.debug, swap with .so and try
    let mut so_path = debug_path.path.with_extension("so");
    let so_content = match std::fs::read(&so_path) {
        Err(e) => {
            if e.kind() == std::io::ErrorKind::NotFound {
                // We might have program.so.debug - simply cut debug and try
                so_path = debug_path.path.with_extension("");
                std::fs::read(&so_path)?
            } else {
                return Err(e.into());
            }
        }
        Ok(c) => c,
    };
    let so_hash = compute_hash(&so_content);
    eprintln!(
        "DWARF: {} -> {} (exec sha256: {})",
        debug_path.path.strip_current_dir().display(),
        so_path.strip_current_dir().display(),
        &so_hash[..16],
    );

    Ok(Dwarf {
        debug_path,
        so_path,
        so_hash,
        start_address,
        loader,
        vaddr_entry_map,
        text_section_offset: get_section_start_address(loader, ".text")?,
    })
}

fn process_regs_path(
    dwarfs: &[Dwarf],
    regs_path: &Path,
    src_paths: &HashSet<PathBuf>,
    trace_disassemble: bool,
    no_color: bool,
) -> Result<Outcome> {
    eprintln!();
    let exec_sha256 = std::fs::read_to_string(regs_path.with_extension("exec.sha256"))?;
    let (mut vaddrs, regs) = read_vaddrs(regs_path)?;
    eprintln!(
        "Regs: {} ({} entries, exec sha256: {})",
        regs_path.strip_current_dir().display(),
        vaddrs.len(),
        &exec_sha256[..16],
    );
    let insns = read_insns(&regs_path.with_extension("insns"))?;

    let dwarf = find_applicable_dwarf(dwarfs, regs_path, &exec_sha256, &mut vaddrs)?;

    if trace_disassemble {
        return trace_disassemble::trace_disassemble(
            src_paths, regs_path, &vaddrs, dwarf, !no_color,
        );
    }

    // smoelius: If a sequence of Regs refer to the same file and line, treat them as
    // one hit to that file and line.
    // vaddrs.dedup_by_key::<_, Option<&Entry>>(|vaddr| dwarf.vaddr_entry_map.get(vaddr));

    if let Ok(branches) = branch::get_branches(&vaddrs, &insns, &regs, dwarf) {
        let _ = branch::write_branch_coverage(&branches, regs_path, src_paths);
    }

    // smoelius: A `vaddr` could not have an entry because its file does not exist. Keep only those
    // `vaddr`s that have entries.
    let vaddrs = vaddrs
        .into_iter()
        .filter(|vaddr| dwarf.vaddr_entry_map.contains_key(vaddr))
        .collect::<Vec<_>>();

    eprintln!("Line hits: {}", vaddrs.len());

    let file_line_count_map = build_file_line_count_map(&dwarf.vaddr_entry_map, vaddrs);

    write_lcov_file(regs_path, file_line_count_map).map(Outcome::Lcov)
}

fn build_vaddr_entry_map<'a>(
    loader: &'a Loader,
    debug_path: &Path,
    src_paths: &HashSet<PathBuf>,
    trace_disassemble: bool,
) -> Result<VaddrEntryMap<'a>> {
    let mut vaddr_entry_map = VaddrEntryMap::new();
    let metadata = metadata(debug_path)?;
    for vaddr in (0..metadata.len()).step_by(size_of::<u64>()) {
        let location = loader.find_location(vaddr).map_err(|error| {
            anyhow!("failed to find location for address 0x{vaddr:x}: {}", error)
        })?;
        let Some(location) = location else {
            continue;
        };
        let Some(file) = location.file else {
            continue;
        };
        if !trace_disassemble {
            // smoelius: Ignore files that do not exist.
            if !Path::new(file).try_exists()? {
                continue;
            }
            // procdump: ignore files other than what user has provided.
            if !src_paths
                .iter()
                .any(|src_path| file.starts_with(&src_path.to_string_lossy().to_string()))
            {
                continue;
            }
        }
        let Some(line) = location.line else {
            continue;
        };
        // smoelius: Even though we ignore columns, fetch them should we ever want to act on them.
        // let Some(_column) = location.column else {
        //     continue;
        // };
        let entry = vaddr_entry_map.entry(vaddr).or_default();
        entry.file = file;
        entry.line = line;
    }
    Ok(vaddr_entry_map)
}

fn dump_vaddr_entry_map(vaddr_entry_map: BTreeMap<u64, Entry<'_>>) {
    let mut prev = String::new();
    for (vaddr, Entry { file, line }) in vaddr_entry_map {
        let curr = format!("{file}:{line}");
        if prev != curr {
            eprintln!("0x{vaddr:x}: {curr}");
            prev = curr;
        }
    }
}

fn read_insns(insns_path: &Path) -> Result<Insns> {
    let mut insns = Vec::new();
    let mut insns_file = File::open(insns_path)?;
    while let Ok(insn) = insns_file.read_u64::<LittleEndian>() {
        insns.push(insn);
    }
    Ok(insns)
}

fn read_vaddrs(regs_path: &Path) -> Result<(Vaddrs, Regs)> {
    let mut regs = Regs::new();
    let mut vaddrs = Vaddrs::new();
    let mut regs_file = File::open(regs_path)?;

    let mut data_trace = [0u64; 12];
    'outer: loop {
        for item in &mut data_trace {
            match regs_file.read_u64::<LittleEndian>() {
                Err(_) => break 'outer,
                Ok(reg) => *item = reg,
            }
        }

        // NB: the pc is instruction indexed, not byte indexed, keeps it aligned to 8 bytes - hence << 3 -> *8
        let vaddr = data_trace[11] << 3;

        vaddrs.push(vaddr);
        regs.push(data_trace);
    }

    Ok((vaddrs, regs))
}

fn find_applicable_dwarf<'a>(
    dwarfs: &'a [Dwarf],
    regs_path: &Path,
    exec_sha256: &str,
    vaddrs: &mut [u64],
) -> Result<&'a Dwarf> {
    let dwarf = dwarfs
        .iter()
        .find(|dwarf| dwarf.so_hash == exec_sha256)
        .ok_or(anyhow!(
            "Cannot find the shared object that corresponds to: {}",
            exec_sha256
        ))?;

    eprintln!(
        "Matched: {} -> {} (exec sha256: {})",
        regs_path.strip_current_dir().display(),
        dwarf.debug_path.path.strip_current_dir().display(),
        &dwarf.so_hash[..16],
    );

    // Raw vaddrs from the register dump are byte offsets without the text section base.
    // Add text_section_offset so they match the DWARF address space used for lookups.
    for vaddr in vaddrs.iter_mut() {
        *vaddr += dwarf.text_section_offset;
    }

    Ok(dwarf)
}

fn build_file_line_count_map<'a>(
    vaddr_entry_map: &BTreeMap<u64, Entry<'a>>,
    vaddrs: Vaddrs,
) -> FileLineCountMap<'a> {
    let mut file_line_count_map = FileLineCountMap::new();
    for Entry { file, line } in vaddr_entry_map.values() {
        let line_count_map = file_line_count_map.entry(file).or_default();
        line_count_map.insert(*line, 0);
    }

    for vaddr in vaddrs {
        // smoelius: A `vaddr` could not have an entry because its file does not exist.
        let Some(entry) = vaddr_entry_map.get(&vaddr) else {
            continue;
        };
        let Some(line_count_map) = file_line_count_map.get_mut(entry.file) else {
            continue;
        };
        let Some(count) = line_count_map.get_mut(&entry.line) else {
            continue;
        };
        *count += 1;
    }

    file_line_count_map
}

fn write_lcov_file(regs_path: &Path, file_line_count_map: FileLineCountMap<'_>) -> Result<PathBuf> {
    let lcov_path = regs_path.with_extension("lcov");

    let mut file = OpenOptions::new()
        .create(true)
        .truncate(true)
        .write(true)
        .open(&lcov_path)?;

    for (source_file, line_count_map) in file_line_count_map {
        // smoelius: Stripping `current_dir` from `source_file` has not effect on what's displayed.
        writeln!(file, "SF:{source_file}")?;
        for (line, count) in line_count_map {
            writeln!(file, "DA:{line},{count}")?;
        }
        writeln!(file, "end_of_record")?;
    }

    Ok(lcov_path)
}