use std::collections::BTreeSet;
use std::path::Path;
use anyhow::{Context, Result};
use camino::Utf8PathBuf;
use serde::{Deserialize, Serialize};
/// Root object of the coverage JSON document that [`extract_hit_ranges`] parses.
///
/// Only the `data` array is read; any other top-level keys are ignored by the
/// derived deserializer.
#[derive(Deserialize)]
pub struct CoverageExport {
    /// Entries of the top-level `data` array. The key is required.
    pub data: Vec<CoverageData>,
}
/// One element of the `data` array in a [`CoverageExport`].
#[derive(Deserialize)]
pub struct CoverageData {
    /// Function records of this entry; an absent `functions` key deserializes
    /// to an empty list.
    #[serde(default)]
    pub functions: Vec<CoverageFunction>,
}
/// Coverage record for a single function.
///
/// Every field falls back to its `Default` value when the key is missing from
/// the JSON object.
#[derive(Deserialize)]
pub struct CoverageFunction {
    /// Execution count of the function; [`extract_hit_ranges`] skips functions
    /// whose count is zero.
    #[serde(default)]
    pub count: u64,
    /// File names referenced by this function. [`CoverageRegion::file_id`] is
    /// used as an index into this list.
    #[serde(default)]
    pub filenames: Vec<String>,
    /// Regions recorded for this function.
    #[serde(default)]
    pub regions: Vec<CoverageRegion>,
}
/// `line_end` value that [`HitRange::sentinel`] assigns: the largest `i64`.
// NOTE(review): the name suggests the sentinel marks a crate-root range that
// covers a whole file — confirm against the callers of `HitRange::sentinel`.
pub const CRATE_ROOT_SENTINEL_END: i64 = i64::MAX;
/// A single coverage region of a function, reduced to the fields this module
/// uses.
///
/// Instances are produced by the hand-written `Deserialize` impl, which reads
/// them from a positional JSON array.
#[derive(Debug)]
pub struct CoverageRegion {
    /// First line of the region (array element 0).
    pub line_start: i64,
    /// Last line of the region (array element 2).
    pub line_end: i64,
    /// Execution count of the region (array element 4).
    pub count: u64,
    /// Index into the owning function's `filenames` list (array element 5).
    pub file_id: usize,
}
impl<'de> Deserialize<'de> for CoverageRegion {
fn deserialize<D: serde::Deserializer<'de>>(d: D) -> std::result::Result<Self, D::Error> {
let raw: Vec<serde_json::Value> = Vec::deserialize(d)?;
let get_u64 = |i: usize| -> std::result::Result<u64, D::Error> {
raw.get(i)
.and_then(|v| v.as_u64())
.ok_or_else(|| serde::de::Error::custom(format!("region missing field {i}")))
};
Ok(CoverageRegion {
line_start: get_u64(0)? as i64,
line_end: get_u64(2)? as i64,
count: get_u64(4)?,
file_id: get_u64(5)? as usize,
})
}
}
/// A span of lines in one file, as collected by [`extract_hit_ranges`].
///
/// The derived ordering compares `file`, then `line_start`, then `line_end`,
/// which is the order used when values are stored in a `BTreeSet`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct HitRange {
    /// UTF-8 path of the file the span belongs to.
    pub file: Utf8PathBuf,
    /// First line of the span.
    pub line_start: i64,
    /// Last line of the span.
    pub line_end: i64,
}
impl HitRange {
    /// Builds the sentinel range for `file`: it starts at line 1 and ends at
    /// [`CRATE_ROOT_SENTINEL_END`].
    pub fn sentinel(file: Utf8PathBuf) -> Self {
        let (line_start, line_end) = (1, CRATE_ROOT_SENTINEL_END);
        Self {
            file,
            line_start,
            line_end,
        }
    }
}
/// Converts `path` into an owned UTF-8 path, using `/` as the separator.
///
/// Returns `None` when `path` is not valid UTF-8. On Windows builds, every
/// backslash in the path is replaced by a forward slash; on other targets the
/// path is returned as-is.
pub fn to_db_relative(path: &Path) -> Option<Utf8PathBuf> {
    let converted = match Utf8PathBuf::try_from(path.to_path_buf()) {
        Ok(p) => p,
        Err(_) => return None,
    };

    let needs_rewrite = cfg!(windows) && converted.as_str().contains('\\');
    if !needs_rewrite {
        return Some(converted);
    }

    let forward = converted.as_str().replace('\\', "/");
    Some(Utf8PathBuf::from(forward))
}
/// Parses an `llvm-cov export` JSON document and collects the executed line
/// spans of every function, relative to `canonical_root`.
///
/// For each function with a non-zero count, the regions with a non-zero count
/// are merged per file id into one span running from the smallest
/// `line_start` to the largest `line_end`. Identical spans collapse because
/// the result is a set.
///
/// A span is silently dropped when its file id has no entry in the function's
/// `filenames`, when the file name does not start with `canonical_root`, or
/// when the relative path is not valid UTF-8.
///
/// # Errors
///
/// Returns an error when `json` cannot be deserialized into a
/// [`CoverageExport`].
pub fn extract_hit_ranges(json: &str, canonical_root: &Path) -> Result<BTreeSet<HitRange>> {
    let export: CoverageExport =
        serde_json::from_str(json).context("failed to parse llvm-cov export JSON")?;

    let mut hits = BTreeSet::new();
    // Scratch buffer of (file id, first line, last line), reused across functions.
    let mut spans: Vec<(usize, i64, i64)> = Vec::new();

    let executed = export
        .data
        .iter()
        .flat_map(|datum| datum.functions.iter())
        .filter(|func| func.count != 0);

    for func in executed {
        spans.clear();

        // Fold every executed region into the span of its file.
        for region in func.regions.iter().filter(|region| region.count != 0) {
            match spans.iter_mut().find(|span| span.0 == region.file_id) {
                Some(span) => {
                    span.1 = span.1.min(region.line_start);
                    span.2 = span.2.max(region.line_end);
                }
                None => spans.push((region.file_id, region.line_start, region.line_end)),
            }
        }

        // Resolve each span to a root-relative path; unresolvable ones are skipped.
        hits.extend(spans.iter().filter_map(|&(file_id, line_start, line_end)| {
            let filename = func.filenames.get(file_id)?;
            let relative = Path::new(filename).strip_prefix(canonical_root).ok()?;
            let file = to_db_relative(relative)?;
            Some(HitRange {
                file,
                line_start,
                line_end,
            })
        }));
    }

    Ok(hits)
}
// Unit tests for `extract_hit_ranges` and `HitRange::sentinel`.
//
// Each region in the JSON fixtures is a positional array; the parser reads
// element 0 (start line), 2 (end line), 4 (count) and 5 (file id).
#[cfg(test)]
mod tests {
use super::*;
// Checks that executed regions of one function are merged into a single span
// per file, that zero-count regions and zero-count functions contribute
// nothing, and that files outside the root are dropped.
#[test]
fn extracts_function_ranges_per_file() {
// The files are created on disk so the paths can be canonicalized below.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/lib.rs"), "").unwrap();
std::fs::write(root.join("src/utils.rs"), "").unwrap();
let canon = root.canonicalize().unwrap();
let abs_lib = root.join("src/lib.rs").canonicalize().unwrap();
let abs_utils = root.join("src/utils.rs").canonicalize().unwrap();
// Encode the paths as JSON string literals (quoted and escaped) so they can
// be spliced directly into the fixture.
let abs_lib = serde_json::to_string(&abs_lib.display().to_string()).unwrap();
let abs_utils = serde_json::to_string(&abs_utils.display().to_string()).unwrap();
// Fixture functions, in order: two overlapping executed regions in lib.rs;
// an executed region plus a zero-count region in lib.rs; one region in
// utils.rs; a zero-count function; a function in a file outside the root.
let json = format!(
r#"{{
"data": [{{
"functions": [
{{
"count": 1,
"filenames": [{abs_lib}],
"regions": [
[10, 0, 12, 0, 5, 0, 0, 0],
[11, 0, 15, 0, 3, 0, 0, 0]
]
}},
{{
"count": 1,
"filenames": [{abs_lib}],
"regions": [
[20, 0, 25, 0, 1, 0, 0, 0],
[22, 0, 23, 0, 0, 0, 0, 0]
]
}},
{{
"count": 1,
"filenames": [{abs_utils}],
"regions": [[5, 0, 7, 0, 2, 0, 0, 0]]
}},
{{
"count": 0,
"filenames": [{abs_lib}],
"regions": [[100, 0, 200, 0, 0, 0, 0, 0]]
}},
{{
"count": 1,
"filenames": ["/rustc/abc/library/std/src/io.rs"],
"regions": [[1, 0, 5, 0, 1, 0, 0, 0]]
}}
]
}}]
}}"#,
);
let ranges = extract_hit_ranges(&json, &canon).unwrap();
// Only the first three functions yield a range; paths are relative to `canon`.
let expected: BTreeSet<HitRange> = [
HitRange {
file: Utf8PathBuf::from("src/lib.rs"),
line_start: 10,
line_end: 15,
},
HitRange {
file: Utf8PathBuf::from("src/lib.rs"),
line_start: 20,
line_end: 25,
},
HitRange {
file: Utf8PathBuf::from("src/utils.rs"),
line_start: 5,
line_end: 7,
},
]
.into_iter()
.collect();
assert_eq!(ranges, expected);
}
// Two function records with identical file and region data must collapse into
// a single entry of the resulting set.
#[test]
fn dedupes_generic_monomorphizations() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/lib.rs"), "").unwrap();
let canon = root.canonicalize().unwrap();
let abs = root.join("src/lib.rs").canonicalize().unwrap();
// JSON-encode the path so it can be embedded in the fixture verbatim.
let abs = serde_json::to_string(&abs.display().to_string()).unwrap();
let json = format!(
r#"{{
"data": [{{
"functions": [
{{
"count": 1,
"filenames": [{abs}],
"regions": [[1, 0, 5, 0, 1, 0, 0, 0]]
}},
{{
"count": 1,
"filenames": [{abs}],
"regions": [[1, 0, 5, 0, 1, 0, 0, 0]]
}}
]
}}]
}}"#,
);
let ranges = extract_hit_ranges(&json, &canon).unwrap();
assert_eq!(ranges.len(), 1);
}
// Pins the sentinel range to lines 1..=i64::MAX via the shared constant.
#[test]
fn sentinel_uses_the_canonical_end_value() {
let r = HitRange::sentinel(Utf8PathBuf::from("src/lib.rs"));
assert_eq!(r.line_start, 1);
assert_eq!(r.line_end, CRATE_ROOT_SENTINEL_END);
assert_eq!(r.line_end, i64::MAX);
}
}