cargo_crap/merge.rs
1//! Join complexity data (per-function) with coverage data (per-file) into
2//! CRAP entries.
3//!
4//! ## The path-matching problem
5//!
6//! This is where the silent failure mode lives. The complexity pass gives
7//! us absolute paths (whatever was passed to `analyze_tree`). LCOV files
8//! can contain:
9//!
10//! 1. **Absolute paths** — `/home/alice/project/src/foo.rs`
11//! 2. **Workspace-relative paths** — `src/foo.rs`
12//! 3. **Crate-relative paths in a workspace** — `crates/core/src/foo.rs`
13//! 4. **Paths with `./` or `../` components** — `./src/foo.rs`
14//!
15//! `cargo llvm-cov` by default emits workspace-relative paths. `cargo tarpaulin`
16//! emits absolute paths. CI systems with symlinked or containerized
17//! checkouts mix both. A naïve `HashMap<PathBuf, _>` lookup will silently
18//! return `None` for 100% of files and report every function as "0%
19//! covered" — which is exactly the class of bug where a green CI suddenly
20//! starts red-lining a whole codebase.
21//!
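//! Our strategy is a two-tier lookup. Absolute coverage paths that exist on
//! disk are canonicalized and matched exactly against the canonicalized
//! complexity path. Every other coverage path (relative, or absolute but
//! non-existent on this machine) is kept verbatim and matches a complexity
//! path when it is a component-wise suffix of that path.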
25
26use crate::complexity::FunctionComplexity;
27use crate::coverage::FileCoverage;
28use crate::score::crap;
29use serde::{Deserialize, Serialize};
30use std::collections::{HashMap, HashSet};
31use std::path::{Path, PathBuf};
32
33/// One row in the final report.
34#[derive(Debug, Clone, Serialize, serde::Deserialize)]
35pub struct CrapEntry {
36 pub file: PathBuf,
37 pub function: String,
38 pub line: usize,
39 pub cyclomatic: f64,
40 /// Percentage; may be `None` if we could not find coverage data for
41 /// this file at all. That's different from "0% covered" — it means the
42 /// coverage report didn't mention the file.
43 pub coverage: Option<f64>,
44 pub crap: f64,
45 /// Cargo workspace member name, set by `--workspace` runs after the
46 /// entry's file path has been suffix-matched against a member root.
47 /// Always `None` for non-workspace runs and for older baselines that
48 /// pre-date this field.
49 #[serde(rename = "crate", default, skip_serializing_if = "Option::is_none")]
50 pub crate_name: Option<String>,
51}
52
53/// How to treat functions we have complexity data for but no coverage data.
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
55#[serde(rename_all = "lowercase")]
56pub enum MissingCoveragePolicy {
57 /// Assume 0% coverage. Pessimistic — good for CI gates, where unmapped
58 /// files are a red flag worth surfacing.
59 Pessimistic,
60 /// Assume 100% coverage. Optimistic — suitable for interactive use where
61 /// you've scoped coverage to a subset of the tree intentionally.
62 Optimistic,
63 /// Skip the function entirely; don't emit a row.
64 Skip,
65}
66
67/// Output of [`merge`]: the scored entries plus any source files that had no
68/// matching entry in the LCOV report.
69pub struct MergeResult {
70 /// CRAP entries sorted by score descending.
71 pub entries: Vec<CrapEntry>,
72 /// Source files for which no coverage data could be found in the LCOV
73 /// report. Only populated when a non-empty coverage map was provided.
74 /// Non-empty here is a strong signal of a path-matching problem.
75 pub unmapped_files: Vec<PathBuf>,
76}
77
78/// Merge complexity and coverage data into a sorted [`MergeResult`]
79/// (entries ranked highest score first).
80#[expect(
81 clippy::needless_pass_by_value,
82 reason = "callers always have a fresh HashMap they don't reuse; taking by value matches the consuming pipeline and avoids `&cov` boilerplate at every call site"
83)]
84#[must_use]
85pub fn merge(
86 complexity: Vec<FunctionComplexity>,
87 coverage: HashMap<PathBuf, FileCoverage>,
88 policy: MissingCoveragePolicy,
89) -> MergeResult {
90 let index = PathIndex::build(&coverage);
91 let has_coverage = !coverage.is_empty();
92
93 let mut mapped_files: HashSet<PathBuf> = HashSet::new();
94 let mut seen_files: HashSet<PathBuf> = HashSet::new();
95
96 let mut entries: Vec<CrapEntry> = complexity
97 .into_iter()
98 .filter_map(|fc| {
99 let cov = index
100 .lookup(&fc.file)
101 .map(|cov_file| cov_file.coverage_in_span(fc.start_line, fc.end_line));
102
103 if has_coverage {
104 if cov.is_some() {
105 mapped_files.insert(fc.file.clone());
106 }
107 seen_files.insert(fc.file.clone());
108 }
109
110 let cov_for_scoring = match (cov, policy) {
111 (Some(c), _) => c,
112 (None, MissingCoveragePolicy::Pessimistic) => 0.0,
113 (None, MissingCoveragePolicy::Optimistic) => 100.0,
114 (None, MissingCoveragePolicy::Skip) => return None,
115 };
116
117 let crap_score = crap(fc.cyclomatic, cov_for_scoring);
118 Some(CrapEntry {
119 file: fc.file,
120 function: fc.name,
121 line: fc.start_line,
122 cyclomatic: fc.cyclomatic,
123 coverage: cov,
124 crap: crap_score,
125 crate_name: None,
126 })
127 })
128 .collect();
129
130 entries.sort_by(|a, b| {
131 b.crap
132 .partial_cmp(&a.crap)
133 .unwrap_or(std::cmp::Ordering::Equal)
134 });
135
136 let mut unmapped_files: Vec<PathBuf> = seen_files
137 .into_iter()
138 .filter(|f| !mapped_files.contains(f))
139 .collect();
140 unmapped_files.sort();
141
142 MergeResult {
143 entries,
144 unmapped_files,
145 }
146}
147
148/// A path lookup index that handles absolute-vs-relative mismatches between
149/// the complexity pass (which has whatever was on the command line) and the
150/// coverage file (which has whatever the coverage tool decided to write).
151struct PathIndex<'a> {
152 /// Canonicalized absolute paths → coverage data. Fast path.
153 by_absolute: HashMap<PathBuf, &'a FileCoverage>,
154 /// Original (possibly relative) paths kept for suffix matching. We keep
155 /// them as `(full_path, coverage)` so we can suffix-compare cheaply.
156 by_relative: Vec<(PathBuf, &'a FileCoverage)>,
157}
158
159impl<'a> PathIndex<'a> {
160 fn build(coverage: &'a HashMap<PathBuf, FileCoverage>) -> Self {
161 let mut by_absolute = HashMap::new();
162 let mut by_relative = Vec::new();
163
164 for (raw_path, cov) in coverage {
165 // CRITICAL: we only canonicalize *absolute* paths here. A relative
166 // path like `src/lib.rs` in an LCOV file means "some file whose
167 // component-suffix is this" — it must NOT be resolved against the
168 // caller's CWD, because the CWD is an accident of invocation.
169 // Early versions of this code called `canonicalize()` unconditionally;
170 // if the CWD happened to contain a matching path, the coverage
171 // entry would silently bind to the wrong file and every real
172 // function would come back as 0% covered. The integration test
173 // `end_to_end_pipeline_produces_ranked_scores` exists specifically
174 // to catch a regression back into that behavior.
175 if raw_path.is_absolute() {
176 match raw_path.canonicalize() {
177 Ok(abs) => {
178 by_absolute.insert(abs, cov);
179 },
180 Err(_) => {
181 // Absolute but non-existent (e.g., coverage was
182 // produced in a container at a different path).
183 // Fall back to suffix matching.
184 by_relative.push((raw_path.clone(), cov));
185 },
186 }
187 } else {
188 by_relative.push((raw_path.clone(), cov));
189 }
190 }
191
192 Self {
193 by_absolute,
194 by_relative,
195 }
196 }
197
198 fn lookup(
199 &self,
200 query: &Path,
201 ) -> Option<&'a FileCoverage> {
202 // Fast path: direct canonical match.
203 if let Ok(abs) = query.canonicalize()
204 && let Some(cov) = self.by_absolute.get(&abs)
205 {
206 return Some(*cov);
207 }
208
209 // Slow path: suffix match. A coverage path `src/foo.rs` matches a
210 // complexity path `.../project/src/foo.rs` if the former is a
211 // component-wise suffix of the latter.
212 for (rel, cov) in &self.by_relative {
213 if path_has_suffix(query, rel) {
214 return Some(*cov);
215 }
216 }
217
218 None
219 }
220}
221
/// True if `haystack` ends with `needle`, compared component by component.
///
/// This is stricter than a byte-level `ends_with`: `foo/bar.rs` must not
/// match `oofoo/bar.rs`. Cross-platform separators are handled because the
/// comparison works on path components rather than on raw bytes.
///
/// A leading `.` on `needle` is ignored, so `./src/foo.rs` is treated the
/// same as `src/foo.rs`. Leading `..` components are *not* resolved.
///
/// A needle with no components left (`""` or `"."`) matches nothing: it
/// carries no information about which file it refers to, and treating it as
/// a suffix of every path would bind it to every file.
fn path_has_suffix(
    haystack: &Path,
    needle: &Path,
) -> bool {
    // `Path::components` normalizes interior `.` away but keeps a *leading*
    // one, which can never equal a component in the middle of `haystack`.
    let needle = needle.strip_prefix(".").unwrap_or(needle);
    if needle.as_os_str().is_empty() {
        return false;
    }
    // `Path::ends_with` only considers whole path components.
    haystack.ends_with(needle)
}
238
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::path::PathBuf;

    /// Fixture: a `FileCoverage` whose `lines` map holds the given pairs
    /// (presumably line number → hit count; see `FileCoverage`).
    fn cov_with(lines: &[(u32, u64)]) -> FileCoverage {
        let lines: BTreeMap<_, _> = lines.iter().copied().collect();
        FileCoverage { lines }
    }

    #[test]
    fn suffix_match_works_for_relative_coverage_paths() {
        // The realistic case: `cargo llvm-cov` was run in the workspace root
        // and wrote relative paths into the coverage file.
        let coverage: HashMap<PathBuf, FileCoverage> = HashMap::from([(
            PathBuf::from("src/foo.rs"),
            cov_with(&[(10, 1), (11, 1)]),
        )]);
        let index = PathIndex::build(&coverage);

        let hit = index.lookup(Path::new("/home/alice/project/src/foo.rs"));
        assert!(hit.is_some(), "expected suffix match to succeed");
    }

    #[test]
    fn suffix_match_rejects_partial_component_matches() {
        // `oofoo.rs` ends with the bytes of `foo.rs` but is a different
        // component; a byte-level ends_with would get this wrong.
        let haystack = Path::new("/project/src/oofoo.rs");
        let needle = Path::new("foo.rs");
        assert!(!path_has_suffix(haystack, needle));
    }

    #[test]
    fn equal_length_paths_match_when_identical() {
        // Kills the mutants that turn `>` into `==` or `>=` in the length
        // guard: with either, identical paths would be rejected.
        let path = Path::new("/project/src/foo.rs");
        assert!(
            path_has_suffix(path, path),
            "identical paths must match as a suffix"
        );
    }

    #[test]
    fn longer_needle_does_not_match() {
        // A needle with more components than the haystack can never match.
        let short = Path::new("src/foo.rs");
        let long = Path::new("/abs/project/src/foo.rs");
        assert!(!path_has_suffix(short, long));
    }

    #[test]
    fn merge_sorts_by_descending_crap() {
        let easy = FunctionComplexity {
            file: "a.rs".into(),
            name: String::from("easy"),
            start_line: 1,
            end_line: 3,
            cyclomatic: 1.0,
        };
        let hard = FunctionComplexity {
            file: "a.rs".into(),
            name: String::from("hard"),
            start_line: 10,
            end_line: 30,
            cyclomatic: 10.0,
        };

        let ranked = merge(
            vec![easy, hard],
            HashMap::new(),
            MissingCoveragePolicy::Pessimistic,
        )
        .entries;

        assert_eq!(ranked[0].function, "hard");
        assert_eq!(ranked[1].function, "easy");
    }

    #[test]
    fn skip_policy_drops_rows_without_coverage() {
        let only = FunctionComplexity {
            file: "nowhere.rs".into(),
            name: String::from("foo"),
            start_line: 1,
            end_line: 5,
            cyclomatic: 3.0,
        };

        let merged = merge(vec![only], HashMap::new(), MissingCoveragePolicy::Skip);
        assert!(merged.entries.is_empty());
    }

    #[test]
    fn relative_coverage_paths_are_not_resolved_against_cwd() {
        // REGRESSION TEST. A relative path in the coverage file must never
        // be canonicalized against the process's CWD. Doing so causes a
        // silent-binding bug: `src/lib.rs` in LCOV would resolve to
        // `<cwd>/src/lib.rs` (which likely exists — it's the tool's own
        // source), and the lookup for a DIFFERENT file ending in
        // `src/lib.rs` would then miss, returning `None` for every function.
        //
        // The scenario below is exactly that: a relative coverage path that
        // happens to match something real under CWD, and a complexity path
        // that is the "intended" target elsewhere.
        let coverage: HashMap<PathBuf, FileCoverage> =
            HashMap::from([(PathBuf::from("src/lib.rs"), cov_with(&[(10, 1)]))]);
        let index = PathIndex::build(&coverage);

        // The entry must be filed under `by_relative`, NOT `by_absolute`,
        // even if a file by that relative name exists under CWD.
        assert!(
            index.by_absolute.is_empty(),
            "relative coverage paths must not populate by_absolute"
        );
        assert_eq!(index.by_relative.len(), 1);

        // An unrelated absolute path ending in src/lib.rs must still be
        // found through the suffix match.
        let hit = index.lookup(Path::new("/somewhere/else/src/lib.rs"));
        assert!(hit.is_some());
    }

    #[test]
    fn unmapped_files_reported_when_lcov_provided() {
        let coverage: HashMap<PathBuf, FileCoverage> =
            HashMap::from([(PathBuf::from("src/foo.rs"), cov_with(&[(1, 1)]))]);

        let matched = FunctionComplexity {
            file: "/project/src/foo.rs".into(),
            name: String::from("matched"),
            start_line: 1,
            end_line: 3,
            cyclomatic: 1.0,
        };
        let unmatched = FunctionComplexity {
            file: "/project/src/bar.rs".into(),
            name: String::from("unmatched"),
            start_line: 1,
            end_line: 3,
            cyclomatic: 1.0,
        };

        let merged = merge(
            vec![matched, unmatched],
            coverage,
            MissingCoveragePolicy::Pessimistic,
        );
        assert_eq!(
            merged.unmapped_files,
            [PathBuf::from("/project/src/bar.rs")]
        );
    }

    #[test]
    fn no_unmapped_files_when_no_lcov_provided() {
        let only = FunctionComplexity {
            file: "src/foo.rs".into(),
            name: String::from("foo"),
            start_line: 1,
            end_line: 3,
            cyclomatic: 1.0,
        };

        let merged = merge(
            vec![only],
            HashMap::new(),
            MissingCoveragePolicy::Pessimistic,
        );
        assert!(
            merged.unmapped_files.is_empty(),
            "no lcov → no unmapped warnings"
        );
    }
}