Skip to main content

cargo_crap/
merge.rs

1//! Join complexity data (per-function) with coverage data (per-file) into
2//! CRAP entries.
3//!
4//! ## The path-matching problem
5//!
6//! This is where the silent failure mode lives. The complexity pass gives
7//! us absolute paths (whatever was passed to `analyze_tree`). LCOV files
8//! can contain:
9//!
10//! 1. **Absolute paths**  — `/home/alice/project/src/foo.rs`
11//! 2. **Workspace-relative paths** — `src/foo.rs`
12//! 3. **Crate-relative paths in a workspace** — `crates/core/src/foo.rs`
13//! 4. **Paths with `./` or `../` components** — `./src/foo.rs`
14//!
15//! `cargo llvm-cov` by default emits workspace-relative paths. `cargo tarpaulin`
16//! emits absolute paths. CI systems with symlinked or containerized
17//! checkouts mix both. A naïve `HashMap<PathBuf, _>` lookup will silently
18//! return `None` for 100% of files and report every function as "0%
19//! covered" — which is exactly the class of bug where a green CI suddenly
20//! starts red-lining a whole codebase.
21//!
//! Our strategy: **canonical exact matches with a suffix-match fallback**.
//! Coverage paths that are absolute and exist on disk are canonicalized and
//! matched exactly. Every other coverage path (relative, or absolute but
//! missing on this machine) is matched as a component-wise suffix of the
//! complexity path.
25
26use crate::complexity::FunctionComplexity;
27use crate::coverage::FileCoverage;
28use crate::score::crap;
29use serde::{Deserialize, Serialize};
30use std::collections::{HashMap, HashSet};
31use std::path::{Path, PathBuf};
32
/// One row in the final report: a single function together with its
/// complexity, its coverage (if known), and the resulting CRAP score.
#[derive(Debug, Clone, Serialize, serde::Deserialize)]
pub struct CrapEntry {
    /// Source file containing the function, exactly as reported by the
    /// complexity pass.
    pub file: PathBuf,
    /// Name of the function.
    pub function: String,
    /// Line on which the function starts.
    pub line: usize,
    /// Cyclomatic complexity reported by the complexity pass.
    pub cyclomatic: f64,
    /// Percentage; may be `None` if we could not find coverage data for
    /// this file at all. That's different from "0% covered" — it means the
    /// coverage report didn't mention the file.
    pub coverage: Option<f64>,
    /// CRAP score. When `coverage` is `None` the score was computed from the
    /// value substituted by the active [`MissingCoveragePolicy`], not from a
    /// measured percentage.
    pub crap: f64,
    /// Cargo workspace member name, set by `--workspace` runs after the
    /// entry's file path has been suffix-matched against a member root.
    /// Always `None` for non-workspace runs and for older baselines that
    /// pre-date this field.
    ///
    /// Serialized under the key `crate`; omitted entirely when `None`, and
    /// defaulted to `None` when the key is absent on input.
    #[serde(rename = "crate", default, skip_serializing_if = "Option::is_none")]
    pub crate_name: Option<String>,
}
52
/// How to treat functions we have complexity data for but no coverage data.
///
/// Deserialized from the lowercase variant names: `pessimistic`,
/// `optimistic`, `skip`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MissingCoveragePolicy {
    /// Assume 0% coverage. Pessimistic — good for CI gates, where unmapped
    /// files are a red flag worth surfacing.
    Pessimistic,
    /// Assume 100% coverage. Optimistic — suitable for interactive use where
    /// you've scoped coverage to a subset of the tree intentionally.
    Optimistic,
    /// Skip the function entirely; don't emit a row. The file can still be
    /// reported as unmapped.
    Skip,
}
66
67/// Output of [`merge`]: the scored entries plus any source files that had no
68/// matching entry in the LCOV report.
69pub struct MergeResult {
70    /// CRAP entries sorted by score descending.
71    pub entries: Vec<CrapEntry>,
72    /// Source files for which no coverage data could be found in the LCOV
73    /// report. Only populated when a non-empty coverage map was provided.
74    /// Non-empty here is a strong signal of a path-matching problem.
75    pub unmapped_files: Vec<PathBuf>,
76}
77
78/// Merge complexity and coverage data into a sorted [`MergeResult`]
79/// (entries ranked highest score first).
80#[expect(
81    clippy::needless_pass_by_value,
82    reason = "callers always have a fresh HashMap they don't reuse; taking by value matches the consuming pipeline and avoids `&cov` boilerplate at every call site"
83)]
84#[must_use]
85pub fn merge(
86    complexity: Vec<FunctionComplexity>,
87    coverage: HashMap<PathBuf, FileCoverage>,
88    policy: MissingCoveragePolicy,
89) -> MergeResult {
90    let index = PathIndex::build(&coverage);
91    let has_coverage = !coverage.is_empty();
92
93    let mut mapped_files: HashSet<PathBuf> = HashSet::new();
94    let mut seen_files: HashSet<PathBuf> = HashSet::new();
95
96    let mut entries: Vec<CrapEntry> = complexity
97        .into_iter()
98        .filter_map(|fc| {
99            let cov = index
100                .lookup(&fc.file)
101                .map(|cov_file| cov_file.coverage_in_span(fc.start_line, fc.end_line));
102
103            if has_coverage {
104                if cov.is_some() {
105                    mapped_files.insert(fc.file.clone());
106                }
107                seen_files.insert(fc.file.clone());
108            }
109
110            let cov_for_scoring = match (cov, policy) {
111                (Some(c), _) => c,
112                (None, MissingCoveragePolicy::Pessimistic) => 0.0,
113                (None, MissingCoveragePolicy::Optimistic) => 100.0,
114                (None, MissingCoveragePolicy::Skip) => return None,
115            };
116
117            let crap_score = crap(fc.cyclomatic, cov_for_scoring);
118            Some(CrapEntry {
119                file: fc.file,
120                function: fc.name,
121                line: fc.start_line,
122                cyclomatic: fc.cyclomatic,
123                coverage: cov,
124                crap: crap_score,
125                crate_name: None,
126            })
127        })
128        .collect();
129
130    entries.sort_by(|a, b| {
131        b.crap
132            .partial_cmp(&a.crap)
133            .unwrap_or(std::cmp::Ordering::Equal)
134    });
135
136    let mut unmapped_files: Vec<PathBuf> = seen_files
137        .into_iter()
138        .filter(|f| !mapped_files.contains(f))
139        .collect();
140    unmapped_files.sort();
141
142    MergeResult {
143        entries,
144        unmapped_files,
145    }
146}
147
/// A path lookup index that handles absolute-vs-relative mismatches between
/// the complexity pass (which has whatever was on the command line) and the
/// coverage file (which has whatever the coverage tool decided to write).
///
/// Borrows the coverage map for `'a`; it stores references, not copies.
struct PathIndex<'a> {
    /// Canonicalized absolute paths → coverage data. Fast path.
    by_absolute: HashMap<PathBuf, &'a FileCoverage>,
    /// Coverage paths that were not canonicalized: relative paths, plus
    /// absolute paths that failed to canonicalize. Kept verbatim as
    /// `(full_path, coverage)` so we can suffix-compare cheaply.
    by_relative: Vec<(PathBuf, &'a FileCoverage)>,
}
158
159impl<'a> PathIndex<'a> {
160    fn build(coverage: &'a HashMap<PathBuf, FileCoverage>) -> Self {
161        let mut by_absolute = HashMap::new();
162        let mut by_relative = Vec::new();
163
164        for (raw_path, cov) in coverage {
165            // CRITICAL: we only canonicalize *absolute* paths here. A relative
166            // path like `src/lib.rs` in an LCOV file means "some file whose
167            // component-suffix is this" — it must NOT be resolved against the
168            // caller's CWD, because the CWD is an accident of invocation.
169            // Early versions of this code called `canonicalize()` unconditionally;
170            // if the CWD happened to contain a matching path, the coverage
171            // entry would silently bind to the wrong file and every real
172            // function would come back as 0% covered. The integration test
173            // `end_to_end_pipeline_produces_ranked_scores` exists specifically
174            // to catch a regression back into that behavior.
175            if raw_path.is_absolute() {
176                match raw_path.canonicalize() {
177                    Ok(abs) => {
178                        by_absolute.insert(abs, cov);
179                    },
180                    Err(_) => {
181                        // Absolute but non-existent (e.g., coverage was
182                        // produced in a container at a different path).
183                        // Fall back to suffix matching.
184                        by_relative.push((raw_path.clone(), cov));
185                    },
186                }
187            } else {
188                by_relative.push((raw_path.clone(), cov));
189            }
190        }
191
192        Self {
193            by_absolute,
194            by_relative,
195        }
196    }
197
198    fn lookup(
199        &self,
200        query: &Path,
201    ) -> Option<&'a FileCoverage> {
202        // Fast path: direct canonical match.
203        if let Ok(abs) = query.canonicalize()
204            && let Some(cov) = self.by_absolute.get(&abs)
205        {
206            return Some(*cov);
207        }
208
209        // Slow path: suffix match. A coverage path `src/foo.rs` matches a
210        // complexity path `.../project/src/foo.rs` if the former is a
211        // component-wise suffix of the latter.
212        for (rel, cov) in &self.by_relative {
213            if path_has_suffix(query, rel) {
214                return Some(*cov);
215            }
216        }
217
218        None
219    }
220}
221
/// True if `haystack` ends with `needle`, compared component by component.
///
/// This is stricter than a byte-level `ends_with`: `foo/bar.rs` must not
/// match `oofoo/bar.rs`. Cross-platform separators are handled because
/// `Path::components` normalizes them.
///
/// Two edge cases are handled explicitly:
///
/// * A leading `.` in `needle` is ignored, so `./src/foo.rs` matches
///   `/project/src/foo.rs`. `Path::components` drops interior `.` segments
///   but keeps a leading one as `Component::CurDir`, which would otherwise
///   never equal any trailing component of `haystack`.
/// * An empty `needle` (including a bare `.`) never matches. Without this
///   guard it would be a suffix of every path.
///
/// `..` components are compared literally; they are not resolved.
fn path_has_suffix(
    haystack: &Path,
    needle: &Path,
) -> bool {
    let hay: Vec<_> = haystack.components().collect();
    let nee: Vec<_> = needle
        .components()
        .skip_while(|c| matches!(c, std::path::Component::CurDir))
        .collect();

    // `ends_with` on slices already returns false when `nee` is longer.
    !nee.is_empty() && hay.ends_with(&nee)
}
238
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::path::PathBuf;

    /// Build a `FileCoverage` whose `lines` map holds the given pairs
    /// (presumably line number → execution count; confirm against
    /// `crate::coverage`).
    fn cov_with(lines: &[(u32, u64)]) -> FileCoverage {
        FileCoverage {
            lines: lines.iter().copied().collect::<BTreeMap<_, _>>(),
        }
    }

    #[test]
    fn suffix_match_works_for_relative_coverage_paths() {
        // Simulates the realistic case: coverage file was generated with
        // `cargo llvm-cov` in the workspace root, producing relative paths.
        let mut cov_map = HashMap::new();
        cov_map.insert(PathBuf::from("src/foo.rs"), cov_with(&[(10, 1), (11, 1)]));
        let index = PathIndex::build(&cov_map);

        let complexity_path = PathBuf::from("/home/alice/project/src/foo.rs");
        let result = index.lookup(&complexity_path);
        assert!(result.is_some(), "expected suffix match to succeed");
    }

    #[test]
    fn suffix_match_rejects_partial_component_matches() {
        // `oofoo.rs` should NOT match `foo.rs` — that's a byte-level
        // ends_with bug we're explicitly avoiding.
        let a = PathBuf::from("/project/src/oofoo.rs");
        let b = PathBuf::from("foo.rs");
        assert!(!path_has_suffix(&a, &b));
    }

    #[test]
    fn equal_length_paths_match_when_identical() {
        // Kills: replace > with == and > with >= in the nee.len() > hay.len() guard.
        // If the guard fired for equal-length paths, identical paths would return false.
        let a = PathBuf::from("/project/src/foo.rs");
        let b = PathBuf::from("/project/src/foo.rs");
        assert!(
            path_has_suffix(&a, &b),
            "identical paths must match as a suffix"
        );
    }

    #[test]
    fn longer_needle_does_not_match() {
        // Needle longer than haystack must always return false.
        let hay = PathBuf::from("src/foo.rs");
        let needle = PathBuf::from("/abs/project/src/foo.rs");
        assert!(!path_has_suffix(&hay, &needle));
    }

    #[test]
    fn merge_sorts_by_descending_crap() {
        // No coverage map is supplied, so under the pessimistic policy both
        // functions are scored as 0% covered. The test expects `hard`
        // (cyclomatic 10) to rank above `easy` (cyclomatic 1) even though it
        // comes second in the input.
        let complexity = vec![
            FunctionComplexity {
                file: PathBuf::from("a.rs"),
                name: "easy".into(),
                start_line: 1,
                end_line: 3,
                cyclomatic: 1.0,
            },
            FunctionComplexity {
                file: PathBuf::from("a.rs"),
                name: "hard".into(),
                start_line: 10,
                end_line: 30,
                cyclomatic: 10.0,
            },
        ];
        let result = merge(
            complexity,
            HashMap::new(),
            MissingCoveragePolicy::Pessimistic,
        );
        assert_eq!(result.entries[0].function, "hard");
        assert_eq!(result.entries[1].function, "easy");
    }

    #[test]
    fn skip_policy_drops_rows_without_coverage() {
        // With an empty coverage map every lookup misses, and `Skip` must
        // drop the row instead of scoring it.
        let complexity = vec![FunctionComplexity {
            file: PathBuf::from("nowhere.rs"),
            name: "foo".into(),
            start_line: 1,
            end_line: 5,
            cyclomatic: 3.0,
        }];
        let result = merge(complexity, HashMap::new(), MissingCoveragePolicy::Skip);
        assert!(result.entries.is_empty());
    }

    #[test]
    fn relative_coverage_paths_are_not_resolved_against_cwd() {
        // REGRESSION TEST. A relative path in the coverage file must never
        // be canonicalized against the process's CWD, because that causes a
        // silent-binding bug: `src/lib.rs` in LCOV would resolve to
        // `<cwd>/src/lib.rs` (which likely exists — it's the tool's own
        // source), and then the lookup for a DIFFERENT file ending in
        // `src/lib.rs` would miss, returning `None` for every function.
        //
        // We construct exactly this scenario: a relative coverage path that
        // happens to match something real under CWD, and a complexity path
        // that is the "intended" target elsewhere.
        let mut cov_map = HashMap::new();
        cov_map.insert(PathBuf::from("src/lib.rs"), cov_with(&[(10, 1)]));
        let index = PathIndex::build(&cov_map);

        // The relative path must live in `by_relative`, NOT `by_absolute`,
        // even if a file by that relative name happens to exist under CWD.
        assert!(
            index.by_absolute.is_empty(),
            "relative coverage paths must not populate by_absolute"
        );
        assert_eq!(index.by_relative.len(), 1);

        // Lookup for an unrelated absolute path ending in src/lib.rs must
        // succeed via suffix match.
        let found = index.lookup(Path::new("/somewhere/else/src/lib.rs"));
        assert!(found.is_some());
    }

    #[test]
    fn unmapped_files_reported_when_lcov_provided() {
        // The coverage map mentions only `src/foo.rs`. That suffix-matches
        // `/project/src/foo.rs`; nothing matches `bar.rs`, so it alone must
        // be reported as unmapped.
        let mut cov_map = HashMap::new();
        cov_map.insert(PathBuf::from("src/foo.rs"), cov_with(&[(1, 1)]));

        let complexity = vec![
            FunctionComplexity {
                file: PathBuf::from("/project/src/foo.rs"),
                name: "matched".into(),
                start_line: 1,
                end_line: 3,
                cyclomatic: 1.0,
            },
            FunctionComplexity {
                file: PathBuf::from("/project/src/bar.rs"),
                name: "unmatched".into(),
                start_line: 1,
                end_line: 3,
                cyclomatic: 1.0,
            },
        ];

        let result = merge(complexity, cov_map, MissingCoveragePolicy::Pessimistic);
        assert_eq!(
            result.unmapped_files,
            vec![PathBuf::from("/project/src/bar.rs")]
        );
    }

    #[test]
    fn no_unmapped_files_when_no_lcov_provided() {
        // An empty coverage map stands for "no LCOV given". Every lookup
        // misses, but reporting each file as unmapped would be noise, so the
        // list must stay empty.
        let complexity = vec![FunctionComplexity {
            file: PathBuf::from("src/foo.rs"),
            name: "foo".into(),
            start_line: 1,
            end_line: 3,
            cyclomatic: 1.0,
        }];
        let result = merge(
            complexity,
            HashMap::new(),
            MissingCoveragePolicy::Pessimistic,
        );
        assert!(
            result.unmapped_files.is_empty(),
            "no lcov → no unmapped warnings"
        );
    }
}