// cargo-crap 0.0.2
//
// Change Risk Anti-Patterns (CRAP) metric for Rust projects
// Documentation
//! Join complexity data (per-function) with coverage data (per-file) into
//! CRAP entries.
//!
//! ## The path-matching problem
//!
//! This is where the silent failure mode lives. The complexity pass gives
//! us absolute paths (whatever was passed to `analyze_tree`). LCOV files
//! can contain:
//!
//! 1. **Absolute paths**  — `/home/alice/project/src/foo.rs`
//! 2. **Workspace-relative paths** — `src/foo.rs`
//! 3. **Crate-relative paths in a workspace** — `crates/core/src/foo.rs`
//! 4. **Paths with `./` or `../` components** — `./src/foo.rs`
//!
//! `cargo llvm-cov` by default emits workspace-relative paths. `cargo tarpaulin`
//! emits absolute paths. CI systems with symlinked or containerized
//! checkouts mix both. A naïve `HashMap<PathBuf, _>` lookup will silently
//! return `None` for 100% of files and report every function as "0%
//! covered" — which is exactly the class of bug where a green CI suddenly
//! starts red-lining a whole codebase.
//!
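//! Our strategy: build a two-tier lookup. Absolute coverage paths that can be
//! canonicalized are indexed by their canonical form for an exact match.
//! Every other coverage path (relative, or absolute but not present on this
//! machine) is kept as written and matches a complexity path when it is a
//! component-wise suffix of that path.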

use crate::complexity::FunctionComplexity;
use crate::coverage::FileCoverage;
use crate::score::crap;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};

/// One row in the final report.
///
/// `merge` emits one entry per analyzed function (unless the function is
/// dropped by `MissingCoveragePolicy::Skip`).
#[derive(Debug, Clone, Serialize, serde::Deserialize)]
pub struct CrapEntry {
    /// Path of the source file, copied verbatim from the function's
    /// complexity record.
    pub file: PathBuf,
    /// Function name, copied from the complexity record.
    pub function: String,
    /// Line on which the function starts (the complexity record's
    /// `start_line`).
    pub line: usize,
    /// Cyclomatic complexity of the function.
    pub cyclomatic: f64,
    /// Percentage; may be `None` if we could not find coverage data for
    /// this file at all. That's different from "0% covered" — it means the
    /// coverage report didn't mention the file.
    pub coverage: Option<f64>,
    /// CRAP score computed from `cyclomatic` and the coverage percentage.
    /// When `coverage` is `None`, the score was computed with the value the
    /// active `MissingCoveragePolicy` substitutes (0 or 100), so this field
    /// is always populated even though `coverage` is not.
    pub crap: f64,
}

/// How to treat functions we have complexity data for but no coverage data.
///
/// Deserializes from the lowercase variant names (`"pessimistic"`,
/// `"optimistic"`, `"skip"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MissingCoveragePolicy {
    /// Assume 0% coverage. Pessimistic — good for CI gates, where unmapped
    /// files are a red flag worth surfacing.
    Pessimistic,
    /// Assume 100% coverage. Optimistic — suitable for interactive use where
    /// you've scoped coverage to a subset of the tree intentionally.
    Optimistic,
    /// Skip the function entirely; don't emit a row.
    Skip,
}

/// Merge complexity and coverage data into a sorted `Vec<CrapEntry>`
/// (highest score first).
///
/// For every function in `complexity`, the coverage map is searched for the
/// function's file (tolerating absolute/relative path mismatches) and the
/// coverage of the function's line span is taken from the matching file.
///
/// * `complexity` — per-function complexity records; consumed.
/// * `coverage` — per-file coverage keyed by the path written in the
///   coverage report.
/// * `policy` — what to do when no coverage entry matches a function's file:
///   score it as 0%, score it as 100%, or drop the row.
///
/// In the returned entries, `coverage` holds the *measured* value (`None`
/// when no file matched), while `crap` is always computed — using the
/// policy's substitute percentage when nothing was measured.
///
/// Ordering is by descending `crap` using the IEEE total order, so the
/// comparator is a genuine total order even if a score is NaN. The sort is
/// stable: functions with equal scores keep their input order.
pub fn merge(
    complexity: Vec<FunctionComplexity>,
    coverage: HashMap<PathBuf, FileCoverage>,
    policy: MissingCoveragePolicy,
) -> Vec<CrapEntry> {
    let index = PathIndex::build(&coverage);

    let mut entries: Vec<CrapEntry> = complexity
        .into_iter()
        .filter_map(|fc| {
            // `None` means "the coverage report never mentioned this file",
            // which is kept distinct from a measured 0%.
            let measured = index
                .lookup(&fc.file)
                .map(|file_cov| file_cov.coverage_in_span(fc.start_line, fc.end_line));

            let pct_for_scoring = match (measured, policy) {
                (Some(pct), _) => pct,
                (None, MissingCoveragePolicy::Pessimistic) => 0.0,
                (None, MissingCoveragePolicy::Optimistic) => 100.0,
                (None, MissingCoveragePolicy::Skip) => return None,
            };

            Some(CrapEntry {
                crap: crap(fc.cyclomatic, pct_for_scoring),
                file: fc.file,
                function: fc.name,
                line: fc.start_line,
                cyclomatic: fc.cyclomatic,
                coverage: measured,
            })
        })
        .collect();

    // `total_cmp` rather than `partial_cmp(..).unwrap_or(Equal)`: the latter
    // is not a total order once a NaN is present, which `sort_by` requires.
    entries.sort_by(|a, b| b.crap.total_cmp(&a.crap));
    entries
}

/// A path lookup index that handles absolute-vs-relative mismatches between
/// the complexity pass (which has whatever was on the command line) and the
/// coverage file (which has whatever the coverage tool decided to write).
///
/// The index borrows its `FileCoverage` values from the coverage map it was
/// built from (lifetime `'a`); it never copies coverage data.
struct PathIndex<'a> {
    /// Canonicalized absolute paths → coverage data. Fast path.
    by_absolute: HashMap<PathBuf, &'a FileCoverage>,
    /// Original (possibly relative) paths kept for suffix matching. We keep
    /// them as `(full_path, coverage)` so we can suffix-compare cheaply.
    /// Absolute paths that could not be canonicalized also end up here,
    /// stored exactly as they appeared in the coverage map.
    by_relative: Vec<(PathBuf, &'a FileCoverage)>,
}

impl<'a> PathIndex<'a> {
    /// Build the index over `coverage`, borrowing every `FileCoverage`.
    ///
    /// Absolute paths that canonicalize successfully go into the exact-match
    /// map; everything else is kept verbatim for suffix matching, ordered
    /// most-specific-first so lookups are deterministic.
    fn build(coverage: &'a HashMap<PathBuf, FileCoverage>) -> Self {
        let mut by_absolute = HashMap::new();
        let mut by_relative = Vec::new();

        for (raw_path, cov) in coverage {
            // CRITICAL: only *absolute* paths are canonicalized. A relative
            // path like `src/lib.rs` in an LCOV file means "some file whose
            // component-suffix is this" — it must NOT be resolved against
            // the caller's CWD, because the CWD is an accident of invocation.
            // Canonicalizing unconditionally would, whenever the CWD happens
            // to contain a matching path, silently bind the coverage entry
            // to the wrong file and make every real function look uncovered.
            // The integration test
            // `end_to_end_pipeline_produces_ranked_scores` exists
            // specifically to catch a regression back into that behavior.
            let canonical = if raw_path.is_absolute() {
                raw_path.canonicalize().ok()
            } else {
                None
            };

            match canonical {
                Some(abs) => {
                    by_absolute.insert(abs, cov);
                },
                // Relative, or absolute but non-existent on this machine
                // (e.g., coverage was produced in a container at a different
                // path): keep the path as written for suffix matching.
                //
                // NOTE(review): an absolute path kept here can only match a
                // query that contains it in full, root included, so a
                // checkout under a different root will still not match.
                // Confirm whether root-stripping is wanted.
                None => by_relative.push((raw_path.clone(), cov)),
            }
        }

        // The source map iterates in arbitrary order. When several coverage
        // paths are suffixes of the same query (`src/lib.rs` and
        // `crates/core/src/lib.rs`), the most specific one must win, and it
        // must win on every run. Sort longest-first, ties broken by path.
        by_relative.sort_by(|(a, _), (b, _)| {
            b.components()
                .count()
                .cmp(&a.components().count())
                .then_with(|| a.cmp(b))
        });

        Self {
            by_absolute,
            by_relative,
        }
    }

    /// Find the coverage data for the complexity-pass path `query`.
    ///
    /// Returns `None` when no coverage path matches; the caller decides what
    /// that means via its missing-coverage policy.
    fn lookup(
        &self,
        query: &Path,
    ) -> Option<&'a FileCoverage> {
        // Canonicalization fails for paths that do not exist on disk; that
        // is not an error here, it just disables the canonical comparisons.
        let canonical = query.canonicalize().ok();

        // Fast path: direct canonical match.
        if let Some(cov) = canonical
            .as_ref()
            .and_then(|abs| self.by_absolute.get(abs))
        {
            return Some(*cov);
        }

        // Slow path: suffix match. A coverage path `src/foo.rs` matches a
        // complexity path `.../project/src/foo.rs` if the former is a
        // component-wise suffix of the latter. Both the query as given and
        // its canonical form are tried, so a relative query can still match
        // a longer workspace-relative coverage path. `by_relative` is sorted
        // most-specific-first, so the first hit is the best one.
        self.by_relative
            .iter()
            .find(|(rel, _)| {
                path_has_suffix(query, rel)
                    || canonical
                        .as_deref()
                        .is_some_and(|abs| path_has_suffix(abs, rel))
            })
            .map(|(_, cov)| *cov)
    }
}

/// True if `haystack` ends with `needle`, compared component by component.
///
/// This is stricter than a byte-level `ends_with`: `foo/bar.rs` must not
/// match `oofoo/bar.rs`. Cross-platform separators are handled because
/// `Path::components` normalizes them.
///
/// Two special cases:
///
/// * A leading `./` on `needle` is ignored, so `./src/foo.rs` matches
///   `/project/src/foo.rs`. `Path::components` only drops interior `.`
///   segments; a leading one would otherwise defeat the comparison.
/// * A `needle` with no components left (empty, or just `.`) never matches.
///   An empty path carries no information and must not bind to every file.
///
/// `..` components are compared literally; they are not resolved.
fn path_has_suffix(
    haystack: &Path,
    needle: &Path,
) -> bool {
    let needle_parts: Vec<_> = needle
        .components()
        .skip_while(|part| matches!(part, std::path::Component::CurDir))
        .collect();
    if needle_parts.is_empty() {
        return false;
    }

    let hay_parts: Vec<_> = haystack.components().collect();
    // Slice `ends_with` already returns false when the needle is longer.
    hay_parts.ends_with(&needle_parts)
}

// Unit tests for the path index, the suffix matcher, and `merge`'s ordering
// and missing-coverage handling.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::path::PathBuf;

    /// Builds a `FileCoverage` whose `lines` map is collected from the given
    /// pairs — presumably `(line number, hit count)`; confirm against
    /// `FileCoverage`.
    fn cov_with(lines: &[(u32, u64)]) -> FileCoverage {
        FileCoverage {
            lines: lines.iter().copied().collect::<BTreeMap<_, _>>(),
        }
    }

    #[test]
    fn suffix_match_works_for_relative_coverage_paths() {
        // Simulates the realistic case: coverage file was generated with
        // `cargo llvm-cov` in the workspace root, producing relative paths.
        let mut cov_map = HashMap::new();
        cov_map.insert(PathBuf::from("src/foo.rs"), cov_with(&[(10, 1), (11, 1)]));
        let index = PathIndex::build(&cov_map);

        // The query path need not exist on disk; the match comes from the
        // suffix comparison.
        let complexity_path = PathBuf::from("/home/alice/project/src/foo.rs");
        let result = index.lookup(&complexity_path);
        assert!(result.is_some(), "expected suffix match to succeed");
    }

    #[test]
    fn suffix_match_rejects_partial_component_matches() {
        // `oofoo.rs` should NOT match `foo.rs` — that's a byte-level
        // ends_with bug we're explicitly avoiding.
        let a = PathBuf::from("/project/src/oofoo.rs");
        let b = PathBuf::from("foo.rs");
        assert!(!path_has_suffix(&a, &b));
    }

    #[test]
    fn equal_length_paths_match_when_identical() {
        // Kills: replace > with == and > with >= in the nee.len() > hay.len() guard.
        // If the guard fired for equal-length paths, identical paths would return false.
        let a = PathBuf::from("/project/src/foo.rs");
        let b = PathBuf::from("/project/src/foo.rs");
        assert!(
            path_has_suffix(&a, &b),
            "identical paths must match as a suffix"
        );
    }

    #[test]
    fn longer_needle_does_not_match() {
        // Needle longer than haystack must always return false.
        let hay = PathBuf::from("src/foo.rs");
        let needle = PathBuf::from("/abs/project/src/foo.rs");
        assert!(!path_has_suffix(&hay, &needle));
    }

    #[test]
    fn merge_sorts_by_descending_crap() {
        // Input order is deliberately "easy" then "hard" so the assertion
        // fails if `merge` returns rows unsorted.
        let complexity = vec![
            FunctionComplexity {
                file: PathBuf::from("a.rs"),
                name: "easy".into(),
                start_line: 1,
                end_line: 3,
                cyclomatic: 1.0,
            },
            FunctionComplexity {
                file: PathBuf::from("a.rs"),
                name: "hard".into(),
                start_line: 10,
                end_line: 30,
                cyclomatic: 10.0,
            },
        ];
        // Empty coverage map + pessimistic policy: both functions are scored
        // with 0% coverage, so only complexity separates them.
        let entries = merge(
            complexity,
            HashMap::new(),
            MissingCoveragePolicy::Pessimistic,
        );
        assert_eq!(entries[0].function, "hard");
        assert_eq!(entries[1].function, "easy");
    }

    #[test]
    fn skip_policy_drops_rows_without_coverage() {
        // No coverage entry can match, so `Skip` must yield no rows at all.
        let complexity = vec![FunctionComplexity {
            file: PathBuf::from("nowhere.rs"),
            name: "foo".into(),
            start_line: 1,
            end_line: 5,
            cyclomatic: 3.0,
        }];
        let entries = merge(complexity, HashMap::new(), MissingCoveragePolicy::Skip);
        assert!(entries.is_empty());
    }

    #[test]
    fn relative_coverage_paths_are_not_resolved_against_cwd() {
        // REGRESSION TEST. A relative path in the coverage file must never
        // be canonicalized against the process's CWD, because that causes a
        // silent-binding bug: `src/lib.rs` in LCOV would resolve to
        // `<cwd>/src/lib.rs` (which likely exists — it's the tool's own
        // source), and then the lookup for a DIFFERENT file ending in
        // `src/lib.rs` would miss, returning `None` for every function.
        //
        // We construct exactly this scenario: a relative coverage path that
        // happens to match something real under CWD, and a complexity path
        // that is the "intended" target elsewhere.
        let mut cov_map = HashMap::new();
        cov_map.insert(PathBuf::from("src/lib.rs"), cov_with(&[(10, 1)]));
        let index = PathIndex::build(&cov_map);

        // The relative path must live in `by_relative`, NOT `by_absolute`,
        // even if a file by that relative name happens to exist under CWD.
        assert!(
            index.by_absolute.is_empty(),
            "relative coverage paths must not populate by_absolute"
        );
        assert_eq!(index.by_relative.len(), 1);

        // Lookup for an unrelated absolute path ending in src/lib.rs must
        // succeed via suffix match.
        let found = index.lookup(Path::new("/somewhere/else/src/lib.rs"));
        assert!(found.is_some());
    }
}