Skip to main content

testing_conventions/
patch_coverage.rs

1//! Patch (changed-line) coverage (Python — #132, parent #46).
2//!
3//! Enforces the README Coverage rule's changed-line guarantee: every line a diff
4//! touches must be covered by the unit suite. Where [`crate::coverage`] measures
5//! the *whole* suite against a floor (#26) and the #131 ratchet against a
6//! baseline, this measures only the lines `<base>...HEAD` added or modified —
7//! failing when any changed, executable line is left uncovered.
8//!
9//! Two inputs are combined:
10//!   - the **diff** — [`changed_lines`] runs `git diff --unified=0 <base>...HEAD`
11//!     and returns the new-side line numbers each file gained. This is the diff
12//!     machinery established here, shared by the forthcoming TypeScript / Rust
13//!     twins.
14//!   - the **coverage** — coverage.py's per-file `missing_lines` /
15//!     `missing_branches` ([`crate::coverage::measure_patch_report`]). A changed
16//!     line is uncovered when it is a missing line, or the source of a branch the
17//!     suite never took ([`uncovered_changed_lines`]). Non-executable changed
18//!     lines (comments, blanks) and `coverage`-exempt files have nothing to cover
19//!     and are skipped.
20//!
21//! Relationship to the commit-scoped co-change rule ([`crate::co_change`], #33):
22//! co-change enforces that a changed source and its colocated *test* move
23//! together; patch coverage enforces that the changed *lines* are actually
24//! exercised. They are complementary, not overlapping — co-change can pass (the
25//! test file changed) while patch coverage fails (the change isn't covered), and
26//! vice versa.
27
28use std::collections::{BTreeMap, BTreeSet};
29use std::path::Path;
30use std::process::Command;
31
32use anyhow::{bail, Context, Result};
33
34use crate::coverage::{self, FileCoverage};
35
36/// A changed source line the unit suite doesn't cover — a `root`-relative path
37/// and the 1-based new-side line number.
38#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
39pub struct Uncovered {
40    /// `root`-relative path of the changed file.
41    pub file: String,
42    /// The 1-based new-side line number that isn't covered.
43    pub line: u64,
44}
45
46/// Every line added or modified in `root`'s `<base>...HEAD` diff that the unit
47/// suite doesn't cover, sorted for deterministic output. `omit` is the
48/// `coverage`-rule exemptions (as in [`crate::coverage::measure`]) — an exempt
49/// file is omitted from the run, so its changed lines are lifted.
50///
51/// Scopes to `.py` sources (the Python arm this slice) and returns early — with
52/// no coverage run — when the diff touches none, so a PR that changes only docs or
53/// other languages doesn't pay for a measurement. Requires coverage.py + pytest +
54/// git; an unresolvable `base` surfaces as an error rather than a silent pass.
55pub fn check(root: &Path, base: &str, omit: &[String]) -> Result<Vec<Uncovered>> {
56    let mut changed = changed_lines(root, base)?;
57    changed.retain(|path, _| path.ends_with(".py"));
58    if changed.is_empty() {
59        return Ok(Vec::new());
60    }
61    let report = coverage::measure_patch_report(root, omit)?;
62    let files = relative_keys(report.files, root);
63    Ok(uncovered_changed_lines(&changed, &files))
64}
65
66/// The new-side lines each file gained in `repo`'s `<base>...HEAD` diff, keyed by
67/// `repo`-relative path. The diff machinery shared by the TS / Rust twins.
68///
69/// `<base>...HEAD` is the merge-base diff — the changes this branch introduced
70/// (what a PR shows). `--unified=0` drops context lines so every `+` line is a
71/// real addition; `--no-renames` keeps a rename a delete + an add (the added side
72/// is held to coverage); `--relative` reports paths relative to `repo`. Returns an
73/// error if `git diff` fails (e.g. `base` names no resolvable ref).
74pub fn changed_lines(repo: &Path, base: &str) -> Result<BTreeMap<String, BTreeSet<u64>>> {
75    let range = format!("{base}...HEAD");
76    let output = Command::new("git")
77        .current_dir(repo)
78        .args([
79            "diff",
80            "--no-color",
81            "--no-renames",
82            "--unified=0",
83            "--relative",
84            &range,
85        ])
86        .output()
87        .with_context(|| format!("running `git diff` in `{}`", repo.display()))?;
88    if !output.status.success() {
89        bail!(
90            "`git diff {range}` failed in `{}`: {}",
91            repo.display(),
92            String::from_utf8_lossy(&output.stderr).trim()
93        );
94    }
95    Ok(parse_unified_diff(&String::from_utf8_lossy(&output.stdout)))
96}
97
98/// Pure: parse `git diff --unified=0` output into the new-side lines each file
99/// gained. Tracks the current file from each `+++` header and the new-side line
100/// counter from each `@@ … +c,d @@` hunk header, then records every following `+`
101/// line (a deletion `-` consumes no new-side number). A deleted file
102/// (`+++ /dev/null`) yields no entry.
103fn parse_unified_diff(diff: &str) -> BTreeMap<String, BTreeSet<u64>> {
104    let mut changed: BTreeMap<String, BTreeSet<u64>> = BTreeMap::new();
105    let mut current: Option<String> = None;
106    let mut next_line: u64 = 0;
107    for line in diff.lines() {
108        if let Some(header) = line.strip_prefix("+++ ") {
109            current = new_side_path(header);
110        } else if line.starts_with("@@") {
111            if let Some(start) = hunk_new_start(line) {
112                next_line = start;
113            }
114        } else if line.starts_with('+') {
115            // An added new-side line — the `+++` header is handled above, so this
116            // is diff body. Record it against the current file and advance.
117            if let Some(file) = &current {
118                changed.entry(file.clone()).or_default().insert(next_line);
119            }
120            next_line += 1;
121        }
122        // `-` (deleted) and metadata lines consume no new-side line and are skipped.
123    }
124    changed
125}
126
127/// The `repo`-relative new-side path from a `+++` diff header, or `None` for a
128/// deletion (`+++ /dev/null`). Strips git's `b/` prefix and a trailing tab.
129fn new_side_path(header: &str) -> Option<String> {
130    let path = header
131        .split('\t')
132        .next()
133        .unwrap_or(header)
134        .trim_end_matches('\r');
135    if path == "/dev/null" {
136        return None;
137    }
138    let path = path.strip_prefix("b/").unwrap_or(path);
139    Some(path.replace('\\', "/"))
140}
141
142/// The new-side start line from a hunk header `@@ -a,b +c,d @@ …` — the `c`. With
143/// `--unified=0` the added lines that follow are numbered consecutively from it.
144fn hunk_new_start(header: &str) -> Option<u64> {
145    let plus = header.split_whitespace().find(|t| t.starts_with('+'))?;
146    let digits = plus.trim_start_matches('+');
147    digits.split(',').next().unwrap_or(digits).parse().ok()
148}
149
150/// Pure: every changed line the coverage report marks uncovered — a `missing_line`,
151/// or the source of a `missing_branch` (a branch out of the line the suite never
152/// took). A changed file absent from `files` was not measured (a test file, or a
153/// `coverage`-exempt file omitted from the run) and contributes nothing; a changed
154/// line that is neither missing nor a branch source (a comment or blank) has
155/// nothing to cover. `files` is keyed by `root`-relative path, as `changed` is.
156pub fn uncovered_changed_lines(
157    changed: &BTreeMap<String, BTreeSet<u64>>,
158    files: &BTreeMap<String, FileCoverage>,
159) -> Vec<Uncovered> {
160    let mut uncovered = Vec::new();
161    for (file, lines) in changed {
162        let Some(coverage) = files.get(file) else {
163            continue;
164        };
165        let missing: BTreeSet<u64> = coverage.missing_lines.iter().copied().collect();
166        // The source line of each branch never taken (the first of the
167        // `[src, dst]` pair; `dst` may be negative — an exit — but `src` is a real
168        // line, so a negative drops out via `try_from`).
169        let branch_sources: BTreeSet<u64> = coverage
170            .missing_branches
171            .iter()
172            .filter_map(|pair| pair.first().copied())
173            .filter_map(|src| u64::try_from(src).ok())
174            .collect();
175        for &line in lines {
176            if missing.contains(&line) || branch_sources.contains(&line) {
177                uncovered.push(Uncovered {
178                    file: file.clone(),
179                    line,
180                });
181            }
182        }
183    }
184    uncovered.sort();
185    uncovered
186}
187
188/// Re-key a report's `files` to `root`-relative `/`-joined paths so they match the
189/// diff's paths. coverage.py reports paths relative to where it ran (here `root`),
190/// but an absolute path is stripped to `root` defensively.
191fn relative_keys(
192    files: BTreeMap<String, FileCoverage>,
193    root: &Path,
194) -> BTreeMap<String, FileCoverage> {
195    files
196        .into_iter()
197        .map(|(key, value)| {
198            let path = Path::new(&key);
199            let rel = path
200                .strip_prefix(root)
201                .unwrap_or(path)
202                .to_string_lossy()
203                .replace('\\', "/");
204            (rel, value)
205        })
206        .collect()
207}
208
209#[cfg(test)]
210mod tests {
211    use super::*;
212
213    fn changed(entries: &[(&str, &[u64])]) -> BTreeMap<String, BTreeSet<u64>> {
214        entries
215            .iter()
216            .map(|(path, lines)| (path.to_string(), lines.iter().copied().collect()))
217            .collect()
218    }
219
220    fn file_coverage(missing_lines: &[u64], missing_branches: &[[i64; 2]]) -> FileCoverage {
221        FileCoverage {
222            executed_lines: Vec::new(),
223            missing_lines: missing_lines.to_vec(),
224            excluded_lines: Vec::new(),
225            missing_branches: missing_branches.iter().map(|b| b.to_vec()).collect(),
226        }
227    }
228
229    // ---- parse_unified_diff --------------------------------------------------
230
231    #[test]
232    fn parses_added_lines_from_a_hunk() {
233        // `+4,2` → two added lines numbered from 4; the function context after the
234        // second `@@` is ignored.
235        let diff = "diff --git a/widget.py b/widget.py\n\
236                    index abc..def 100644\n\
237                    --- a/widget.py\n\
238                    +++ b/widget.py\n\
239                    @@ -3,0 +4,2 @@ def f(x):\n\
240                    +    if x == 99:\n\
241                    +        return 7\n";
242        assert_eq!(parse_unified_diff(diff), changed(&[("widget.py", &[4, 5])]));
243    }
244
245    #[test]
246    fn parses_a_new_file_as_added_from_line_one() {
247        let diff = "diff --git a/lonely.py b/lonely.py\n\
248                    new file mode 100644\n\
249                    index 0000000..bbb\n\
250                    --- /dev/null\n\
251                    +++ b/lonely.py\n\
252                    @@ -0,0 +1,2 @@\n\
253                    +def lonely():\n\
254                    +    return 41\n";
255        assert_eq!(parse_unified_diff(diff), changed(&[("lonely.py", &[1, 2])]));
256    }
257
258    #[test]
259    fn a_deletion_only_hunk_records_no_added_lines() {
260        // `+3,0` adds nothing; the `-` lines consume no new-side number.
261        let diff = "diff --git a/widget.py b/widget.py\n\
262                    index abc..def 100644\n\
263                    --- a/widget.py\n\
264                    +++ b/widget.py\n\
265                    @@ -4,2 +3,0 @@ def f(x):\n\
266                    -    dead = 1\n\
267                    -    return dead\n";
268        assert!(parse_unified_diff(diff).is_empty());
269    }
270
271    #[test]
272    fn a_deleted_file_yields_no_entry() {
273        let diff = "diff --git a/gone.py b/gone.py\n\
274                    deleted file mode 100644\n\
275                    index abc..0000000\n\
276                    --- a/gone.py\n\
277                    +++ /dev/null\n\
278                    @@ -1,2 +0,0 @@\n\
279                    -def gone():\n\
280                    -    return 0\n";
281        assert!(parse_unified_diff(diff).is_empty());
282    }
283
284    #[test]
285    fn parses_multiple_files_and_a_single_line_hunk() {
286        // `+2` (no count) is one line at line 2; a nested path is kept verbatim.
287        let diff = "diff --git a/a.py b/a.py\n\
288                    --- a/a.py\n\
289                    +++ b/a.py\n\
290                    @@ -1,0 +2 @@ def a():\n\
291                    +    x = 1\n\
292                    diff --git a/pkg/b.py b/pkg/b.py\n\
293                    --- a/pkg/b.py\n\
294                    +++ b/pkg/b.py\n\
295                    @@ -10,0 +11,1 @@\n\
296                    +    y = 2\n";
297        assert_eq!(
298            parse_unified_diff(diff),
299            changed(&[("a.py", &[2]), ("pkg/b.py", &[11])])
300        );
301    }
302
303    // ---- uncovered_changed_lines --------------------------------------------
304
305    #[test]
306    fn a_missing_changed_line_is_uncovered() {
307        let out = uncovered_changed_lines(
308            &changed(&[("widget.py", &[5])]),
309            &BTreeMap::from([("widget.py".to_string(), file_coverage(&[5], &[]))]),
310        );
311        assert_eq!(
312            out,
313            vec![Uncovered {
314                file: "widget.py".to_string(),
315                line: 5
316            }]
317        );
318    }
319
320    #[test]
321    fn a_covered_changed_line_is_not_reported() {
322        // Line 3 changed but it's neither missing nor a branch source → covered.
323        let out = uncovered_changed_lines(
324            &changed(&[("widget.py", &[3])]),
325            &BTreeMap::from([("widget.py".to_string(), file_coverage(&[5], &[[4, 5]]))]),
326        );
327        assert!(out.is_empty());
328    }
329
330    #[test]
331    fn a_changed_branch_source_is_uncovered() {
332        // Line 4 is executed (not a missing line) but a branch out of it was never
333        // taken (`[4, 5]`), so a change to line 4 is still uncovered.
334        let out = uncovered_changed_lines(
335            &changed(&[("widget.py", &[4])]),
336            &BTreeMap::from([("widget.py".to_string(), file_coverage(&[5], &[[4, 5]]))]),
337        );
338        assert_eq!(
339            out,
340            vec![Uncovered {
341                file: "widget.py".to_string(),
342                line: 4
343            }]
344        );
345    }
346
347    #[test]
348    fn a_negative_branch_dest_is_ignored() {
349        // `[6, -1]` is a branch to a function exit; the source line 6 is what
350        // matters, and a change to line 6 is uncovered.
351        let out = uncovered_changed_lines(
352            &changed(&[("widget.py", &[6])]),
353            &BTreeMap::from([("widget.py".to_string(), file_coverage(&[], &[[6, -1]]))]),
354        );
355        assert_eq!(
356            out,
357            vec![Uncovered {
358                file: "widget.py".to_string(),
359                line: 6
360            }]
361        );
362    }
363
364    #[test]
365    fn a_changed_file_absent_from_coverage_is_skipped() {
366        // A test file (omitted from the run) never appears in the report, so its
367        // changed lines contribute nothing rather than panicking on a lookup.
368        let out = uncovered_changed_lines(
369            &changed(&[("widget_test.py", &[1, 2])]),
370            &BTreeMap::from([("widget.py".to_string(), file_coverage(&[5], &[]))]),
371        );
372        assert!(out.is_empty());
373    }
374
375    #[test]
376    fn reports_are_sorted_across_files_and_lines() {
377        let out = uncovered_changed_lines(
378            &changed(&[("z.py", &[2, 1]), ("a.py", &[9])]),
379            &BTreeMap::from([
380                ("z.py".to_string(), file_coverage(&[1, 2], &[])),
381                ("a.py".to_string(), file_coverage(&[9], &[])),
382            ]),
383        );
384        assert_eq!(
385            out,
386            vec![
387                Uncovered {
388                    file: "a.py".to_string(),
389                    line: 9
390                },
391                Uncovered {
392                    file: "z.py".to_string(),
393                    line: 1
394                },
395                Uncovered {
396                    file: "z.py".to_string(),
397                    line: 2
398                },
399            ]
400        );
401    }
402}