testing_conventions/patch_coverage.rs
1//! Patch (changed-line) coverage (Python — #132, parent #46).
2//!
3//! Enforces the README Coverage rule's changed-line guarantee: every line a diff
4//! touches must be covered by the unit suite. Where [`crate::coverage`] measures
5//! the *whole* suite against a floor (#26) and the #131 ratchet against a
6//! baseline, this measures only the lines `<base>...HEAD` added or modified —
7//! failing when any changed, executable line is left uncovered.
8//!
9//! Two inputs are combined:
10//! - the **diff** — [`changed_lines`] runs `git diff --unified=0 <base>...HEAD`
11//! and returns the new-side line numbers each file gained. This is the diff
12//! machinery established here, shared by the forthcoming TypeScript / Rust
13//! twins.
14//! - the **coverage** — coverage.py's per-file `missing_lines` /
15//! `missing_branches` ([`crate::coverage::measure_patch_report`]). A changed
16//! line is uncovered when it is a missing line, or the source of a branch the
17//! suite never took ([`uncovered_changed_lines`]). Non-executable changed
18//! lines (comments, blanks) and `coverage`-exempt files have nothing to cover
19//! and are skipped.
20//!
21//! Relationship to the commit-scoped co-change rule ([`crate::co_change`], #33):
22//! co-change enforces that a changed source and its colocated *test* move
23//! together; patch coverage enforces that the changed *lines* are actually
24//! exercised. They are complementary, not overlapping — co-change can pass (the
25//! test file changed) while patch coverage fails (the change isn't covered), and
26//! vice versa.
27
28use std::collections::{BTreeMap, BTreeSet};
29use std::path::Path;
30use std::process::Command;
31
32use anyhow::{bail, Context, Result};
33
34use crate::coverage::{self, FileCoverage};
35
36/// A changed source line the unit suite doesn't cover — a `root`-relative path
37/// and the 1-based new-side line number.
38#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
39pub struct Uncovered {
40 /// `root`-relative path of the changed file.
41 pub file: String,
42 /// The 1-based new-side line number that isn't covered.
43 pub line: u64,
44}
45
46/// Every line added or modified in `root`'s `<base>...HEAD` diff that the unit
47/// suite doesn't cover, sorted for deterministic output. `omit` is the
48/// `coverage`-rule exemptions (as in [`crate::coverage::measure`]) — an exempt
49/// file is omitted from the run, so its changed lines are lifted.
50///
51/// Scopes to `.py` sources (the Python arm this slice) and returns early — with
52/// no coverage run — when the diff touches none, so a PR that changes only docs or
53/// other languages doesn't pay for a measurement. Requires coverage.py + pytest +
54/// git; an unresolvable `base` surfaces as an error rather than a silent pass.
55pub fn check(root: &Path, base: &str, omit: &[String]) -> Result<Vec<Uncovered>> {
56 let mut changed = changed_lines(root, base)?;
57 changed.retain(|path, _| path.ends_with(".py"));
58 if changed.is_empty() {
59 return Ok(Vec::new());
60 }
61 let report = coverage::measure_patch_report(root, omit)?;
62 let files = relative_keys(report.files, root);
63 Ok(uncovered_changed_lines(&changed, &files))
64}
65
66/// The new-side lines each file gained in `repo`'s `<base>...HEAD` diff, keyed by
67/// `repo`-relative path. The diff machinery shared by the TS / Rust twins.
68///
69/// `<base>...HEAD` is the merge-base diff — the changes this branch introduced
70/// (what a PR shows). `--unified=0` drops context lines so every `+` line is a
71/// real addition; `--no-renames` keeps a rename a delete + an add (the added side
72/// is held to coverage); `--relative` reports paths relative to `repo`. Returns an
73/// error if `git diff` fails (e.g. `base` names no resolvable ref).
74pub fn changed_lines(repo: &Path, base: &str) -> Result<BTreeMap<String, BTreeSet<u64>>> {
75 let range = format!("{base}...HEAD");
76 let output = Command::new("git")
77 .current_dir(repo)
78 .args([
79 "diff",
80 "--no-color",
81 "--no-renames",
82 "--unified=0",
83 "--relative",
84 &range,
85 ])
86 .output()
87 .with_context(|| format!("running `git diff` in `{}`", repo.display()))?;
88 if !output.status.success() {
89 bail!(
90 "`git diff {range}` failed in `{}`: {}",
91 repo.display(),
92 String::from_utf8_lossy(&output.stderr).trim()
93 );
94 }
95 Ok(parse_unified_diff(&String::from_utf8_lossy(&output.stdout)))
96}
97
98/// Pure: parse `git diff --unified=0` output into the new-side lines each file
99/// gained. Tracks the current file from each `+++` header and the new-side line
100/// counter from each `@@ … +c,d @@` hunk header, then records every following `+`
101/// line (a deletion `-` consumes no new-side number). A deleted file
102/// (`+++ /dev/null`) yields no entry.
103fn parse_unified_diff(diff: &str) -> BTreeMap<String, BTreeSet<u64>> {
104 let mut changed: BTreeMap<String, BTreeSet<u64>> = BTreeMap::new();
105 let mut current: Option<String> = None;
106 let mut next_line: u64 = 0;
107 for line in diff.lines() {
108 if let Some(header) = line.strip_prefix("+++ ") {
109 current = new_side_path(header);
110 } else if line.starts_with("@@") {
111 if let Some(start) = hunk_new_start(line) {
112 next_line = start;
113 }
114 } else if line.starts_with('+') {
115 // An added new-side line — the `+++` header is handled above, so this
116 // is diff body. Record it against the current file and advance.
117 if let Some(file) = ¤t {
118 changed.entry(file.clone()).or_default().insert(next_line);
119 }
120 next_line += 1;
121 }
122 // `-` (deleted) and metadata lines consume no new-side line and are skipped.
123 }
124 changed
125}
126
127/// The `repo`-relative new-side path from a `+++` diff header, or `None` for a
128/// deletion (`+++ /dev/null`). Strips git's `b/` prefix and a trailing tab.
129fn new_side_path(header: &str) -> Option<String> {
130 let path = header
131 .split('\t')
132 .next()
133 .unwrap_or(header)
134 .trim_end_matches('\r');
135 if path == "/dev/null" {
136 return None;
137 }
138 let path = path.strip_prefix("b/").unwrap_or(path);
139 Some(path.replace('\\', "/"))
140}
141
142/// The new-side start line from a hunk header `@@ -a,b +c,d @@ …` — the `c`. With
143/// `--unified=0` the added lines that follow are numbered consecutively from it.
144fn hunk_new_start(header: &str) -> Option<u64> {
145 let plus = header.split_whitespace().find(|t| t.starts_with('+'))?;
146 let digits = plus.trim_start_matches('+');
147 digits.split(',').next().unwrap_or(digits).parse().ok()
148}
149
150/// Pure: every changed line the coverage report marks uncovered — a `missing_line`,
151/// or the source of a `missing_branch` (a branch out of the line the suite never
152/// took). A changed file absent from `files` was not measured (a test file, or a
153/// `coverage`-exempt file omitted from the run) and contributes nothing; a changed
154/// line that is neither missing nor a branch source (a comment or blank) has
155/// nothing to cover. `files` is keyed by `root`-relative path, as `changed` is.
156pub fn uncovered_changed_lines(
157 changed: &BTreeMap<String, BTreeSet<u64>>,
158 files: &BTreeMap<String, FileCoverage>,
159) -> Vec<Uncovered> {
160 let mut uncovered = Vec::new();
161 for (file, lines) in changed {
162 let Some(coverage) = files.get(file) else {
163 continue;
164 };
165 let missing: BTreeSet<u64> = coverage.missing_lines.iter().copied().collect();
166 // The source line of each branch never taken (the first of the
167 // `[src, dst]` pair; `dst` may be negative — an exit — but `src` is a real
168 // line, so a negative drops out via `try_from`).
169 let branch_sources: BTreeSet<u64> = coverage
170 .missing_branches
171 .iter()
172 .filter_map(|pair| pair.first().copied())
173 .filter_map(|src| u64::try_from(src).ok())
174 .collect();
175 for &line in lines {
176 if missing.contains(&line) || branch_sources.contains(&line) {
177 uncovered.push(Uncovered {
178 file: file.clone(),
179 line,
180 });
181 }
182 }
183 }
184 uncovered.sort();
185 uncovered
186}
187
188/// Re-key a report's `files` to `root`-relative `/`-joined paths so they match the
189/// diff's paths. coverage.py reports paths relative to where it ran (here `root`),
190/// but an absolute path is stripped to `root` defensively.
191fn relative_keys(
192 files: BTreeMap<String, FileCoverage>,
193 root: &Path,
194) -> BTreeMap<String, FileCoverage> {
195 files
196 .into_iter()
197 .map(|(key, value)| {
198 let path = Path::new(&key);
199 let rel = path
200 .strip_prefix(root)
201 .unwrap_or(path)
202 .to_string_lossy()
203 .replace('\\', "/");
204 (rel, value)
205 })
206 .collect()
207}
208
209#[cfg(test)]
210mod tests {
211 use super::*;
212
213 fn changed(entries: &[(&str, &[u64])]) -> BTreeMap<String, BTreeSet<u64>> {
214 entries
215 .iter()
216 .map(|(path, lines)| (path.to_string(), lines.iter().copied().collect()))
217 .collect()
218 }
219
220 fn file_coverage(missing_lines: &[u64], missing_branches: &[[i64; 2]]) -> FileCoverage {
221 FileCoverage {
222 executed_lines: Vec::new(),
223 missing_lines: missing_lines.to_vec(),
224 excluded_lines: Vec::new(),
225 missing_branches: missing_branches.iter().map(|b| b.to_vec()).collect(),
226 }
227 }
228
229 // ---- parse_unified_diff --------------------------------------------------
230
231 #[test]
232 fn parses_added_lines_from_a_hunk() {
233 // `+4,2` → two added lines numbered from 4; the function context after the
234 // second `@@` is ignored.
235 let diff = "diff --git a/widget.py b/widget.py\n\
236 index abc..def 100644\n\
237 --- a/widget.py\n\
238 +++ b/widget.py\n\
239 @@ -3,0 +4,2 @@ def f(x):\n\
240 + if x == 99:\n\
241 + return 7\n";
242 assert_eq!(parse_unified_diff(diff), changed(&[("widget.py", &[4, 5])]));
243 }
244
245 #[test]
246 fn parses_a_new_file_as_added_from_line_one() {
247 let diff = "diff --git a/lonely.py b/lonely.py\n\
248 new file mode 100644\n\
249 index 0000000..bbb\n\
250 --- /dev/null\n\
251 +++ b/lonely.py\n\
252 @@ -0,0 +1,2 @@\n\
253 +def lonely():\n\
254 + return 41\n";
255 assert_eq!(parse_unified_diff(diff), changed(&[("lonely.py", &[1, 2])]));
256 }
257
258 #[test]
259 fn a_deletion_only_hunk_records_no_added_lines() {
260 // `+3,0` adds nothing; the `-` lines consume no new-side number.
261 let diff = "diff --git a/widget.py b/widget.py\n\
262 index abc..def 100644\n\
263 --- a/widget.py\n\
264 +++ b/widget.py\n\
265 @@ -4,2 +3,0 @@ def f(x):\n\
266 - dead = 1\n\
267 - return dead\n";
268 assert!(parse_unified_diff(diff).is_empty());
269 }
270
271 #[test]
272 fn a_deleted_file_yields_no_entry() {
273 let diff = "diff --git a/gone.py b/gone.py\n\
274 deleted file mode 100644\n\
275 index abc..0000000\n\
276 --- a/gone.py\n\
277 +++ /dev/null\n\
278 @@ -1,2 +0,0 @@\n\
279 -def gone():\n\
280 - return 0\n";
281 assert!(parse_unified_diff(diff).is_empty());
282 }
283
284 #[test]
285 fn parses_multiple_files_and_a_single_line_hunk() {
286 // `+2` (no count) is one line at line 2; a nested path is kept verbatim.
287 let diff = "diff --git a/a.py b/a.py\n\
288 --- a/a.py\n\
289 +++ b/a.py\n\
290 @@ -1,0 +2 @@ def a():\n\
291 + x = 1\n\
292 diff --git a/pkg/b.py b/pkg/b.py\n\
293 --- a/pkg/b.py\n\
294 +++ b/pkg/b.py\n\
295 @@ -10,0 +11,1 @@\n\
296 + y = 2\n";
297 assert_eq!(
298 parse_unified_diff(diff),
299 changed(&[("a.py", &[2]), ("pkg/b.py", &[11])])
300 );
301 }
302
303 // ---- uncovered_changed_lines --------------------------------------------
304
305 #[test]
306 fn a_missing_changed_line_is_uncovered() {
307 let out = uncovered_changed_lines(
308 &changed(&[("widget.py", &[5])]),
309 &BTreeMap::from([("widget.py".to_string(), file_coverage(&[5], &[]))]),
310 );
311 assert_eq!(
312 out,
313 vec![Uncovered {
314 file: "widget.py".to_string(),
315 line: 5
316 }]
317 );
318 }
319
320 #[test]
321 fn a_covered_changed_line_is_not_reported() {
322 // Line 3 changed but it's neither missing nor a branch source → covered.
323 let out = uncovered_changed_lines(
324 &changed(&[("widget.py", &[3])]),
325 &BTreeMap::from([("widget.py".to_string(), file_coverage(&[5], &[[4, 5]]))]),
326 );
327 assert!(out.is_empty());
328 }
329
330 #[test]
331 fn a_changed_branch_source_is_uncovered() {
332 // Line 4 is executed (not a missing line) but a branch out of it was never
333 // taken (`[4, 5]`), so a change to line 4 is still uncovered.
334 let out = uncovered_changed_lines(
335 &changed(&[("widget.py", &[4])]),
336 &BTreeMap::from([("widget.py".to_string(), file_coverage(&[5], &[[4, 5]]))]),
337 );
338 assert_eq!(
339 out,
340 vec![Uncovered {
341 file: "widget.py".to_string(),
342 line: 4
343 }]
344 );
345 }
346
347 #[test]
348 fn a_negative_branch_dest_is_ignored() {
349 // `[6, -1]` is a branch to a function exit; the source line 6 is what
350 // matters, and a change to line 6 is uncovered.
351 let out = uncovered_changed_lines(
352 &changed(&[("widget.py", &[6])]),
353 &BTreeMap::from([("widget.py".to_string(), file_coverage(&[], &[[6, -1]]))]),
354 );
355 assert_eq!(
356 out,
357 vec![Uncovered {
358 file: "widget.py".to_string(),
359 line: 6
360 }]
361 );
362 }
363
364 #[test]
365 fn a_changed_file_absent_from_coverage_is_skipped() {
366 // A test file (omitted from the run) never appears in the report, so its
367 // changed lines contribute nothing rather than panicking on a lookup.
368 let out = uncovered_changed_lines(
369 &changed(&[("widget_test.py", &[1, 2])]),
370 &BTreeMap::from([("widget.py".to_string(), file_coverage(&[5], &[]))]),
371 );
372 assert!(out.is_empty());
373 }
374
375 #[test]
376 fn reports_are_sorted_across_files_and_lines() {
377 let out = uncovered_changed_lines(
378 &changed(&[("z.py", &[2, 1]), ("a.py", &[9])]),
379 &BTreeMap::from([
380 ("z.py".to_string(), file_coverage(&[1, 2], &[])),
381 ("a.py".to_string(), file_coverage(&[9], &[])),
382 ]),
383 );
384 assert_eq!(
385 out,
386 vec![
387 Uncovered {
388 file: "a.py".to_string(),
389 line: 9
390 },
391 Uncovered {
392 file: "z.py".to_string(),
393 line: 1
394 },
395 Uncovered {
396 file: "z.py".to_string(),
397 line: 2
398 },
399 ]
400 );
401 }
402}