Skip to main content

heddle_core/diff/
patch.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Patch-compatible rendering for diff reports.
3
4use std::io::{self, Write};
5
6use objects::object::FileMode;
7
8use super::{DiffReport, FileChange, LineDiff};
9
10pub fn write_diff_patch<W: Write>(output: &DiffReport, writer: &mut W) -> io::Result<()> {
11    for change in &output.changes {
12        // A symlink change carries its raw target bytes in `change.symlink`,
13        // which on Unix need not be valid UTF-8. Render it byte-exact so a
14        // non-UTF-8 link target round-trips through `git apply`; every other
15        // change is UTF-8 text and is appended as its bytes.
16        if change.symlink.is_some() {
17            write_symlink_change(change, writer)?;
18        } else {
19            write_text_change(change, writer)?;
20        }
21    }
22    Ok(())
23}
24
25pub fn render_diff_patch_bytes(output: &DiffReport) -> Vec<u8> {
26    let mut buf: Vec<u8> = Vec::new();
27    write_diff_patch(output, &mut buf).expect("writing diff patch to Vec cannot fail");
28    buf
29}
30
31/// Lossy String view of the byte-exact patch (`render_diff_patch_bytes`),
32/// for the JSON `.patch` field and String-based callers/tests. Only a
33/// non-UTF-8 symlink target (Unix-only) differs from the byte render; JSON
34/// strings cannot carry raw bytes, so a lossy view is the best a String
35/// surface can do. The round-trip surface (`heddle diff --patch`) writes the
36/// bytes directly via `render_diff_patch_bytes`, so its byte fidelity is
37/// never reduced here.
38pub fn render_diff_patch(output: &DiffReport) -> String {
39    String::from_utf8_lossy(&render_diff_patch_bytes(output)).into_owned()
40}
41
42/// Render one non-symlink change as unified-diff text into `writer`. Symlink
43/// changes never reach here — `write_diff_patch` routes them to
44/// `write_symlink_change`, which preserves a non-UTF-8 target — so a symlink
45/// target is never forced through `change.lines` (which a non-UTF-8 target
46/// cannot populate) or `write_binary_change`.
47fn write_text_change<W: Write>(change: &FileChange, writer: &mut W) -> io::Result<()> {
48    let lines_ref = change.lines.as_deref();
49    let has_hunk_body = lines_ref.is_some_and(|lines| lines.iter().any(|line| line.prefix != " "));
50    let old_path = change.old_path.as_deref().unwrap_or(&change.path);
51    let is_rename = change
52        .old_path
53        .as_deref()
54        .is_some_and(|old| old != change.path);
55    let is_added = change.kind == "added";
56    let is_deleted = change.kind == "deleted";
57    let is_modified = !is_rename && !is_added && !is_deleted;
58    // A mode-only modify (chmod / exec-bit flip / type swap) has no
59    // hunk body but is still a real change: git records it as
60    // `old mode`/`new mode` extended headers and `git apply`
61    // reproduces the permission change from those alone.
62    let mode_changed = is_modified
63        && matches!((change.old_mode, change.mode), (Some(old), Some(new)) if old != new);
64    // `lines: None` is the binary / unreadable case — there is no
65    // text body to render, so it never produces a patch regardless
66    // of kind. `lines: Some(_)` (even empty) means we have a
67    // readable text side.
68    let has_text = change.lines.is_some();
69
70    // A binary *content* change (add/delete/modify of a file heddle
71    // cannot diff as text). heddle has no git binary delta to emit
72    // (its blob hashes are not git SHAs), and silently dropping the
73    // change would let `git apply` "succeed" while the binary content
74    // stays stale — the false round-trip cid 3319484747 flagged. Emit
75    // git's `Binary files … differ` marker with a *placeholder* index
76    // line: that index line is what makes `git apply` recognize a
77    // binary patch and refuse the *whole* patch ("without full index
78    // line") instead of skipping the block. Without the index line git
79    // treats the marker as an empty patch and silently ignores it. A
80    // content-identical mode-only change is never `binary` (the diff
81    // readers short-circuit it to an empty text body), so this only
82    // fires on a real binary content change, never a chmod.
83    if change.binary && !is_rename {
84        write_binary_change(change, is_added, is_deleted, mode_changed, writer)?;
85        return Ok(());
86    }
87
88    // Decide whether this change emits anything at all:
89    // * renames always do (the extended headers carry the move even
90    //   for identical content);
91    // * add/delete do whenever there's a readable text side — the
92    //   empty-file case renders header-only;
93    // * a modify renders only when it has a real hunk body. A modify
94    //   with no body and matching EOL is a no-op; the
95    //   trailing-newline-only case is handled upstream in
96    //   `unified_hunks`, which synthesizes a tail hunk so this
97    //   branch sees `has_hunk_body == true`.
98    let should_render = if is_rename {
99        true
100    } else if is_added || is_deleted {
101        has_text
102    } else {
103        has_hunk_body || mode_changed
104    };
105    if !should_render {
106        return Ok(());
107    }
108
109    if is_rename {
110        writeln!(
111            writer,
112            "diff --git {} {}",
113            quote_path_for_patch("a/", old_path),
114            quote_path_for_patch("b/", &change.path)
115        )?;
116        // A rename paired with a chmod/type change (`old.sh` renamed
117        // to `new.sh` and made executable) carries both modes; emit
118        // the `old mode`/`new mode` pair before `similarity index`,
119        // matching `git diff`, so `git apply` reproduces the
120        // permission change as well as the move.
121        if let (Some(old), Some(new)) = (change.old_mode, change.mode)
122            && old != new
123        {
124            writeln!(writer, "old mode {}", mode_str(change.old_mode))?;
125            writeln!(writer, "new mode {}", mode_str(change.mode))?;
126        }
127        let pct = (change.similarity_score.unwrap_or(1.0).clamp(0.0, 1.0) * 100.0).round() as u32;
128        writeln!(writer, "similarity index {pct}%")?;
129        writeln!(writer, "rename from {}", quote_path_for_patch("", old_path))?;
130        writeln!(
131            writer,
132            "rename to {}",
133            quote_path_for_patch("", &change.path)
134        )?;
135        // Pure rename — extended headers alone suffice; emitting
136        // `--- a/old / +++ b/new` without hunks would tell git to
137        // apply an empty patch and warn about a stray header.
138        if !has_hunk_body {
139            return Ok(());
140        }
141    } else if is_added {
142        writeln!(
143            writer,
144            "diff --git {} {}",
145            quote_path_for_patch("a/", &change.path),
146            quote_path_for_patch("b/", &change.path)
147        )?;
148        writeln!(writer, "new file mode {}", mode_str(change.mode))?;
149    } else if is_deleted {
150        writeln!(
151            writer,
152            "diff --git {} {}",
153            quote_path_for_patch("a/", &change.path),
154            quote_path_for_patch("b/", &change.path)
155        )?;
156        writeln!(writer, "deleted file mode {}", mode_str(change.mode))?;
157    } else if mode_changed {
158        // A modify whose mode changed (with or without a content
159        // hunk). Emit the `diff --git` + `old mode`/`new mode`
160        // header pair.
161        writeln!(
162            writer,
163            "diff --git {} {}",
164            quote_path_for_patch("a/", &change.path),
165            quote_path_for_patch("b/", &change.path)
166        )?;
167        writeln!(writer, "old mode {}", mode_str(change.old_mode))?;
168        writeln!(writer, "new mode {}", mode_str(change.mode))?;
169    } else {
170        // A plain content modify. Emit the `diff --git` header so
171        // every file stanza is self-delimiting. A bare `--- a/<path>`
172        // is ambiguous: git's parser binds it to the *preceding*
173        // `diff --git` stanza when one is still open — e.g. a
174        // header-only empty-add (`diff --git ... / new file mode`) or
175        // a mode-only change immediately above — and misreads this
176        // file's `---` as the prior file's source side, corrupting the
177        // patch ("expected /dev/null"). The explicit header closes the
178        // prior stanza and opens this one. (cid 3319484717 ordering.)
179        writeln!(
180            writer,
181            "diff --git {} {}",
182            quote_path_for_patch("a/", &change.path),
183            quote_path_for_patch("b/", &change.path)
184        )?;
185    }
186
187    // An empty-file add/delete (text side present but zero lines)
188    // has no hunk body. git stops after the `new/deleted file mode`
189    // header in that case and `git apply` still creates/unlinks the
190    // path — emitting `--- /+++/@@` with no `@@` body would be a
191    // malformed hunk, so we stop here too.
192    if (is_added || is_deleted) && !has_hunk_body {
193        return Ok(());
194    }
195    // A mode-only modify carries no content hunk: the `old mode`/
196    // `new mode` header pair is the entire patch, so stop before the
197    // `--- /+++` line-diff headers (which would be a malformed
198    // empty hunk).
199    if is_modified && !has_hunk_body {
200        return Ok(());
201    }
202
203    if is_added {
204        writer.write_all(b"--- /dev/null\n")?;
205    } else {
206        writeln!(writer, "--- {}", quote_path_for_patch("a/", old_path))?;
207    }
208    if is_deleted {
209        writer.write_all(b"+++ /dev/null\n")?;
210    } else {
211        writeln!(writer, "+++ {}", quote_path_for_patch("b/", &change.path))?;
212    }
213    if let Some(lines) = lines_ref {
214        write_patch_hunks(change, lines, writer)?;
215    }
216    Ok(())
217}
218
219/// Render a symlink change (add / delete / target-edit / rename) byte-exact.
220///
221/// A symlink's git blob is its raw target bytes, which on Unix need not be
222/// valid UTF-8 — so the hunk body is emitted straight from `change.symlink`
223/// (the single byte-preserving symlink path) rather than `change.lines`,
224/// which a non-UTF-8 target cannot populate. Marking such a change `binary`
225/// (the old behaviour) emitted a placeholder-binary stanza that `git apply`
226/// rejects for a `120000` entry; emitting the target as a text hunk is what
227/// git itself does and round-trips. The extended headers mirror
228/// `write_text_change`'s (add/delete/rename), and the mode is always
229/// `120000` so a rename never needs an `old mode`/`new mode` pair unless the
230/// two sides genuinely differ.
231fn write_symlink_change<W: Write>(change: &FileChange, writer: &mut W) -> io::Result<()> {
232    let Some(sym) = change.symlink.as_ref() else {
233        return Ok(());
234    };
235    let old_path = change.old_path.as_deref().unwrap_or(&change.path);
236    let is_rename = change
237        .old_path
238        .as_deref()
239        .is_some_and(|old| old != change.path);
240    let is_added = change.kind == "added";
241    let is_deleted = change.kind == "deleted";
242
243    if is_rename {
244        writeln!(
245            writer,
246            "diff --git {} {}",
247            quote_path_for_patch("a/", old_path),
248            quote_path_for_patch("b/", &change.path)
249        )?;
250        if let (Some(old), Some(new)) = (change.old_mode, change.mode)
251            && old != new
252        {
253            writeln!(writer, "old mode {}", mode_str(change.old_mode))?;
254            writeln!(writer, "new mode {}", mode_str(change.mode))?;
255        }
256        let pct = (change.similarity_score.unwrap_or(1.0).clamp(0.0, 1.0) * 100.0).round() as u32;
257        writeln!(writer, "similarity index {pct}%")?;
258        writeln!(writer, "rename from {}", quote_path_for_patch("", old_path))?;
259        writeln!(
260            writer,
261            "rename to {}",
262            quote_path_for_patch("", &change.path)
263        )?;
264        // Pure rename (identical target) — the extended headers alone carry
265        // the move, exactly like a text rename with no hunk body.
266        if sym.old == sym.new {
267            return Ok(());
268        }
269        writeln!(writer, "--- {}", quote_path_for_patch("a/", old_path))?;
270        writeln!(writer, "+++ {}", quote_path_for_patch("b/", &change.path))?;
271    } else if is_added {
272        writeln!(
273            writer,
274            "diff --git {} {}",
275            quote_path_for_patch("a/", &change.path),
276            quote_path_for_patch("b/", &change.path)
277        )?;
278        writeln!(writer, "new file mode {}", mode_str(change.mode))?;
279        writer.write_all(b"--- /dev/null\n")?;
280        writeln!(writer, "+++ {}", quote_path_for_patch("b/", &change.path))?;
281    } else if is_deleted {
282        writeln!(
283            writer,
284            "diff --git {} {}",
285            quote_path_for_patch("a/", &change.path),
286            quote_path_for_patch("b/", &change.path)
287        )?;
288        writeln!(writer, "deleted file mode {}", mode_str(change.mode))?;
289        writeln!(writer, "--- {}", quote_path_for_patch("a/", &change.path))?;
290        writer.write_all(b"+++ /dev/null\n")?;
291    } else {
292        // A symlink target-edit. The mode is unchanged (`120000` → `120000`),
293        // so no `old mode`/`new mode` block — just the file header. An
294        // identical target would be a no-op and is never emitted by the diff
295        // backends, but guard it so an accidental empty hunk can't form.
296        if sym.old == sym.new {
297            return Ok(());
298        }
299        writeln!(
300            writer,
301            "diff --git {} {}",
302            quote_path_for_patch("a/", &change.path),
303            quote_path_for_patch("b/", &change.path)
304        )?;
305        writeln!(writer, "--- {}", quote_path_for_patch("a/", &change.path))?;
306        writeln!(writer, "+++ {}", quote_path_for_patch("b/", &change.path))?;
307    }
308
309    write_symlink_hunk(sym.old.as_deref(), sym.new.as_deref(), writer)?;
310    Ok(())
311}
312
313/// Emit the unified-diff hunk for a symlink's target bytes. A symlink's git
314/// blob has no trailing newline, so each side normally collapses to a single
315/// line carrying the `\ No newline at end of file` marker; a target that
316/// embeds a `\n` (pathological but representable) splits into multiple lines.
317/// The `@@` header mirrors `unified_hunks`'s `@@ -s,c +s,c @@` shape (counts
318/// always written, even `,1`), which `git apply` accepts.
319fn write_symlink_hunk<W: Write>(
320    old: Option<&[u8]>,
321    new: Option<&[u8]>,
322    writer: &mut W,
323) -> io::Result<()> {
324    let old_lines = split_target_lines(old);
325    let new_lines = split_target_lines(new);
326    let old_count = old_lines.len();
327    let new_count = new_lines.len();
328    let old_start = if old_count == 0 { 0 } else { 1 };
329    let new_start = if new_count == 0 { 0 } else { 1 };
330    writeln!(
331        writer,
332        "@@ -{old_start},{old_count} +{new_start},{new_count} @@"
333    )?;
334    let old_no_eol = !target_has_trailing_newline(old);
335    let new_no_eol = !target_has_trailing_newline(new);
336    for (idx, line) in old_lines.iter().enumerate() {
337        writer.write_all(b"-")?;
338        writer.write_all(line)?;
339        writer.write_all(b"\n")?;
340        if old_no_eol && idx + 1 == old_count {
341            writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
342        }
343    }
344    for (idx, line) in new_lines.iter().enumerate() {
345        writer.write_all(b"+")?;
346        writer.write_all(line)?;
347        writer.write_all(b"\n")?;
348        if new_no_eol && idx + 1 == new_count {
349            writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
350        }
351    }
352    Ok(())
353}
354
/// Break a symlink target's raw bytes into unified-diff lines.
///
/// A missing side (`None`) and an empty blob both produce no lines. A final
/// `\n` is treated as the last line's terminator rather than the start of an
/// extra empty line; whether it was present is reported separately by
/// `target_has_trailing_newline`.
fn split_target_lines(target: Option<&[u8]>) -> Vec<&[u8]> {
    match target {
        None | Some([]) => Vec::new(),
        Some(bytes) => {
            // Drop at most one trailing terminator, then split on the rest.
            let body = bytes.strip_suffix(b"\n").unwrap_or(bytes);
            body.split(|&byte| byte == b'\n').collect()
        }
    }
}
372
/// Report whether a symlink target blob exists and ends in `\n`.
///
/// A missing side (`None`) and an empty blob both yield `false`.
fn target_has_trailing_newline(target: Option<&[u8]>) -> bool {
    matches!(target, Some([.., b'\n']))
}
376
377/// Render a binary content change (add / delete / plain modify / modify
378/// with a mode change) as git's `Binary files … differ` marker.
379///
380/// heddle cannot emit a git binary delta — its blob hashes are not git
381/// SHAs — so the marker is the most faithful thing it can produce. The
382/// catch (cid 3319484747): a bare `Binary files … differ` marker with no
383/// `index` header is treated by `git apply` as an empty patch and
384/// *silently skipped*, which would let the apply "succeed" while the
385/// binary content stays stale. Emitting a *placeholder* `index
386/// 0000000..0000000` line flips git into binary-patch mode, where it
387/// refuses the whole patch ("cannot apply binary patch … without full
388/// index line") rather than ignoring it. That refusal is the correct
389/// outcome: heddle has no delta to apply, so the honest result is a hard
390/// failure, never a false round-trip.
391fn write_binary_change<W: Write>(
392    change: &FileChange,
393    is_added: bool,
394    is_deleted: bool,
395    mode_changed: bool,
396    writer: &mut W,
397) -> io::Result<()> {
398    let path = &change.path;
399    writeln!(
400        writer,
401        "diff --git {} {}",
402        quote_path_for_patch("a/", path),
403        quote_path_for_patch("b/", path)
404    )?;
405    if is_added {
406        writeln!(writer, "new file mode {}", mode_str(change.mode))?;
407        writer.write_all(b"index 0000000..0000000\n")?;
408    } else if is_deleted {
409        writeln!(writer, "deleted file mode {}", mode_str(change.mode))?;
410        writer.write_all(b"index 0000000..0000000\n")?;
411    } else if mode_changed {
412        writeln!(writer, "old mode {}", mode_str(change.old_mode))?;
413        writeln!(writer, "new mode {}", mode_str(change.mode))?;
414        writer.write_all(b"index 0000000..0000000\n")?;
415    } else {
416        // Plain binary modify: git stamps the mode at the end of the
417        // index line (`index <old>..<new> 100644`).
418        writeln!(writer, "index 0000000..0000000 {}", mode_str(change.mode))?;
419    }
420    let (a, b) = if is_added {
421        ("/dev/null".to_string(), quote_path_for_patch("b/", path))
422    } else if is_deleted {
423        (quote_path_for_patch("a/", path), "/dev/null".to_string())
424    } else {
425        (
426            quote_path_for_patch("a/", path),
427            quote_path_for_patch("b/", path),
428        )
429    };
430    writeln!(writer, "Binary files {a} and {b} differ")?;
431    Ok(())
432}
433
434/// Map a tracked file mode to the git unified-diff mode string. `None`
435/// (mode not resolved) and the regular-file case both render `100644`.
436fn mode_str(mode: Option<FileMode>) -> &'static str {
437    match mode {
438        Some(FileMode::Executable) => "100755",
439        Some(FileMode::Symlink) => "120000",
440        Some(FileMode::Gitlink) => "160000",
441        Some(FileMode::Normal) | None => "100644",
442    }
443}
444
445/// Quote a patch-header path the way `git diff` does (C-style quoting,
446/// `core.quotePath` defaults to true). A path containing a tab, newline,
447/// double-quote, backslash, control byte, or non-ASCII byte is wrapped in
448/// double quotes with the bytes escaped; a "simple" path is emitted bare.
449///
450/// `prefix` is the in-quote prefix git stamps on `diff --git`/`--- `/`+++ `
451/// headers (`a/`, `b/`) — git puts the prefix *inside* the quotes
452/// (`"a/tab\there"`), so it is escaped alongside the path. `rename from`/
453/// `rename to` pass an empty prefix (git quotes the bare path there).
454///
455/// Verified byte-for-byte against `git diff` for tab, newline, quote,
456/// backslash, and non-ASCII (UTF-8 → per-byte octal) paths.
457fn quote_path_for_patch(prefix: &str, path: &str) -> String {
458    if !needs_c_quoting(prefix) && !needs_c_quoting(path) {
459        return format!("{prefix}{path}");
460    }
461    let mut out = String::with_capacity(prefix.len() + path.len() + 2);
462    out.push('"');
463    push_c_quoted(&mut out, prefix);
464    push_c_quoted(&mut out, path);
465    out.push('"');
466    out
467}
468
469fn needs_c_quoting(s: &str) -> bool {
470    s.bytes().any(byte_needs_escape)
471}
472
/// Decide whether a single path byte must be escaped inside a quoted path.
///
/// git escapes every byte below 0x20, DEL and everything above it
/// (0x7f..=0xff; `core.quotePath` octal-escapes non-ASCII), plus the two
/// in-quote metacharacters `"` and `\`.
fn byte_needs_escape(byte: u8) -> bool {
    match byte {
        b'"' | b'\\' => true,
        // Remaining printable ASCII (space through `~`) passes through.
        0x20..=0x7e => false,
        _ => true,
    }
}
479
/// Append `s` to `out` using git's C-style escapes for quoted paths.
///
/// The caller supplies the surrounding double quotes. Within them:
/// * `"` and `\` are backslash-escaped;
/// * BEL, BS, TAB, LF, VT, FF and CR use their mnemonic escapes
///   (`\a \b \t \n \v \f \r`);
/// * printable ASCII (0x20..=0x7e) is copied through unchanged;
/// * every other byte, including each byte of a multi-byte UTF-8 sequence,
///   becomes a three-digit octal escape (`\303\251` for `é`).
fn push_c_quoted(out: &mut String, s: &str) {
    for byte in s.bytes() {
        match byte {
            b'"' => out.push_str("\\\""),
            b'\\' => out.push_str("\\\\"),
            0x07 => out.push_str("\\a"),
            0x08 => out.push_str("\\b"),
            0x09 => out.push_str("\\t"),
            0x0a => out.push_str("\\n"),
            0x0b => out.push_str("\\v"),
            0x0c => out.push_str("\\f"),
            0x0d => out.push_str("\\r"),
            0x20..=0x7e => out.push(char::from(byte)),
            other => {
                // Push the three octal digits directly instead of building a
                // temporary `String` with `format!` for every escaped byte.
                out.push('\\');
                out.push(char::from(b'0' + (other >> 6)));
                out.push(char::from(b'0' + ((other >> 3) & 0o7)));
                out.push(char::from(b'0' + (other & 0o7)));
            }
        }
    }
}
497
/// Marker line written directly after the last line of a side whose content
/// does not end in a newline. It already includes its own terminating `\n`.
const NO_NEWLINE_MARKER: &str = "\\ No newline at end of file\n";
499
500/// Walk the rendered hunks once and emit each line, splicing in the
501/// `\ No newline at end of file` marker after the line that holds the
502/// file's tail on a side whose source bytes lacked a trailing `\n`.
503///
504/// The diff backend strips line terminators, so per-line equality
505/// collapses `hello` and `hello\n` into the same `LineDiff`. To match
506/// `git diff`'s output (which `git apply --check` accepts), a context
507/// line that sits on the no-newline side's tail has to be split into
508/// a `-` + `+` pair, with the marker attached to the side that lacks
509/// the terminator. The 4-case matrix is in `write_patch_hunks`'s
510/// context-line branch.
511fn write_patch_hunks<W: Write>(
512    change: &FileChange,
513    lines: &[LineDiff],
514    writer: &mut W,
515) -> io::Result<()> {
516    let old_no_eol = !change.eol.old_has_final_newline;
517    let new_no_eol = !change.eol.new_has_final_newline;
518    let old_tail_idx = if old_no_eol && change.eol.old_line_count > 0 {
519        find_side_tail_idx(lines, Side::Old, change.eol.old_line_count)
520    } else {
521        None
522    };
523    let new_tail_idx = if new_no_eol && change.eol.new_line_count > 0 {
524        find_side_tail_idx(lines, Side::New, change.eol.new_line_count)
525    } else {
526        None
527    };
528
529    for (idx, line) in lines.iter().enumerate() {
530        let is_old_tail = Some(idx) == old_tail_idx;
531        let is_new_tail = Some(idx) == new_tail_idx;
532        let needs_old_marker = is_old_tail && old_no_eol;
533        let needs_new_marker = is_new_tail && new_no_eol;
534
535        if line.prefix == " " && (needs_old_marker || needs_new_marker) {
536            if is_old_tail && is_new_tail && needs_old_marker && needs_new_marker {
537                // Both sides' tail lands on this context line and both
538                // lack a trailing newline — emit the line once, then
539                // a single marker that applies to both sides.
540                write_patch_line(writer, line)?;
541                writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
542            } else {
543                // Mixed state: at least one side needs the marker and
544                // the other shouldn't be tagged. Split the context
545                // line into a `-content` / `+content` pair so each
546                // side's marker (or its absence) is unambiguous.
547                writer.write_all(b"-")?;
548                writer.write_all(line.content.as_bytes())?;
549                writer.write_all(b"\n")?;
550                if needs_old_marker {
551                    writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
552                }
553                writer.write_all(b"+")?;
554                writer.write_all(line.content.as_bytes())?;
555                writer.write_all(b"\n")?;
556                if needs_new_marker {
557                    writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
558                }
559            }
560            continue;
561        }
562
563        write_patch_line(writer, line)?;
564        if needs_old_marker && line.prefix == "-" {
565            writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
566        }
567        if needs_new_marker && line.prefix == "+" {
568            writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
569        }
570    }
571    Ok(())
572}
573
/// Selects which side of a diff `find_side_tail_idx` inspects.
#[derive(Clone, Copy)]
enum Side {
    /// The pre-change side: `-` and context lines, numbered by `old_line`.
    Old,
    /// The post-change side: `+` and context lines, numbered by `new_line`.
    New,
}
579
580fn find_side_tail_idx(lines: &[LineDiff], side: Side, target: usize) -> Option<usize> {
581    lines.iter().enumerate().rev().find_map(|(idx, line)| {
582        let (on_side, line_number) = match side {
583            Side::Old => (line.prefix == "-" || line.prefix == " ", line.old_line),
584            Side::New => (line.prefix == "+" || line.prefix == " ", line.new_line),
585        };
586        if on_side && line_number == Some(target) {
587            Some(idx)
588        } else {
589            None
590        }
591    })
592}
593
594fn write_patch_line<W: Write>(writer: &mut W, line: &LineDiff) -> io::Result<()> {
595    writer.write_all(line.prefix.as_bytes())?;
596    writer.write_all(line.content.as_bytes())?;
597    writer.write_all(b"\n")
598}
599
600#[cfg(test)]
601mod tests {
602    use objects::object::FileMode;
603
604    use super::{quote_path_for_patch, render_diff_patch, render_diff_patch_bytes};
605    use crate::diff::{DiffReport, FileChange, FileEolState, LineDiff, SymlinkChange};
606
607    fn modified_change_with_eol(path: &str, lines: Vec<LineDiff>, eol: FileEolState) -> FileChange {
608        FileChange {
609            path: path.to_string(),
610            kind: "modified".to_string(),
611            lines: Some(lines),
612            eol,
613            ..Default::default()
614        }
615    }
616
    /// Wrap `changes` in a `DiffReport`, passing `None` for every other
    /// constructor argument.
    fn diff_report_with(changes: Vec<FileChange>) -> DiffReport {
        DiffReport::new(None, None, changes, None, None, None)
    }
620
621    #[cfg(unix)]
622    fn hermetic_git_command(dir: &std::path::Path, args: &[&str]) -> std::process::Command {
623        let mut command = std::process::Command::new("git");
624        command
625            .args(args)
626            .current_dir(dir)
627            .env("GIT_CONFIG_GLOBAL", "/dev/null")
628            .env("GIT_CONFIG_SYSTEM", "/dev/null")
629            .env("GIT_AUTHOR_NAME", "Heddle Test")
630            .env("GIT_AUTHOR_EMAIL", "heddle@example.com")
631            .env("GIT_COMMITTER_NAME", "Heddle Test")
632            .env("GIT_COMMITTER_EMAIL", "heddle@example.com");
633        command
634    }
635
    /// Run `git` with `args` in `dir` via `hermetic_git_command` and panic
    /// unless the process both spawns and exits successfully.
    #[cfg(unix)]
    fn hermetic_git(dir: &std::path::Path, args: &[&str]) {
        let status = hermetic_git_command(dir, args)
            .status()
            .unwrap_or_else(|err| panic!("git {args:?} should spawn: {err}"));
        assert!(status.success(), "git {args:?} should succeed");
    }
643
    /// Run `git` with `args` in `dir`, feed `patch` to its stdin, and return
    /// the captured exit status, stdout and stderr.
    ///
    /// Panics if the process cannot be spawned, if writing the patch to its
    /// stdin fails, or if waiting for it fails. A non-zero exit status is
    /// returned to the caller rather than treated as an error.
    #[cfg(unix)]
    fn pipe_git_apply(dir: &std::path::Path, args: &[&str], patch: &[u8]) -> std::process::Output {
        use std::{io::Write, process::Stdio};

        // All three standard streams are piped so the patch can be fed in
        // and the diagnostics captured.
        let mut child = hermetic_git_command(dir, args)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn()
            .unwrap_or_else(|err| panic!("git {args:?} should spawn: {err}"));
        child.stdin.as_mut().unwrap().write_all(patch).unwrap();
        // `wait_with_output` closes the child's stdin before waiting, which
        // signals end-of-input to git.
        child
            .wait_with_output()
            .unwrap_or_else(|err| panic!("git {args:?} should finish: {err}"))
    }
659
    /// An added symlink whose target contains non-UTF-8 bytes must render
    /// into a patch that carries those bytes verbatim, passes
    /// `git apply --check`, applies cleanly, and produces a symlink whose
    /// target is byte-identical to the original.
    #[cfg(unix)]
    #[test]
    fn render_diff_patch_bytes_applies_non_utf8_symlink_target_byte_exactly() {
        use std::os::unix::ffi::OsStrExt;

        // `\xff\xfe` is not valid UTF-8, so a lossy String render would
        // corrupt it.
        let target = b"target-\xff\xfe";
        let change = FileChange {
            path: "linky".to_string(),
            kind: "added".to_string(),
            mode: Some(FileMode::Symlink),
            symlink: Some(SymlinkChange {
                old: None,
                new: Some(target.to_vec()),
            }),
            ..Default::default()
        };
        let patch = render_diff_patch_bytes(&diff_report_with(vec![change]));
        // The raw target bytes must appear somewhere in the rendered patch.
        assert!(
            patch.windows(target.len()).any(|window| window == target),
            "patch must carry the raw non-UTF-8 target bytes:\n{}",
            String::from_utf8_lossy(&patch)
        );

        // Fresh scratch repository to apply the patch into.
        let scratch = tempfile::TempDir::new().unwrap();
        hermetic_git(scratch.path(), &["init", "-q"]);
        hermetic_git(scratch.path(), &["checkout", "-q", "-b", "main"]);

        // Dry run first, then the real apply.
        let check = pipe_git_apply(scratch.path(), &["apply", "--check"], &patch);
        assert!(
            check.status.success(),
            "git apply --check rejected patch;\nstderr={}\npatch=\n{}",
            String::from_utf8_lossy(&check.stderr),
            String::from_utf8_lossy(&patch)
        );
        let applied = pipe_git_apply(scratch.path(), &["apply"], &patch);
        assert!(
            applied.status.success(),
            "git apply rejected patch;\nstderr={}\npatch=\n{}",
            String::from_utf8_lossy(&applied.stderr),
            String::from_utf8_lossy(&patch)
        );

        // Compare the on-disk link target as raw bytes.
        let applied_target = std::fs::read_link(scratch.path().join("linky")).unwrap();
        assert_eq!(
            applied_target.as_os_str().as_bytes(),
            target,
            "applied symlink target must be byte-exact"
        );
    }
709
    /// A mode-only modify (exec-bit flip, no content change) must render
    /// as a header-only `diff --git` + `old mode`/`new mode` block with
    /// no `@@` hunk. Regressing this drops the chmod from the patch so
    /// `git apply` can't reproduce the permission change (cid 3318629228).
    #[test]
    fn render_diff_patch_emits_mode_only_header_for_chmod() {
        // Readable text side with zero diff lines: only the mode differs.
        let change = FileChange {
            path: "run.sh".to_string(),
            kind: "modified".to_string(),
            lines: Some(Vec::new()),
            old_mode: Some(FileMode::Normal),
            mode: Some(FileMode::Executable),
            ..Default::default()
        };
        let rendered = render_diff_patch(&diff_report_with(vec![change]));
        assert!(
            rendered.contains("diff --git a/run.sh b/run.sh"),
            "chmod-only must emit the `diff --git` header:\n{rendered}"
        );
        assert!(
            rendered.contains("old mode 100644") && rendered.contains("new mode 100755"),
            "chmod-only must emit `old mode`/`new mode`:\n{rendered}"
        );
        // Neither a hunk header nor a `--- a/` file header may appear.
        assert!(
            !rendered.contains("@@") && !rendered.contains("--- a/"),
            "chmod-only is header-only — no hunk body:\n{rendered}"
        );
    }
738
739    #[test]
740    fn render_diff_patch_emits_gitlink_mode_without_blob_hunk() {
741        let change = FileChange {
742            path: "vendor".to_string(),
743            kind: "added".to_string(),
744            lines: Some(Vec::new()),
745            mode: Some(FileMode::Gitlink),
746            ..Default::default()
747        };
748
749        let rendered = render_diff_patch(&diff_report_with(vec![change]));
750
751        assert!(
752            rendered.contains("new file mode 160000"),
753            "gitlinks must render their durable mode, not a regular-file mode:\n{rendered}"
754        );
755        assert!(
756            !rendered.contains("@@") && !rendered.contains("heddle-submodule:"),
757            "gitlink patch output must not synthesize legacy marker blob content:\n{rendered}"
758        );
759    }
760
761    /// A modify that changes BOTH content and mode emits the mode-header
762    /// pair AND the usual `--- /+++` line-diff body.
763    #[test]
764    fn render_diff_patch_emits_mode_headers_with_content_hunk() {
765        let change = FileChange {
766            path: "run.sh".to_string(),
767            kind: "modified".to_string(),
768            lines: Some(vec![
769                LineDiff::with_lines("@", "@ -1,1 +1,1 @@", None, None),
770                LineDiff::with_lines("-", "echo old", Some(1), None),
771                LineDiff::with_lines("+", "echo new", None, Some(1)),
772            ]),
773            old_mode: Some(FileMode::Normal),
774            mode: Some(FileMode::Executable),
775            ..Default::default()
776        };
777        let rendered = render_diff_patch(&diff_report_with(vec![change]));
778        assert!(
779            rendered.contains("old mode 100644") && rendered.contains("new mode 100755"),
780            "content+mode change must still emit the mode headers:\n{rendered}"
781        );
782        assert!(
783            rendered.contains("--- a/run.sh")
784                && rendered.contains("+++ b/run.sh")
785                && rendered.contains("+echo new"),
786            "content+mode change must still emit the line-diff body:\n{rendered}"
787        );
788    }
789
790    /// An unchanged mode on a modify with no hunk body is a genuine
791    /// no-op and must emit nothing — guards against the mode branch
792    /// firing when `old_mode == mode`.
793    #[test]
794    fn render_diff_patch_skips_modify_with_same_mode_and_no_body() {
795        let change = FileChange {
796            path: "run.sh".to_string(),
797            kind: "modified".to_string(),
798            lines: Some(Vec::new()),
799            old_mode: Some(FileMode::Normal),
800            mode: Some(FileMode::Normal),
801            ..Default::default()
802        };
803        let rendered = render_diff_patch(&diff_report_with(vec![change]));
804        assert!(
805            rendered.is_empty(),
806            "no-op modify (same mode, no body) must emit nothing:\n{rendered}"
807        );
808    }
809
810    /// A binary content modify (`binary: true`, `lines: None`) must emit
811    /// git's `Binary files … differ` marker with a *placeholder* index
812    /// line. Silently dropping it would let `git apply` "succeed" while
813    /// the binary content stayed stale (cid 3319484747); the index line
814    /// flips git into binary-patch mode so it refuses the whole patch
815    /// instead of skipping the block.
816    #[test]
817    fn render_diff_patch_binary_modify_emits_marker_with_index() {
818        let change = FileChange {
819            path: "binary.bin".to_string(),
820            kind: "modified".to_string(),
821            binary: true,
822            lines: None,
823            mode: Some(FileMode::Normal),
824            old_mode: Some(FileMode::Normal),
825            ..Default::default()
826        };
827        let rendered = render_diff_patch(&diff_report_with(vec![change]));
828        assert!(
829            rendered.contains("diff --git a/binary.bin b/binary.bin"),
830            "binary modify must emit a diff header:\n{rendered}"
831        );
832        assert!(
833            rendered.contains("index 0000000..0000000 100644"),
834            "binary modify must emit a placeholder index line:\n{rendered}"
835        );
836        assert!(
837            rendered.contains("Binary files a/binary.bin and b/binary.bin differ"),
838            "binary modify must emit the binary marker:\n{rendered}"
839        );
840        assert!(
841            !rendered.contains("--- a/binary.bin"),
842            "binary modify must not emit a text hunk header:\n{rendered}"
843        );
844    }
845
846    /// A binary modify whose mode *also* changed emits the
847    /// `old mode`/`new mode` pair (so the chmod is recorded) followed by
848    /// the placeholder index + binary marker — never a mode-only chmod
849    /// patch that git apply would accept while leaving stale binary
850    /// content (cid 3319484747).
851    #[test]
852    fn render_diff_patch_binary_modify_with_mode_change_keeps_marker() {
853        let change = FileChange {
854            path: "binary.bin".to_string(),
855            kind: "modified".to_string(),
856            binary: true,
857            lines: None,
858            old_mode: Some(FileMode::Normal),
859            mode: Some(FileMode::Executable),
860            ..Default::default()
861        };
862        let rendered = render_diff_patch(&diff_report_with(vec![change]));
863        assert!(
864            rendered.contains("old mode 100644") && rendered.contains("new mode 100755"),
865            "binary+mode change must still record the chmod:\n{rendered}"
866        );
867        assert!(
868            rendered.contains("index 0000000..0000000"),
869            "binary+mode change must emit the placeholder index line:\n{rendered}"
870        );
871        assert!(
872            rendered.contains("Binary files a/binary.bin and b/binary.bin differ"),
873            "binary+mode change must still emit the binary marker:\n{rendered}"
874        );
875    }
876
877    /// A binary add emits `new file mode` + placeholder index + marker;
878    /// a binary delete mirrors it with `deleted file mode`.
879    #[test]
880    fn render_diff_patch_binary_add_and_delete_emit_markers() {
881        let added = FileChange {
882            path: "added.bin".to_string(),
883            kind: "added".to_string(),
884            binary: true,
885            lines: None,
886            mode: Some(FileMode::Normal),
887            ..Default::default()
888        };
889        let rendered = render_diff_patch(&diff_report_with(vec![added]));
890        assert!(
891            rendered.contains("new file mode 100644")
892                && rendered.contains("index 0000000..0000000")
893                && rendered.contains("Binary files /dev/null and b/added.bin differ"),
894            "binary add marker:\n{rendered}"
895        );
896
897        let deleted = FileChange {
898            path: "gone.bin".to_string(),
899            kind: "deleted".to_string(),
900            binary: true,
901            lines: None,
902            mode: Some(FileMode::Normal),
903            ..Default::default()
904        };
905        let rendered = render_diff_patch(&diff_report_with(vec![deleted]));
906        assert!(
907            rendered.contains("deleted file mode 100644")
908                && rendered.contains("index 0000000..0000000")
909                && rendered.contains("Binary files a/gone.bin and /dev/null differ"),
910            "binary delete marker:\n{rendered}"
911        );
912    }
913
914    /// A change whose `lines` vector is present but empty must also
915    /// be skipped — the file path is known but there's no hunk body
916    /// to render. Mixed batches (one renderable, one empty) must keep
917    /// rendering the renderable change.
918    #[test]
919    fn render_diff_patch_skips_change_with_empty_lines() {
920        let empty = FileChange {
921            path: "empty.txt".to_string(),
922            kind: "modified".to_string(),
923            lines: Some(Vec::new()),
924            ..Default::default()
925        };
926        let real = modified_change_with_eol(
927            "real.txt",
928            vec![
929                LineDiff::with_lines("@", "@ -1,1 +1,1 @@", None, None),
930                LineDiff::with_lines("-", "old", Some(1), None),
931                LineDiff::with_lines("+", "new", None, Some(1)),
932            ],
933            FileEolState::default(),
934        );
935        let rendered = render_diff_patch(&diff_report_with(vec![empty, real]));
936        assert!(
937            !rendered.contains("empty.txt"),
938            "skipped change must not emit a header: {rendered}"
939        );
940        assert!(
941            rendered.contains("--- a/real.txt"),
942            "renderable change must still be emitted: {rendered}"
943        );
944    }
945
946    /// When both sides lack a trailing newline AND their tails land on
947    /// the same context line, the renderer emits the line once and a
948    /// single `\ No newline at end of file` marker that documents both
949    /// sides' state. `git diff` does the same — two markers in a row
950    /// would be a corruption.
951    #[test]
952    fn render_diff_patch_collapses_both_side_no_eol_marker_on_shared_tail() {
953        // `more` is the tail for both sides; the change is on the line
954        // above (hello -> world). Both blobs end without `\n`.
955        let lines = vec![
956            LineDiff::with_lines("@", "@ -1,2 +1,2 @@", None, None),
957            LineDiff::with_lines("-", "hello", Some(1), None),
958            LineDiff::with_lines("+", "world", None, Some(1)),
959            LineDiff::with_lines(" ", "more", Some(2), Some(2)),
960        ];
961        let eol = FileEolState {
962            old_has_final_newline: false,
963            new_has_final_newline: false,
964            old_line_count: 2,
965            new_line_count: 2,
966        };
967        let change = modified_change_with_eol("tail.txt", lines, eol);
968        let rendered = render_diff_patch(&diff_report_with(vec![change]));
969
970        let marker_count = rendered.matches("\\ No newline at end of file").count();
971        assert_eq!(
972            marker_count, 1,
973            "shared-tail double-no-eol must emit exactly one marker, got:\n{rendered}"
974        );
975        // The context line must NOT be split into `-more`/`+more` —
976        // that's the wrong branch and would confuse `git apply` about
977        // whether the line is being modified.
978        assert!(
979            !rendered.contains("-more\n"),
980            "context tail must not be split when both sides agree:\n{rendered}"
981        );
982        assert!(
983            !rendered.contains("+more\n"),
984            "context tail must not be split when both sides agree:\n{rendered}"
985        );
986        assert!(
987            rendered.contains(" more\n\\ No newline at end of file\n"),
988            "marker must sit immediately after the shared context line:\n{rendered}"
989        );
990    }
991
992    /// When only the OLD side lacks a trailing newline and its tail is
993    /// a context line, the renderer must split that line into a
994    /// `-content` (with the marker after it) + `+content` pair so the
995    /// patch unambiguously documents that the OLD file ends without
996    /// `\n` while the NEW file ends with one.
997    #[test]
998    fn render_diff_patch_splits_context_tail_when_only_old_lacks_newline() {
999        // Diff for OLD `hello` (no eol) -> NEW `hello\nmore\n`:
1000        // ` hello` is the old tail; `+more` is the trailing addition.
1001        let lines = vec![
1002            LineDiff::with_lines("@", "@ -1,1 +1,2 @@", None, None),
1003            LineDiff::with_lines(" ", "hello", Some(1), Some(1)),
1004            LineDiff::with_lines("+", "more", None, Some(2)),
1005        ];
1006        let eol = FileEolState {
1007            old_has_final_newline: false,
1008            new_has_final_newline: true,
1009            old_line_count: 1,
1010            new_line_count: 2,
1011        };
1012        let change = modified_change_with_eol("old.txt", lines, eol);
1013        let rendered = render_diff_patch(&diff_report_with(vec![change]));
1014
1015        assert!(
1016            rendered.contains("-hello\n\\ No newline at end of file\n+hello\n"),
1017            "OLD-side context-tail split must emit `-hello` + marker + `+hello`:\n{rendered}"
1018        );
1019        // Only the OLD side carries a marker — the NEW side ends with
1020        // `\n` so its tail line must NOT be followed by a marker.
1021        let marker_count = rendered.matches("\\ No newline at end of file").count();
1022        assert_eq!(
1023            marker_count, 1,
1024            "exactly one marker expected (OLD side only):\n{rendered}"
1025        );
1026    }
1027
1028    /// Mirror of the OLD-only case: when only the NEW side lacks a
1029    /// trailing newline and its tail is a shared context line, the
1030    /// split emits `-content` + `+content` + marker so the patch
1031    /// states "the file ends without `\n` after applying".
1032    #[test]
1033    fn render_diff_patch_splits_context_tail_when_only_new_lacks_newline() {
1034        // Diff for OLD `hello\nmore\n` -> NEW `hello` (no eol):
1035        // ` hello` is the new tail; `-more` is the removal.
1036        let lines = vec![
1037            LineDiff::with_lines("@", "@ -1,2 +1,1 @@", None, None),
1038            LineDiff::with_lines(" ", "hello", Some(1), Some(1)),
1039            LineDiff::with_lines("-", "more", Some(2), None),
1040        ];
1041        let eol = FileEolState {
1042            old_has_final_newline: true,
1043            new_has_final_newline: false,
1044            old_line_count: 2,
1045            new_line_count: 1,
1046        };
1047        let change = modified_change_with_eol("new.txt", lines, eol);
1048        let rendered = render_diff_patch(&diff_report_with(vec![change]));
1049
1050        assert!(
1051            rendered.contains("-hello\n+hello\n\\ No newline at end of file\n"),
1052            "NEW-side context-tail split must emit `-hello` + `+hello` + marker:\n{rendered}"
1053        );
1054        let marker_count = rendered.matches("\\ No newline at end of file").count();
1055        assert_eq!(
1056            marker_count, 1,
1057            "exactly one marker expected (NEW side only):\n{rendered}"
1058        );
1059    }
1060
1061    /// When the tail is a `-` (deletion) on the OLD side and the OLD
1062    /// blob lacked a trailing newline, the marker goes right after the
1063    /// `-line` — same as `git diff` for a delete-the-last-line patch
1064    /// against a no-eol source. The `+` branch is the mirror.
1065    #[test]
1066    fn render_diff_patch_marker_after_minus_line_when_old_tail_is_deletion() {
1067        // OLD has 2 lines (no eol on `tail`), NEW has 1 line (`only`,
1068        // with eol). The diff is two replacements; the second `-tail`
1069        // is the OLD tail.
1070        let lines = vec![
1071            LineDiff::with_lines("@", "@ -1,2 +1,1 @@", None, None),
1072            LineDiff::with_lines("-", "only", Some(1), None),
1073            LineDiff::with_lines("-", "tail", Some(2), None),
1074            LineDiff::with_lines("+", "only", None, Some(1)),
1075        ];
1076        let eol = FileEolState {
1077            old_has_final_newline: false,
1078            new_has_final_newline: true,
1079            old_line_count: 2,
1080            new_line_count: 1,
1081        };
1082        let change = modified_change_with_eol("del.txt", lines, eol);
1083        let rendered = render_diff_patch(&diff_report_with(vec![change]));
1084
1085        assert!(
1086            rendered.contains("-tail\n\\ No newline at end of file\n"),
1087            "marker must follow the OLD tail deletion line:\n{rendered}"
1088        );
1089    }
1090
1091    /// Pin git's C-style path quoting byte-for-byte. The conformance
1092    /// harness round-trips the common classes through real `git apply`;
1093    /// this covers the exact escape spellings (including the `\a \b \v \f
1094    /// \r` controls and octal fallback) the integration cells don't reach.
1095    #[test]
1096    fn quote_path_matches_git_c_style() {
1097        // Simple paths — and spaces, which git leaves bare — emit unquoted.
1098        assert_eq!(quote_path_for_patch("a/", "src/main.rs"), "a/src/main.rs");
1099        assert_eq!(
1100            quote_path_for_patch("a/", "with space.txt"),
1101            "a/with space.txt"
1102        );
1103        // Tab/newline/quote/backslash force quoting; the prefix is escaped
1104        // inside the quotes, matching git's `quote_two`.
1105        assert_eq!(quote_path_for_patch("a/", "tab\there"), "\"a/tab\\there\"");
1106        assert_eq!(
1107            quote_path_for_patch("b/", "line\nbreak"),
1108            "\"b/line\\nbreak\""
1109        );
1110        assert_eq!(quote_path_for_patch("a/", "quo\"te"), "\"a/quo\\\"te\"");
1111        assert_eq!(
1112            quote_path_for_patch("a/", "back\\slash"),
1113            "\"a/back\\\\slash\""
1114        );
1115        // Non-ASCII (UTF-8 é = 0xC3 0xA9) → per-byte octal.
1116        assert_eq!(quote_path_for_patch("a/", "café"), "\"a/caf\\303\\251\"");
1117        // `rename from`/`rename to` quote the bare path (empty prefix).
1118        assert_eq!(quote_path_for_patch("", "x\ty"), "\"x\\ty\"");
1119        // The remaining named C-escapes plus a low control byte (octal).
1120        assert_eq!(
1121            quote_path_for_patch("", "\u{07}\u{08}\u{0b}\u{0c}\r\u{01}"),
1122            "\"\\a\\b\\v\\f\\r\\001\""
1123        );
1124    }
1125}