Skip to main content

heddle_core/diff/
patch.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Patch-compatible rendering for diff reports.
3
4use std::io::{self, Write};
5
6use objects::object::FileMode;
7
8use super::{DiffReport, FileChange, LineDiff};
9
10pub fn write_diff_patch<W: Write>(output: &DiffReport, writer: &mut W) -> io::Result<()> {
11    for change in &output.changes {
12        // A symlink change carries its raw target bytes in `change.symlink`,
13        // which on Unix need not be valid UTF-8. Render it byte-exact so a
14        // non-UTF-8 link target round-trips through `git apply`; every other
15        // change is UTF-8 text and is appended as its bytes.
16        if change.symlink.is_some() {
17            write_symlink_change(change, writer)?;
18        } else {
19            write_text_change(change, writer)?;
20        }
21    }
22    Ok(())
23}
24
25pub fn render_diff_patch_bytes(output: &DiffReport) -> Vec<u8> {
26    let mut buf: Vec<u8> = Vec::new();
27    write_diff_patch(output, &mut buf).expect("writing diff patch to Vec cannot fail");
28    buf
29}
30
31/// Lossy String view of the byte-exact patch (`render_diff_patch_bytes`),
32/// for the JSON `.patch` field and String-based callers/tests. Only a
33/// non-UTF-8 symlink target (Unix-only) differs from the byte render; JSON
34/// strings cannot carry raw bytes, so a lossy view is the best a String
35/// surface can do. The round-trip surface (`heddle diff --patch`) writes the
36/// bytes directly via `render_diff_patch_bytes`, so its byte fidelity is
37/// never reduced here.
38pub fn render_diff_patch(output: &DiffReport) -> String {
39    String::from_utf8_lossy(&render_diff_patch_bytes(output)).into_owned()
40}
41
42/// Render one non-symlink change as unified-diff text into `writer`. Symlink
43/// changes never reach here — `write_diff_patch` routes them to
44/// `write_symlink_change`, which preserves a non-UTF-8 target — so a symlink
45/// target is never forced through `change.lines` (which a non-UTF-8 target
46/// cannot populate) or `write_binary_change`.
47fn write_text_change<W: Write>(change: &FileChange, writer: &mut W) -> io::Result<()> {
48    let lines_ref = change.lines.as_deref();
49    let has_hunk_body = lines_ref.is_some_and(|lines| lines.iter().any(|line| line.prefix != " "));
50    let old_path = change.old_path.as_deref().unwrap_or(&change.path);
51    let is_rename = change
52        .old_path
53        .as_deref()
54        .is_some_and(|old| old != change.path);
55    let is_added = change.kind == "added";
56    let is_deleted = change.kind == "deleted";
57    let is_modified = !is_rename && !is_added && !is_deleted;
58    // A mode-only modify (chmod / exec-bit flip / type swap) has no
59    // hunk body but is still a real change: git records it as
60    // `old mode`/`new mode` extended headers and `git apply`
61    // reproduces the permission change from those alone.
62    let mode_changed = is_modified
63        && matches!((change.old_mode, change.mode), (Some(old), Some(new)) if old != new);
64    // `lines: None` is the binary / unreadable case — there is no
65    // text body to render, so it never produces a patch regardless
66    // of kind. `lines: Some(_)` (even empty) means we have a
67    // readable text side.
68    let has_text = change.lines.is_some();
69
70    // A binary *content* change (add/delete/modify of a file heddle
71    // cannot diff as text). heddle has no git binary delta to emit
72    // (its blob hashes are not git SHAs), and silently dropping the
73    // change would let `git apply` "succeed" while the binary content
74    // stays stale — the false round-trip cid 3319484747 flagged. Emit
75    // git's `Binary files … differ` marker with a *placeholder* index
76    // line: that index line is what makes `git apply` recognize a
77    // binary patch and refuse the *whole* patch ("without full index
78    // line") instead of skipping the block. Without the index line git
79    // treats the marker as an empty patch and silently ignores it. A
80    // content-identical mode-only change is never `binary` (the diff
81    // readers short-circuit it to an empty text body), so this only
82    // fires on a real binary content change, never a chmod.
83    if change.binary && !is_rename {
84        write_binary_change(change, is_added, is_deleted, mode_changed, writer)?;
85        return Ok(());
86    }
87
88    // Decide whether this change emits anything at all:
89    // * renames always do (the extended headers carry the move even
90    //   for identical content);
91    // * add/delete do whenever there's a readable text side — the
92    //   empty-file case renders header-only;
93    // * a modify renders only when it has a real hunk body. A modify
94    //   with no body and matching EOL is a no-op; the
95    //   trailing-newline-only case is handled upstream in
96    //   `unified_hunks`, which synthesizes a tail hunk so this
97    //   branch sees `has_hunk_body == true`.
98    let should_render = if is_rename {
99        true
100    } else if is_added || is_deleted {
101        has_text
102    } else {
103        has_hunk_body || mode_changed
104    };
105    if !should_render {
106        return Ok(());
107    }
108
109    if is_rename {
110        writeln!(
111            writer,
112            "diff --git {} {}",
113            quote_path_for_patch("a/", old_path),
114            quote_path_for_patch("b/", &change.path)
115        )?;
116        // A rename paired with a chmod/type change (`old.sh` renamed
117        // to `new.sh` and made executable) carries both modes; emit
118        // the `old mode`/`new mode` pair before `similarity index`,
119        // matching `git diff`, so `git apply` reproduces the
120        // permission change as well as the move.
121        if let (Some(old), Some(new)) = (change.old_mode, change.mode)
122            && old != new
123        {
124            writeln!(writer, "old mode {}", mode_str(change.old_mode))?;
125            writeln!(writer, "new mode {}", mode_str(change.mode))?;
126        }
127        let pct = (change.similarity_score.unwrap_or(1.0).clamp(0.0, 1.0) * 100.0).round() as u32;
128        writeln!(writer, "similarity index {pct}%")?;
129        writeln!(writer, "rename from {}", quote_path_for_patch("", old_path))?;
130        writeln!(
131            writer,
132            "rename to {}",
133            quote_path_for_patch("", &change.path)
134        )?;
135        // Pure rename — extended headers alone suffice; emitting
136        // `--- a/old / +++ b/new` without hunks would tell git to
137        // apply an empty patch and warn about a stray header.
138        if !has_hunk_body {
139            return Ok(());
140        }
141    } else if is_added {
142        writeln!(
143            writer,
144            "diff --git {} {}",
145            quote_path_for_patch("a/", &change.path),
146            quote_path_for_patch("b/", &change.path)
147        )?;
148        writeln!(writer, "new file mode {}", mode_str(change.mode))?;
149    } else if is_deleted {
150        writeln!(
151            writer,
152            "diff --git {} {}",
153            quote_path_for_patch("a/", &change.path),
154            quote_path_for_patch("b/", &change.path)
155        )?;
156        writeln!(writer, "deleted file mode {}", mode_str(change.mode))?;
157    } else if mode_changed {
158        // A modify whose mode changed (with or without a content
159        // hunk). Emit the `diff --git` + `old mode`/`new mode`
160        // header pair.
161        writeln!(
162            writer,
163            "diff --git {} {}",
164            quote_path_for_patch("a/", &change.path),
165            quote_path_for_patch("b/", &change.path)
166        )?;
167        writeln!(writer, "old mode {}", mode_str(change.old_mode))?;
168        writeln!(writer, "new mode {}", mode_str(change.mode))?;
169    } else {
170        // A plain content modify. Emit the `diff --git` header so
171        // every file stanza is self-delimiting. A bare `--- a/<path>`
172        // is ambiguous: git's parser binds it to the *preceding*
173        // `diff --git` stanza when one is still open — e.g. a
174        // header-only empty-add (`diff --git ... / new file mode`) or
175        // a mode-only change immediately above — and misreads this
176        // file's `---` as the prior file's source side, corrupting the
177        // patch ("expected /dev/null"). The explicit header closes the
178        // prior stanza and opens this one. (cid 3319484717 ordering.)
179        writeln!(
180            writer,
181            "diff --git {} {}",
182            quote_path_for_patch("a/", &change.path),
183            quote_path_for_patch("b/", &change.path)
184        )?;
185    }
186
187    // An empty-file add/delete (text side present but zero lines)
188    // has no hunk body. git stops after the `new/deleted file mode`
189    // header in that case and `git apply` still creates/unlinks the
190    // path — emitting `--- /+++/@@` with no `@@` body would be a
191    // malformed hunk, so we stop here too.
192    if (is_added || is_deleted) && !has_hunk_body {
193        return Ok(());
194    }
195    // A mode-only modify carries no content hunk: the `old mode`/
196    // `new mode` header pair is the entire patch, so stop before the
197    // `--- /+++` line-diff headers (which would be a malformed
198    // empty hunk).
199    if is_modified && !has_hunk_body {
200        return Ok(());
201    }
202
203    if is_added {
204        writer.write_all(b"--- /dev/null\n")?;
205    } else {
206        writeln!(writer, "--- {}", quote_path_for_patch("a/", old_path))?;
207    }
208    if is_deleted {
209        writer.write_all(b"+++ /dev/null\n")?;
210    } else {
211        writeln!(writer, "+++ {}", quote_path_for_patch("b/", &change.path))?;
212    }
213    if let Some(lines) = lines_ref {
214        write_patch_hunks(change, lines, writer)?;
215    }
216    Ok(())
217}
218
219/// Render a symlink change (add / delete / target-edit / rename) byte-exact.
220///
221/// A symlink's git blob is its raw target bytes, which on Unix need not be
222/// valid UTF-8 — so the hunk body is emitted straight from `change.symlink`
223/// (the single byte-preserving symlink path) rather than `change.lines`,
224/// which a non-UTF-8 target cannot populate. Marking such a change `binary`
225/// (the old behaviour) emitted a placeholder-binary stanza that `git apply`
226/// rejects for a `120000` entry; emitting the target as a text hunk is what
227/// git itself does and round-trips. The extended headers mirror
228/// `write_text_change`'s (add/delete/rename), and the mode is always
229/// `120000` so a rename never needs an `old mode`/`new mode` pair unless the
230/// two sides genuinely differ.
231fn write_symlink_change<W: Write>(change: &FileChange, writer: &mut W) -> io::Result<()> {
232    let Some(sym) = change.symlink.as_ref() else {
233        return Ok(());
234    };
235    let old_path = change.old_path.as_deref().unwrap_or(&change.path);
236    let is_rename = change
237        .old_path
238        .as_deref()
239        .is_some_and(|old| old != change.path);
240    let is_added = change.kind == "added";
241    let is_deleted = change.kind == "deleted";
242
243    if is_rename {
244        writeln!(
245            writer,
246            "diff --git {} {}",
247            quote_path_for_patch("a/", old_path),
248            quote_path_for_patch("b/", &change.path)
249        )?;
250        if let (Some(old), Some(new)) = (change.old_mode, change.mode)
251            && old != new
252        {
253            writeln!(writer, "old mode {}", mode_str(change.old_mode))?;
254            writeln!(writer, "new mode {}", mode_str(change.mode))?;
255        }
256        let pct = (change.similarity_score.unwrap_or(1.0).clamp(0.0, 1.0) * 100.0).round() as u32;
257        writeln!(writer, "similarity index {pct}%")?;
258        writeln!(writer, "rename from {}", quote_path_for_patch("", old_path))?;
259        writeln!(
260            writer,
261            "rename to {}",
262            quote_path_for_patch("", &change.path)
263        )?;
264        // Pure rename (identical target) — the extended headers alone carry
265        // the move, exactly like a text rename with no hunk body.
266        if sym.old == sym.new {
267            return Ok(());
268        }
269        writeln!(writer, "--- {}", quote_path_for_patch("a/", old_path))?;
270        writeln!(writer, "+++ {}", quote_path_for_patch("b/", &change.path))?;
271    } else if is_added {
272        writeln!(
273            writer,
274            "diff --git {} {}",
275            quote_path_for_patch("a/", &change.path),
276            quote_path_for_patch("b/", &change.path)
277        )?;
278        writeln!(writer, "new file mode {}", mode_str(change.mode))?;
279        writer.write_all(b"--- /dev/null\n")?;
280        writeln!(writer, "+++ {}", quote_path_for_patch("b/", &change.path))?;
281    } else if is_deleted {
282        writeln!(
283            writer,
284            "diff --git {} {}",
285            quote_path_for_patch("a/", &change.path),
286            quote_path_for_patch("b/", &change.path)
287        )?;
288        writeln!(writer, "deleted file mode {}", mode_str(change.mode))?;
289        writeln!(writer, "--- {}", quote_path_for_patch("a/", &change.path))?;
290        writer.write_all(b"+++ /dev/null\n")?;
291    } else {
292        // A symlink target-edit. The mode is unchanged (`120000` → `120000`),
293        // so no `old mode`/`new mode` block — just the file header. An
294        // identical target would be a no-op and is never emitted by the diff
295        // backends, but guard it so an accidental empty hunk can't form.
296        if sym.old == sym.new {
297            return Ok(());
298        }
299        writeln!(
300            writer,
301            "diff --git {} {}",
302            quote_path_for_patch("a/", &change.path),
303            quote_path_for_patch("b/", &change.path)
304        )?;
305        writeln!(writer, "--- {}", quote_path_for_patch("a/", &change.path))?;
306        writeln!(writer, "+++ {}", quote_path_for_patch("b/", &change.path))?;
307    }
308
309    write_symlink_hunk(sym.old.as_deref(), sym.new.as_deref(), writer)?;
310    Ok(())
311}
312
313/// Emit the unified-diff hunk for a symlink's target bytes. A symlink's git
314/// blob has no trailing newline, so each side normally collapses to a single
315/// line carrying the `\ No newline at end of file` marker; a target that
316/// embeds a `\n` (pathological but representable) splits into multiple lines.
317/// The `@@` header mirrors `unified_hunks`'s `@@ -s,c +s,c @@` shape (counts
318/// always written, even `,1`), which `git apply` accepts.
319fn write_symlink_hunk<W: Write>(
320    old: Option<&[u8]>,
321    new: Option<&[u8]>,
322    writer: &mut W,
323) -> io::Result<()> {
324    let old_lines = split_target_lines(old);
325    let new_lines = split_target_lines(new);
326    let old_count = old_lines.len();
327    let new_count = new_lines.len();
328    let old_start = if old_count == 0 { 0 } else { 1 };
329    let new_start = if new_count == 0 { 0 } else { 1 };
330    writeln!(
331        writer,
332        "@@ -{old_start},{old_count} +{new_start},{new_count} @@"
333    )?;
334    let old_no_eol = !target_has_trailing_newline(old);
335    let new_no_eol = !target_has_trailing_newline(new);
336    for (idx, line) in old_lines.iter().enumerate() {
337        writer.write_all(b"-")?;
338        writer.write_all(line)?;
339        writer.write_all(b"\n")?;
340        if old_no_eol && idx + 1 == old_count {
341            writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
342        }
343    }
344    for (idx, line) in new_lines.iter().enumerate() {
345        writer.write_all(b"+")?;
346        writer.write_all(line)?;
347        writer.write_all(b"\n")?;
348        if new_no_eol && idx + 1 == new_count {
349            writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
350        }
351    }
352    Ok(())
353}
354
/// Break a symlink target's raw bytes into unified-diff lines.
///
/// A missing side (`None`) and an empty blob both produce no lines. A final
/// `\n` is treated as the terminator of the last line, not as the start of
/// an extra empty line; whether it was present is reported separately by
/// `target_has_trailing_newline`.
fn split_target_lines(target: Option<&[u8]>) -> Vec<&[u8]> {
    match target {
        None => Vec::new(),
        Some(bytes) if bytes.is_empty() => Vec::new(),
        Some(bytes) => {
            // Drop one terminating newline (if any) before splitting.
            let body = bytes.strip_suffix(b"\n").unwrap_or(bytes);
            body.split(|byte| *byte == b'\n').collect()
        }
    }
}
372
/// Report whether a symlink target is present and its last byte is `\n`.
/// An absent (`None`) or empty target has no trailing newline.
fn target_has_trailing_newline(target: Option<&[u8]>) -> bool {
    matches!(target, Some(bytes) if bytes.last() == Some(&b'\n'))
}
376
377/// Render a binary content change (add / delete / plain modify / modify
378/// with a mode change) as git's `Binary files … differ` marker.
379///
380/// heddle cannot emit a git binary delta — its blob hashes are not git
381/// SHAs — so the marker is the most faithful thing it can produce. The
382/// catch (cid 3319484747): a bare `Binary files … differ` marker with no
383/// `index` header is treated by `git apply` as an empty patch and
384/// *silently skipped*, which would let the apply "succeed" while the
385/// binary content stays stale. Emitting a *placeholder* `index
386/// 0000000..0000000` line flips git into binary-patch mode, where it
387/// refuses the whole patch ("cannot apply binary patch … without full
388/// index line") rather than ignoring it. That refusal is the correct
389/// outcome: heddle has no delta to apply, so the honest result is a hard
390/// failure, never a false round-trip.
391fn write_binary_change<W: Write>(
392    change: &FileChange,
393    is_added: bool,
394    is_deleted: bool,
395    mode_changed: bool,
396    writer: &mut W,
397) -> io::Result<()> {
398    let path = &change.path;
399    writeln!(
400        writer,
401        "diff --git {} {}",
402        quote_path_for_patch("a/", path),
403        quote_path_for_patch("b/", path)
404    )?;
405    if is_added {
406        writeln!(writer, "new file mode {}", mode_str(change.mode))?;
407        writer.write_all(b"index 0000000..0000000\n")?;
408    } else if is_deleted {
409        writeln!(writer, "deleted file mode {}", mode_str(change.mode))?;
410        writer.write_all(b"index 0000000..0000000\n")?;
411    } else if mode_changed {
412        writeln!(writer, "old mode {}", mode_str(change.old_mode))?;
413        writeln!(writer, "new mode {}", mode_str(change.mode))?;
414        writer.write_all(b"index 0000000..0000000\n")?;
415    } else {
416        // Plain binary modify: git stamps the mode at the end of the
417        // index line (`index <old>..<new> 100644`).
418        writeln!(writer, "index 0000000..0000000 {}", mode_str(change.mode))?;
419    }
420    let (a, b) = if is_added {
421        ("/dev/null".to_string(), quote_path_for_patch("b/", path))
422    } else if is_deleted {
423        (quote_path_for_patch("a/", path), "/dev/null".to_string())
424    } else {
425        (
426            quote_path_for_patch("a/", path),
427            quote_path_for_patch("b/", path),
428        )
429    };
430    writeln!(writer, "Binary files {a} and {b} differ")?;
431    Ok(())
432}
433
434/// Map a tracked file mode to the git unified-diff mode string. `None`
435/// (mode not resolved) and the regular-file case both render `100644`.
436///
437/// A `Spoollink` is a native child-spool edge, not a file, and does not
438/// participate in git-style textual diffs; it deliberately does NOT render as
439/// a git submodule (`160000`) and falls back to the neutral `100644` if it
440/// ever reaches here.
441fn mode_str(mode: Option<FileMode>) -> &'static str {
442    match mode {
443        Some(FileMode::Executable) => "100755",
444        Some(FileMode::Symlink) => "120000",
445        Some(FileMode::Gitlink) => "160000",
446        Some(FileMode::Normal) | Some(FileMode::Spoollink) | None => "100644",
447    }
448}
449
450/// Quote a patch-header path the way `git diff` does (C-style quoting,
451/// `core.quotePath` defaults to true). A path containing a tab, newline,
452/// double-quote, backslash, control byte, or non-ASCII byte is wrapped in
453/// double quotes with the bytes escaped; a "simple" path is emitted bare.
454///
455/// `prefix` is the in-quote prefix git stamps on `diff --git`/`--- `/`+++ `
456/// headers (`a/`, `b/`) — git puts the prefix *inside* the quotes
457/// (`"a/tab\there"`), so it is escaped alongside the path. `rename from`/
458/// `rename to` pass an empty prefix (git quotes the bare path there).
459///
460/// Verified byte-for-byte against `git diff` for tab, newline, quote,
461/// backslash, and non-ASCII (UTF-8 → per-byte octal) paths.
462fn quote_path_for_patch(prefix: &str, path: &str) -> String {
463    if !needs_c_quoting(prefix) && !needs_c_quoting(path) {
464        return format!("{prefix}{path}");
465    }
466    let mut out = String::with_capacity(prefix.len() + path.len() + 2);
467    out.push('"');
468    push_c_quoted(&mut out, prefix);
469    push_c_quoted(&mut out, path);
470    out.push('"');
471    out
472}
473
474fn needs_c_quoting(s: &str) -> bool {
475    s.bytes().any(byte_needs_escape)
476}
477
/// Decide whether git would escape `byte` inside a quoted path.
///
/// Escaped are: every byte below 0x20, DEL and everything above it
/// (0x7f..=0xff — `core.quotePath` octal-escapes non-ASCII), and the two
/// in-quote metacharacters `"` and `\`. All other printable ASCII passes.
fn byte_needs_escape(byte: u8) -> bool {
    match byte {
        b'"' | b'\\' => true,
        0x20..=0x7e => false,
        _ => true,
    }
}
484
/// Append `s` to `out` using git's C-style path escaping.
///
/// Works byte by byte: `"` and `\` get a backslash, the control bytes with
/// a C mnemonic become `\a \b \t \n \v \f \r`, other printable ASCII
/// (0x20..=0x7e) is copied unchanged, and every remaining byte (other
/// control bytes, DEL, non-ASCII) is written as a three-digit octal escape
/// such as `\303`. The surrounding double quotes are the caller's job.
fn push_c_quoted(out: &mut String, s: &str) {
    // Scoped import: the file's top-level `Write` is `std::io::Write`, and
    // formatting into a `String` needs the `fmt` flavour.
    use std::fmt::Write as _;

    for byte in s.bytes() {
        match byte {
            b'"' => out.push_str("\\\""),
            b'\\' => out.push_str("\\\\"),
            0x07 => out.push_str("\\a"),
            0x08 => out.push_str("\\b"),
            0x09 => out.push_str("\\t"),
            0x0a => out.push_str("\\n"),
            0x0b => out.push_str("\\v"),
            0x0c => out.push_str("\\f"),
            0x0d => out.push_str("\\r"),
            0x20..=0x7e => out.push(char::from(byte)),
            // Format straight into `out` instead of building a temporary
            // `String` per escaped byte (non-ASCII paths hit this arm for
            // every byte).
            other => write!(out, "\\{other:03o}").expect("writing to a String cannot fail"),
        }
    }
}
502
503const NO_NEWLINE_MARKER: &str = "\\ No newline at end of file\n";
504
505/// Walk the rendered hunks once and emit each line, splicing in the
506/// `\ No newline at end of file` marker after the line that holds the
507/// file's tail on a side whose source bytes lacked a trailing `\n`.
508///
509/// The diff backend strips line terminators, so per-line equality
510/// collapses `hello` and `hello\n` into the same `LineDiff`. To match
511/// `git diff`'s output (which `git apply --check` accepts), a context
512/// line that sits on the no-newline side's tail has to be split into
513/// a `-` + `+` pair, with the marker attached to the side that lacks
514/// the terminator. The 4-case matrix is in `write_patch_hunks`'s
515/// context-line branch.
516fn write_patch_hunks<W: Write>(
517    change: &FileChange,
518    lines: &[LineDiff],
519    writer: &mut W,
520) -> io::Result<()> {
521    let old_no_eol = !change.eol.old_has_final_newline;
522    let new_no_eol = !change.eol.new_has_final_newline;
523    let old_tail_idx = if old_no_eol && change.eol.old_line_count > 0 {
524        find_side_tail_idx(lines, Side::Old, change.eol.old_line_count)
525    } else {
526        None
527    };
528    let new_tail_idx = if new_no_eol && change.eol.new_line_count > 0 {
529        find_side_tail_idx(lines, Side::New, change.eol.new_line_count)
530    } else {
531        None
532    };
533
534    for (idx, line) in lines.iter().enumerate() {
535        let is_old_tail = Some(idx) == old_tail_idx;
536        let is_new_tail = Some(idx) == new_tail_idx;
537        let needs_old_marker = is_old_tail && old_no_eol;
538        let needs_new_marker = is_new_tail && new_no_eol;
539
540        if line.prefix == " " && (needs_old_marker || needs_new_marker) {
541            if is_old_tail && is_new_tail && needs_old_marker && needs_new_marker {
542                // Both sides' tail lands on this context line and both
543                // lack a trailing newline — emit the line once, then
544                // a single marker that applies to both sides.
545                write_patch_line(writer, line)?;
546                writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
547            } else {
548                // Mixed state: at least one side needs the marker and
549                // the other shouldn't be tagged. Split the context
550                // line into a `-content` / `+content` pair so each
551                // side's marker (or its absence) is unambiguous.
552                writer.write_all(b"-")?;
553                writer.write_all(line.content.as_bytes())?;
554                writer.write_all(b"\n")?;
555                if needs_old_marker {
556                    writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
557                }
558                writer.write_all(b"+")?;
559                writer.write_all(line.content.as_bytes())?;
560                writer.write_all(b"\n")?;
561                if needs_new_marker {
562                    writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
563                }
564            }
565            continue;
566        }
567
568        write_patch_line(writer, line)?;
569        if needs_old_marker && line.prefix == "-" {
570            writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
571        }
572        if needs_new_marker && line.prefix == "+" {
573            writer.write_all(NO_NEWLINE_MARKER.as_bytes())?;
574        }
575    }
576    Ok(())
577}
578
/// Selects which side of a diff a lookup refers to.
#[derive(Copy, Clone)]
enum Side {
    /// The pre-change side (`-` and context lines, `old_line` numbers).
    Old,
    /// The post-change side (`+` and context lines, `new_line` numbers).
    New,
}
584
585fn find_side_tail_idx(lines: &[LineDiff], side: Side, target: usize) -> Option<usize> {
586    lines.iter().enumerate().rev().find_map(|(idx, line)| {
587        let (on_side, line_number) = match side {
588            Side::Old => (line.prefix == "-" || line.prefix == " ", line.old_line),
589            Side::New => (line.prefix == "+" || line.prefix == " ", line.new_line),
590        };
591        if on_side && line_number == Some(target) {
592            Some(idx)
593        } else {
594            None
595        }
596    })
597}
598
599fn write_patch_line<W: Write>(writer: &mut W, line: &LineDiff) -> io::Result<()> {
600    writer.write_all(line.prefix.as_bytes())?;
601    writer.write_all(line.content.as_bytes())?;
602    writer.write_all(b"\n")
603}
604
605#[cfg(test)]
606mod tests {
607    use objects::object::FileMode;
608
609    use super::{quote_path_for_patch, render_diff_patch, render_diff_patch_bytes};
610    use crate::diff::{DiffReport, FileChange, FileEolState, LineDiff, SymlinkChange};
611
612    fn modified_change_with_eol(path: &str, lines: Vec<LineDiff>, eol: FileEolState) -> FileChange {
613        FileChange {
614            path: path.to_string(),
615            kind: "modified".to_string(),
616            lines: Some(lines),
617            eol,
618            ..Default::default()
619        }
620    }
621
622    fn diff_report_with(changes: Vec<FileChange>) -> DiffReport {
623        DiffReport::new(None, None, changes, None, None, None)
624    }
625
626    #[cfg(unix)]
627    fn hermetic_git_command(dir: &std::path::Path, args: &[&str]) -> std::process::Command {
628        let mut command = std::process::Command::new("git");
629        command
630            .args(args)
631            .current_dir(dir)
632            .env("GIT_CONFIG_GLOBAL", "/dev/null")
633            .env("GIT_CONFIG_SYSTEM", "/dev/null")
634            .env("GIT_AUTHOR_NAME", "Heddle Test")
635            .env("GIT_AUTHOR_EMAIL", "heddle@example.com")
636            .env("GIT_COMMITTER_NAME", "Heddle Test")
637            .env("GIT_COMMITTER_EMAIL", "heddle@example.com");
638        command
639    }
640
641    #[cfg(unix)]
642    fn hermetic_git(dir: &std::path::Path, args: &[&str]) {
643        let status = hermetic_git_command(dir, args)
644            .status()
645            .unwrap_or_else(|err| panic!("git {args:?} should spawn: {err}"));
646        assert!(status.success(), "git {args:?} should succeed");
647    }
648
649    #[cfg(unix)]
650    fn pipe_git_apply(dir: &std::path::Path, args: &[&str], patch: &[u8]) -> std::process::Output {
651        use std::{io::Write, process::Stdio};
652
653        let mut child = hermetic_git_command(dir, args)
654            .stdin(Stdio::piped())
655            .stdout(Stdio::piped())
656            .stderr(Stdio::piped())
657            .spawn()
658            .unwrap_or_else(|err| panic!("git {args:?} should spawn: {err}"));
659        child.stdin.as_mut().unwrap().write_all(patch).unwrap();
660        child
661            .wait_with_output()
662            .unwrap_or_else(|err| panic!("git {args:?} should finish: {err}"))
663    }
664
665    #[cfg(unix)]
666    #[test]
667    fn render_diff_patch_bytes_applies_non_utf8_symlink_target_byte_exactly() {
668        use std::os::unix::ffi::OsStrExt;
669
670        let target = b"target-\xff\xfe";
671        let change = FileChange {
672            path: "linky".to_string(),
673            kind: "added".to_string(),
674            mode: Some(FileMode::Symlink),
675            symlink: Some(SymlinkChange {
676                old: None,
677                new: Some(target.to_vec()),
678            }),
679            ..Default::default()
680        };
681        let patch = render_diff_patch_bytes(&diff_report_with(vec![change]));
682        assert!(
683            patch.windows(target.len()).any(|window| window == target),
684            "patch must carry the raw non-UTF-8 target bytes:\n{}",
685            String::from_utf8_lossy(&patch)
686        );
687
688        let scratch = tempfile::TempDir::new().unwrap();
689        hermetic_git(scratch.path(), &["init", "-q"]);
690        hermetic_git(scratch.path(), &["checkout", "-q", "-b", "main"]);
691
692        let check = pipe_git_apply(scratch.path(), &["apply", "--check"], &patch);
693        assert!(
694            check.status.success(),
695            "git apply --check rejected patch;\nstderr={}\npatch=\n{}",
696            String::from_utf8_lossy(&check.stderr),
697            String::from_utf8_lossy(&patch)
698        );
699        let applied = pipe_git_apply(scratch.path(), &["apply"], &patch);
700        assert!(
701            applied.status.success(),
702            "git apply rejected patch;\nstderr={}\npatch=\n{}",
703            String::from_utf8_lossy(&applied.stderr),
704            String::from_utf8_lossy(&patch)
705        );
706
707        let applied_target = std::fs::read_link(scratch.path().join("linky")).unwrap();
708        assert_eq!(
709            applied_target.as_os_str().as_bytes(),
710            target,
711            "applied symlink target must be byte-exact"
712        );
713    }
714
715    /// A mode-only modify (exec-bit flip, no content change) must render
716    /// as a header-only `diff --git` + `old mode`/`new mode` block with
717    /// no `@@` hunk. Regressing this drops the chmod from the patch so
718    /// `git apply` can't reproduce the permission change (cid 3318629228).
719    #[test]
720    fn render_diff_patch_emits_mode_only_header_for_chmod() {
721        let change = FileChange {
722            path: "run.sh".to_string(),
723            kind: "modified".to_string(),
724            lines: Some(Vec::new()),
725            old_mode: Some(FileMode::Normal),
726            mode: Some(FileMode::Executable),
727            ..Default::default()
728        };
729        let rendered = render_diff_patch(&diff_report_with(vec![change]));
730        assert!(
731            rendered.contains("diff --git a/run.sh b/run.sh"),
732            "chmod-only must emit the `diff --git` header:\n{rendered}"
733        );
734        assert!(
735            rendered.contains("old mode 100644") && rendered.contains("new mode 100755"),
736            "chmod-only must emit `old mode`/`new mode`:\n{rendered}"
737        );
738        assert!(
739            !rendered.contains("@@") && !rendered.contains("--- a/"),
740            "chmod-only is header-only — no hunk body:\n{rendered}"
741        );
742    }
743
744    #[test]
745    fn render_diff_patch_emits_gitlink_mode_without_blob_hunk() {
746        let change = FileChange {
747            path: "vendor".to_string(),
748            kind: "added".to_string(),
749            lines: Some(Vec::new()),
750            mode: Some(FileMode::Gitlink),
751            ..Default::default()
752        };
753
754        let rendered = render_diff_patch(&diff_report_with(vec![change]));
755
756        assert!(
757            rendered.contains("new file mode 160000"),
758            "gitlinks must render their durable mode, not a regular-file mode:\n{rendered}"
759        );
760        assert!(
761            !rendered.contains("@@") && !rendered.contains("heddle-submodule:"),
762            "gitlink patch output must not synthesize legacy marker blob content:\n{rendered}"
763        );
764    }
765
766    /// A modify that changes BOTH content and mode emits the mode-header
767    /// pair AND the usual `--- /+++` line-diff body.
768    #[test]
769    fn render_diff_patch_emits_mode_headers_with_content_hunk() {
770        let change = FileChange {
771            path: "run.sh".to_string(),
772            kind: "modified".to_string(),
773            lines: Some(vec![
774                LineDiff::with_lines("@", "@ -1,1 +1,1 @@", None, None),
775                LineDiff::with_lines("-", "echo old", Some(1), None),
776                LineDiff::with_lines("+", "echo new", None, Some(1)),
777            ]),
778            old_mode: Some(FileMode::Normal),
779            mode: Some(FileMode::Executable),
780            ..Default::default()
781        };
782        let rendered = render_diff_patch(&diff_report_with(vec![change]));
783        assert!(
784            rendered.contains("old mode 100644") && rendered.contains("new mode 100755"),
785            "content+mode change must still emit the mode headers:\n{rendered}"
786        );
787        assert!(
788            rendered.contains("--- a/run.sh")
789                && rendered.contains("+++ b/run.sh")
790                && rendered.contains("+echo new"),
791            "content+mode change must still emit the line-diff body:\n{rendered}"
792        );
793    }
794
795    /// An unchanged mode on a modify with no hunk body is a genuine
796    /// no-op and must emit nothing — guards against the mode branch
797    /// firing when `old_mode == mode`.
798    #[test]
799    fn render_diff_patch_skips_modify_with_same_mode_and_no_body() {
800        let change = FileChange {
801            path: "run.sh".to_string(),
802            kind: "modified".to_string(),
803            lines: Some(Vec::new()),
804            old_mode: Some(FileMode::Normal),
805            mode: Some(FileMode::Normal),
806            ..Default::default()
807        };
808        let rendered = render_diff_patch(&diff_report_with(vec![change]));
809        assert!(
810            rendered.is_empty(),
811            "no-op modify (same mode, no body) must emit nothing:\n{rendered}"
812        );
813    }
814
815    /// A binary content modify (`binary: true`, `lines: None`) must emit
816    /// git's `Binary files … differ` marker with a *placeholder* index
817    /// line. Silently dropping it would let `git apply` "succeed" while
818    /// the binary content stayed stale (cid 3319484747); the index line
819    /// flips git into binary-patch mode so it refuses the whole patch
820    /// instead of skipping the block.
821    #[test]
822    fn render_diff_patch_binary_modify_emits_marker_with_index() {
823        let change = FileChange {
824            path: "binary.bin".to_string(),
825            kind: "modified".to_string(),
826            binary: true,
827            lines: None,
828            mode: Some(FileMode::Normal),
829            old_mode: Some(FileMode::Normal),
830            ..Default::default()
831        };
832        let rendered = render_diff_patch(&diff_report_with(vec![change]));
833        assert!(
834            rendered.contains("diff --git a/binary.bin b/binary.bin"),
835            "binary modify must emit a diff header:\n{rendered}"
836        );
837        assert!(
838            rendered.contains("index 0000000..0000000 100644"),
839            "binary modify must emit a placeholder index line:\n{rendered}"
840        );
841        assert!(
842            rendered.contains("Binary files a/binary.bin and b/binary.bin differ"),
843            "binary modify must emit the binary marker:\n{rendered}"
844        );
845        assert!(
846            !rendered.contains("--- a/binary.bin"),
847            "binary modify must not emit a text hunk header:\n{rendered}"
848        );
849    }
850
851    /// A binary modify whose mode *also* changed emits the
852    /// `old mode`/`new mode` pair (so the chmod is recorded) followed by
853    /// the placeholder index + binary marker — never a mode-only chmod
854    /// patch that git apply would accept while leaving stale binary
855    /// content (cid 3319484747).
856    #[test]
857    fn render_diff_patch_binary_modify_with_mode_change_keeps_marker() {
858        let change = FileChange {
859            path: "binary.bin".to_string(),
860            kind: "modified".to_string(),
861            binary: true,
862            lines: None,
863            old_mode: Some(FileMode::Normal),
864            mode: Some(FileMode::Executable),
865            ..Default::default()
866        };
867        let rendered = render_diff_patch(&diff_report_with(vec![change]));
868        assert!(
869            rendered.contains("old mode 100644") && rendered.contains("new mode 100755"),
870            "binary+mode change must still record the chmod:\n{rendered}"
871        );
872        assert!(
873            rendered.contains("index 0000000..0000000"),
874            "binary+mode change must emit the placeholder index line:\n{rendered}"
875        );
876        assert!(
877            rendered.contains("Binary files a/binary.bin and b/binary.bin differ"),
878            "binary+mode change must still emit the binary marker:\n{rendered}"
879        );
880    }
881
882    /// A binary add emits `new file mode` + placeholder index + marker;
883    /// a binary delete mirrors it with `deleted file mode`.
884    #[test]
885    fn render_diff_patch_binary_add_and_delete_emit_markers() {
886        let added = FileChange {
887            path: "added.bin".to_string(),
888            kind: "added".to_string(),
889            binary: true,
890            lines: None,
891            mode: Some(FileMode::Normal),
892            ..Default::default()
893        };
894        let rendered = render_diff_patch(&diff_report_with(vec![added]));
895        assert!(
896            rendered.contains("new file mode 100644")
897                && rendered.contains("index 0000000..0000000")
898                && rendered.contains("Binary files /dev/null and b/added.bin differ"),
899            "binary add marker:\n{rendered}"
900        );
901
902        let deleted = FileChange {
903            path: "gone.bin".to_string(),
904            kind: "deleted".to_string(),
905            binary: true,
906            lines: None,
907            mode: Some(FileMode::Normal),
908            ..Default::default()
909        };
910        let rendered = render_diff_patch(&diff_report_with(vec![deleted]));
911        assert!(
912            rendered.contains("deleted file mode 100644")
913                && rendered.contains("index 0000000..0000000")
914                && rendered.contains("Binary files a/gone.bin and /dev/null differ"),
915            "binary delete marker:\n{rendered}"
916        );
917    }
918
919    /// A change whose `lines` vector is present but empty must also
920    /// be skipped — the file path is known but there's no hunk body
921    /// to render. Mixed batches (one renderable, one empty) must keep
922    /// rendering the renderable change.
923    #[test]
924    fn render_diff_patch_skips_change_with_empty_lines() {
925        let empty = FileChange {
926            path: "empty.txt".to_string(),
927            kind: "modified".to_string(),
928            lines: Some(Vec::new()),
929            ..Default::default()
930        };
931        let real = modified_change_with_eol(
932            "real.txt",
933            vec![
934                LineDiff::with_lines("@", "@ -1,1 +1,1 @@", None, None),
935                LineDiff::with_lines("-", "old", Some(1), None),
936                LineDiff::with_lines("+", "new", None, Some(1)),
937            ],
938            FileEolState::default(),
939        );
940        let rendered = render_diff_patch(&diff_report_with(vec![empty, real]));
941        assert!(
942            !rendered.contains("empty.txt"),
943            "skipped change must not emit a header: {rendered}"
944        );
945        assert!(
946            rendered.contains("--- a/real.txt"),
947            "renderable change must still be emitted: {rendered}"
948        );
949    }
950
951    /// When both sides lack a trailing newline AND their tails land on
952    /// the same context line, the renderer emits the line once and a
953    /// single `\ No newline at end of file` marker that documents both
954    /// sides' state. `git diff` does the same — two markers in a row
955    /// would be a corruption.
956    #[test]
957    fn render_diff_patch_collapses_both_side_no_eol_marker_on_shared_tail() {
958        // `more` is the tail for both sides; the change is on the line
959        // above (hello -> world). Both blobs end without `\n`.
960        let lines = vec![
961            LineDiff::with_lines("@", "@ -1,2 +1,2 @@", None, None),
962            LineDiff::with_lines("-", "hello", Some(1), None),
963            LineDiff::with_lines("+", "world", None, Some(1)),
964            LineDiff::with_lines(" ", "more", Some(2), Some(2)),
965        ];
966        let eol = FileEolState {
967            old_has_final_newline: false,
968            new_has_final_newline: false,
969            old_line_count: 2,
970            new_line_count: 2,
971        };
972        let change = modified_change_with_eol("tail.txt", lines, eol);
973        let rendered = render_diff_patch(&diff_report_with(vec![change]));
974
975        let marker_count = rendered.matches("\\ No newline at end of file").count();
976        assert_eq!(
977            marker_count, 1,
978            "shared-tail double-no-eol must emit exactly one marker, got:\n{rendered}"
979        );
980        // The context line must NOT be split into `-more`/`+more` —
981        // that's the wrong branch and would confuse `git apply` about
982        // whether the line is being modified.
983        assert!(
984            !rendered.contains("-more\n"),
985            "context tail must not be split when both sides agree:\n{rendered}"
986        );
987        assert!(
988            !rendered.contains("+more\n"),
989            "context tail must not be split when both sides agree:\n{rendered}"
990        );
991        assert!(
992            rendered.contains(" more\n\\ No newline at end of file\n"),
993            "marker must sit immediately after the shared context line:\n{rendered}"
994        );
995    }
996
997    /// When only the OLD side lacks a trailing newline and its tail is
998    /// a context line, the renderer must split that line into a
999    /// `-content` (with the marker after it) + `+content` pair so the
1000    /// patch unambiguously documents that the OLD file ends without
1001    /// `\n` while the NEW file ends with one.
1002    #[test]
1003    fn render_diff_patch_splits_context_tail_when_only_old_lacks_newline() {
1004        // Diff for OLD `hello` (no eol) -> NEW `hello\nmore\n`:
1005        // ` hello` is the old tail; `+more` is the trailing addition.
1006        let lines = vec![
1007            LineDiff::with_lines("@", "@ -1,1 +1,2 @@", None, None),
1008            LineDiff::with_lines(" ", "hello", Some(1), Some(1)),
1009            LineDiff::with_lines("+", "more", None, Some(2)),
1010        ];
1011        let eol = FileEolState {
1012            old_has_final_newline: false,
1013            new_has_final_newline: true,
1014            old_line_count: 1,
1015            new_line_count: 2,
1016        };
1017        let change = modified_change_with_eol("old.txt", lines, eol);
1018        let rendered = render_diff_patch(&diff_report_with(vec![change]));
1019
1020        assert!(
1021            rendered.contains("-hello\n\\ No newline at end of file\n+hello\n"),
1022            "OLD-side context-tail split must emit `-hello` + marker + `+hello`:\n{rendered}"
1023        );
1024        // Only the OLD side carries a marker — the NEW side ends with
1025        // `\n` so its tail line must NOT be followed by a marker.
1026        let marker_count = rendered.matches("\\ No newline at end of file").count();
1027        assert_eq!(
1028            marker_count, 1,
1029            "exactly one marker expected (OLD side only):\n{rendered}"
1030        );
1031    }
1032
1033    /// Mirror of the OLD-only case: when only the NEW side lacks a
1034    /// trailing newline and its tail is a shared context line, the
1035    /// split emits `-content` + `+content` + marker so the patch
1036    /// states "the file ends without `\n` after applying".
1037    #[test]
1038    fn render_diff_patch_splits_context_tail_when_only_new_lacks_newline() {
1039        // Diff for OLD `hello\nmore\n` -> NEW `hello` (no eol):
1040        // ` hello` is the new tail; `-more` is the removal.
1041        let lines = vec![
1042            LineDiff::with_lines("@", "@ -1,2 +1,1 @@", None, None),
1043            LineDiff::with_lines(" ", "hello", Some(1), Some(1)),
1044            LineDiff::with_lines("-", "more", Some(2), None),
1045        ];
1046        let eol = FileEolState {
1047            old_has_final_newline: true,
1048            new_has_final_newline: false,
1049            old_line_count: 2,
1050            new_line_count: 1,
1051        };
1052        let change = modified_change_with_eol("new.txt", lines, eol);
1053        let rendered = render_diff_patch(&diff_report_with(vec![change]));
1054
1055        assert!(
1056            rendered.contains("-hello\n+hello\n\\ No newline at end of file\n"),
1057            "NEW-side context-tail split must emit `-hello` + `+hello` + marker:\n{rendered}"
1058        );
1059        let marker_count = rendered.matches("\\ No newline at end of file").count();
1060        assert_eq!(
1061            marker_count, 1,
1062            "exactly one marker expected (NEW side only):\n{rendered}"
1063        );
1064    }
1065
1066    /// When the tail is a `-` (deletion) on the OLD side and the OLD
1067    /// blob lacked a trailing newline, the marker goes right after the
1068    /// `-line` — same as `git diff` for a delete-the-last-line patch
1069    /// against a no-eol source. The `+` branch is the mirror.
1070    #[test]
1071    fn render_diff_patch_marker_after_minus_line_when_old_tail_is_deletion() {
1072        // OLD has 2 lines (no eol on `tail`), NEW has 1 line (`only`,
1073        // with eol). The diff is two replacements; the second `-tail`
1074        // is the OLD tail.
1075        let lines = vec![
1076            LineDiff::with_lines("@", "@ -1,2 +1,1 @@", None, None),
1077            LineDiff::with_lines("-", "only", Some(1), None),
1078            LineDiff::with_lines("-", "tail", Some(2), None),
1079            LineDiff::with_lines("+", "only", None, Some(1)),
1080        ];
1081        let eol = FileEolState {
1082            old_has_final_newline: false,
1083            new_has_final_newline: true,
1084            old_line_count: 2,
1085            new_line_count: 1,
1086        };
1087        let change = modified_change_with_eol("del.txt", lines, eol);
1088        let rendered = render_diff_patch(&diff_report_with(vec![change]));
1089
1090        assert!(
1091            rendered.contains("-tail\n\\ No newline at end of file\n"),
1092            "marker must follow the OLD tail deletion line:\n{rendered}"
1093        );
1094    }
1095
1096    /// Pin git's C-style path quoting byte-for-byte. The conformance
1097    /// harness round-trips the common classes through real `git apply`;
1098    /// this covers the exact escape spellings (including the `\a \b \v \f
1099    /// \r` controls and octal fallback) the integration cells don't reach.
1100    #[test]
1101    fn quote_path_matches_git_c_style() {
1102        // Simple paths — and spaces, which git leaves bare — emit unquoted.
1103        assert_eq!(quote_path_for_patch("a/", "src/main.rs"), "a/src/main.rs");
1104        assert_eq!(
1105            quote_path_for_patch("a/", "with space.txt"),
1106            "a/with space.txt"
1107        );
1108        // Tab/newline/quote/backslash force quoting; the prefix is escaped
1109        // inside the quotes, matching git's `quote_two`.
1110        assert_eq!(quote_path_for_patch("a/", "tab\there"), "\"a/tab\\there\"");
1111        assert_eq!(
1112            quote_path_for_patch("b/", "line\nbreak"),
1113            "\"b/line\\nbreak\""
1114        );
1115        assert_eq!(quote_path_for_patch("a/", "quo\"te"), "\"a/quo\\\"te\"");
1116        assert_eq!(
1117            quote_path_for_patch("a/", "back\\slash"),
1118            "\"a/back\\\\slash\""
1119        );
1120        // Non-ASCII (UTF-8 é = 0xC3 0xA9) → per-byte octal.
1121        assert_eq!(quote_path_for_patch("a/", "café"), "\"a/caf\\303\\251\"");
1122        // `rename from`/`rename to` quote the bare path (empty prefix).
1123        assert_eq!(quote_path_for_patch("", "x\ty"), "\"x\\ty\"");
1124        // The remaining named C-escapes plus a low control byte (octal).
1125        assert_eq!(
1126            quote_path_for_patch("", "\u{07}\u{08}\u{0b}\u{0c}\r\u{01}"),
1127            "\"\\a\\b\\v\\f\\r\\001\""
1128        );
1129    }
1130}