Skip to main content

loom_diff/
structured.rs

1//! Structured representation of a line-level diff. The shape is
2//! tuned for JSON serialization (consumed by the gateway + web UI)
3//! while staying lossless for `unified_diff_string` to render.
4
5use crate::looks_binary;
6use serde::{Deserialize, Serialize};
7use similar::{ChangeTag, TextDiff};
8
/// Why a diff couldn't be rendered as text.
///
/// Carried by [`FileDiffStatus::Binary`]. Unlike the other enums in this
/// module there is no `rename_all` attribute here, so the variants
/// serialize under their Rust names (`"NullByte"`, `"NotUtf8"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryReason {
    /// `looks_binary` flagged at least one side (described here as a
    /// null-byte heuristic; the helper itself lives at the crate root).
    NullByte,
    /// At least one side passed the binary heuristic but its bytes did
    /// not decode cleanly as UTF-8.
    NotUtf8,
}
17
/// Per-line tag: a deletion, an insertion, or an unchanged context line.
///
/// Serialized in snake_case (`"delete"`, `"insert"`, `"equal"`). There
/// are no separate marker variants for hunk boundaries; hunks are
/// delimited structurally by [`DiffHunk`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DiffLineKind {
    /// Line in `a` only — rendered with `-`.
    Delete,
    /// Line in `b` only — rendered with `+`.
    Insert,
    /// Line present in both — rendered with ` `.
    Equal,
}
30
/// One line in a hunk.
///
/// Context lines carry both line numbers; inserts carry only `b_line`
/// and deletes only `a_line`. A `None` line number is omitted from the
/// serialized output entirely rather than written as `null`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffLine {
    /// Whether this is `+`, `-`, or context.
    pub kind: DiffLineKind,
    /// 1-based line number on the `a` side; `None` for inserts.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub a_line: Option<u32>,
    /// 1-based line number on the `b` side; `None` for deletes.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub b_line: Option<u32>,
    /// Line content WITHOUT the trailing newline (we re-add on render).
    /// A single trailing `\r\n` or `\n` is removed, so the original
    /// terminator style is not recorded here.
    pub content: String,
}
45
/// A contiguous run of changes plus surrounding context.
///
/// The four range fields mirror the numbers in a unified-diff hunk
/// header (`@@ -a_start,a_count +b_start,b_count @@`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffHunk {
    /// Starting line on the `a` side (1-based). `0` when the hunk
    /// contains no `a`-side lines at all (e.g. a newly added file).
    pub a_start: u32,
    /// Number of `a` lines in this hunk (deleted + context lines).
    pub a_count: u32,
    /// Starting line on the `b` side (1-based). `0` when the hunk
    /// contains no `b`-side lines at all (e.g. a deleted file).
    pub b_start: u32,
    /// Number of `b` lines in this hunk (inserted + context lines).
    pub b_count: u32,
    /// Lines of the hunk, in order.
    pub lines: Vec<DiffLine>,
}
60
/// File-level status.
///
/// Variant names are serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileDiffStatus {
    /// The file is identical on both sides (this is also what
    /// `diff_blobs` reports when both sides are absent).
    Unchanged,
    /// File added (only present on `b`).
    Added,
    /// File deleted (only present on `a`).
    Deleted,
    /// File present on both sides with content changes.
    Modified,
    /// At least one present side is binary (or not valid UTF-8); no
    /// line-diff produced. `diff_blobs` reports this even when only one
    /// side exists, so it takes precedence over `Added`/`Deleted`.
    Binary {
        /// Why the rendering was skipped.
        reason: BinaryReason,
    },
}
79
/// A diff for a single path. The shape carries enough information to
/// render git-style unified output, group changes per-hunk in a UI,
/// and surface summary stats.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileDiff {
    /// Path on the `a` side. May differ from `b_path` for renames; v1
    /// always sets these equal. `diff_blobs` stores whatever the caller
    /// passes, so this can be `None` (e.g. for an added file).
    pub a_path: Option<String>,
    /// Path on the `b` side; `None` when the caller supplies none
    /// (e.g. for a deleted file).
    pub b_path: Option<String>,
    /// File-level status.
    pub status: FileDiffStatus,
    /// Hunks; empty when `status` is `Unchanged`/`Binary`.
    pub hunks: Vec<DiffHunk>,
    /// Total `+` lines across all hunks (context lines are not counted).
    pub additions: u32,
    /// Total `-` lines across all hunks (context lines are not counted).
    pub deletions: u32,
}
99
100/// Build a [`FileDiff`] from two byte slices. Detects binary content
101/// (null-byte heuristic, then utf-8 check) and surfaces it as a
102/// `Binary` status without trying to render lines.
103pub fn diff_blobs(
104    a: Option<&[u8]>,
105    b: Option<&[u8]>,
106    a_path: Option<&str>,
107    b_path: Option<&str>,
108    context_lines: usize,
109) -> FileDiff {
110    let status = match (a, b) {
111        (None, None) => FileDiffStatus::Unchanged,
112        (None, Some(_)) => FileDiffStatus::Added,
113        (Some(_), None) => FileDiffStatus::Deleted,
114        (Some(_), Some(_)) => FileDiffStatus::Modified, // refined below
115    };
116
117    let mut out = FileDiff {
118        a_path: a_path.map(str::to_string),
119        b_path: b_path.map(str::to_string),
120        status,
121        hunks: Vec::new(),
122        additions: 0,
123        deletions: 0,
124    };
125
126    // Binary checks — both sides. Either side being binary disqualifies
127    // the diff from line-rendering.
128    let any_binary = a.map_or(false, looks_binary) || b.map_or(false, looks_binary);
129    if any_binary {
130        out.status = FileDiffStatus::Binary {
131            reason: BinaryReason::NullByte,
132        };
133        return out;
134    }
135
136    let a_text = match a {
137        Some(b) => match std::str::from_utf8(b) {
138            Ok(s) => Some(s.to_string()),
139            Err(_) => {
140                out.status = FileDiffStatus::Binary {
141                    reason: BinaryReason::NotUtf8,
142                };
143                return out;
144            }
145        },
146        None => None,
147    };
148    let b_text = match b {
149        Some(b) => match std::str::from_utf8(b) {
150            Ok(s) => Some(s.to_string()),
151            Err(_) => {
152                out.status = FileDiffStatus::Binary {
153                    reason: BinaryReason::NotUtf8,
154                };
155                return out;
156            }
157        },
158        None => None,
159    };
160
161    if a_text.as_deref() == b_text.as_deref() {
162        out.status = FileDiffStatus::Unchanged;
163        return out;
164    }
165
166    let a_str = a_text.as_deref().unwrap_or("");
167    let b_str = b_text.as_deref().unwrap_or("");
168
169    let diff = TextDiff::from_lines(a_str, b_str);
170
171    for group in diff.grouped_ops(context_lines) {
172        let mut hunk_lines: Vec<DiffLine> = Vec::new();
173        let mut hunk_a_start = u32::MAX;
174        let mut hunk_b_start = u32::MAX;
175        let mut hunk_a_count: u32 = 0;
176        let mut hunk_b_count: u32 = 0;
177
178        for op in group {
179            for change in diff.iter_changes(&op) {
180                let content = strip_trailing_newline(change.value());
181                let a_line = change.old_index().map(|i| (i as u32) + 1);
182                let b_line = change.new_index().map(|i| (i as u32) + 1);
183
184                if let Some(n) = a_line {
185                    if hunk_a_start == u32::MAX {
186                        hunk_a_start = n;
187                    }
188                }
189                if let Some(n) = b_line {
190                    if hunk_b_start == u32::MAX {
191                        hunk_b_start = n;
192                    }
193                }
194
195                let kind = match change.tag() {
196                    ChangeTag::Delete => {
197                        hunk_a_count += 1;
198                        out.deletions += 1;
199                        DiffLineKind::Delete
200                    }
201                    ChangeTag::Insert => {
202                        hunk_b_count += 1;
203                        out.additions += 1;
204                        DiffLineKind::Insert
205                    }
206                    ChangeTag::Equal => {
207                        hunk_a_count += 1;
208                        hunk_b_count += 1;
209                        DiffLineKind::Equal
210                    }
211                };
212
213                hunk_lines.push(DiffLine {
214                    kind,
215                    a_line,
216                    b_line,
217                    content,
218                });
219            }
220        }
221
222        // Empty starts (e.g. file added with no `a` side) → 0.
223        let a_start = if hunk_a_start == u32::MAX {
224            0
225        } else {
226            hunk_a_start
227        };
228        let b_start = if hunk_b_start == u32::MAX {
229            0
230        } else {
231            hunk_b_start
232        };
233
234        out.hunks.push(DiffHunk {
235            a_start,
236            a_count: hunk_a_count,
237            b_start,
238            b_count: hunk_b_count,
239            lines: hunk_lines,
240        });
241    }
242
243    if out.hunks.is_empty() && a_text.as_deref() == b_text.as_deref() {
244        out.status = FileDiffStatus::Unchanged;
245    }
246    out
247}
248
249/// Convenience: diff two text strings, treating absent strings as
250/// empty (file added/deleted). Provided for tests and for callers
251/// that already hold UTF-8 strings.
252pub fn file_diff(
253    a: Option<&str>,
254    b: Option<&str>,
255    a_path: Option<&str>,
256    b_path: Option<&str>,
257    context_lines: usize,
258) -> FileDiff {
259    diff_blobs(
260        a.map(str::as_bytes),
261        b.map(str::as_bytes),
262        a_path,
263        b_path,
264        context_lines,
265    )
266}
267
/// Return `s` as an owned string with one trailing line terminator
/// (`\r\n` or `\n`) removed. Anything else, including a lone trailing
/// `\r`, is left untouched.
fn strip_trailing_newline(s: &str) -> String {
    let body = match s.strip_suffix('\n') {
        // The `\n` is gone; drop the `\r` of a CRLF pair if there is one.
        Some(rest) => rest.strip_suffix('\r').unwrap_or(rest),
        None => s,
    };
    body.to_owned()
}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Line kinds of a single hunk, in order.
    fn hunk_kinds(hunk: &DiffHunk) -> Vec<DiffLineKind> {
        hunk.lines.iter().map(|line| line.kind).collect()
    }

    /// Identical text on both sides: `Unchanged`, no hunks, zero counters.
    #[test]
    fn unchanged_files_produce_no_hunks() {
        let text = "a\nb\nc\n";
        let diff = file_diff(Some(text), Some(text), Some("x.txt"), Some("x.txt"), 3);

        assert_eq!(diff.status, FileDiffStatus::Unchanged);
        assert!(diff.hunks.is_empty());
        assert_eq!(diff.additions, 0);
        assert_eq!(diff.deletions, 0);
    }

    /// A file that only exists on the `b` side is all insertions.
    #[test]
    fn added_file_yields_only_inserts() {
        let diff = file_diff(None, Some("hello\nworld\n"), None, Some("greet.txt"), 3);

        assert_eq!(diff.status, FileDiffStatus::Added);
        assert_eq!(diff.additions, 2);
        assert_eq!(diff.deletions, 0);

        let every_line_is_insert = diff
            .hunks
            .iter()
            .flat_map(hunk_kinds)
            .all(|kind| matches!(kind, DiffLineKind::Insert));
        assert!(every_line_is_insert);
    }

    /// A file that only exists on the `a` side is all deletions.
    #[test]
    fn deleted_file_yields_only_deletes() {
        let diff = file_diff(Some("a\nb\n"), None, Some("gone.txt"), None, 3);

        assert_eq!(diff.status, FileDiffStatus::Deleted);
        assert_eq!(diff.additions, 0);
        assert_eq!(diff.deletions, 2);
    }

    /// One changed line with a context radius of 1 gives a single hunk:
    /// context, delete, insert, context.
    #[test]
    fn modified_file_groups_hunks_with_context() {
        let before = "alpha\nbeta\ngamma\ndelta\nepsilon\n";
        let after = "alpha\nBETA\ngamma\ndelta\nepsilon\n";
        let diff = file_diff(
            Some(before),
            Some(after),
            Some("greek.txt"),
            Some("greek.txt"),
            1,
        );

        assert_eq!(diff.status, FileDiffStatus::Modified);
        assert_eq!(diff.additions, 1);
        assert_eq!(diff.deletions, 1);
        assert_eq!(diff.hunks.len(), 1);

        let expected = vec![
            DiffLineKind::Equal,
            DiffLineKind::Delete,
            DiffLineKind::Insert,
            DiffLineKind::Equal,
        ];
        assert_eq!(hunk_kinds(&diff.hunks[0]), expected);
    }

    /// A null byte on either side turns the whole diff into `Binary`.
    #[test]
    fn binary_blobs_surface_as_binary() {
        let text_side: &[u8] = b"hello\n";
        let nul_side: &[u8] = b"hello\0"; // null byte → binary
        let diff = diff_blobs(Some(text_side), Some(nul_side), Some("x"), Some("x"), 3);

        assert_eq!(
            diff.status,
            FileDiffStatus::Binary {
                reason: BinaryReason::NullByte
            }
        );
    }

    /// Serializing to JSON and back yields an equal `FileDiff`.
    #[test]
    fn round_trips_through_json() {
        let original = file_diff(Some("a\n"), Some("b\n"), Some("p"), Some("p"), 3);
        let json = serde_json::to_string(&original).unwrap();
        let restored: FileDiff = serde_json::from_str(&json).unwrap();
        assert_eq!(original, restored);
    }
}