loom-diff 0.1.0

Line-level diff for loom. Pure-function unified-diff over byte slices and text strings, used by both the loom CLI and the loom-gateway. Layered above weave-sdk; lower levels untouched.
Documentation
//! Structured representation of a line-level diff. The shape is
//! tuned for JSON serialization (consumed by the gateway + web UI)
//! while staying lossless for `unified_diff_string` to render.

use crate::looks_binary;
use serde::{Deserialize, Serialize};
use similar::{ChangeTag, TextDiff};

/// Why a diff couldn't be rendered as text.
///
/// Carried inside [`FileDiffStatus::Binary`].
///
/// NOTE(review): unlike the other enums in this file, this one has no
/// `#[serde(rename_all = "snake_case")]`, so its variants serialize under
/// their Rust names (`NullByte`, `NotUtf8`). Confirm what consumers expect
/// before changing that, since it alters the wire format.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryReason {
    /// `looks_binary` flagged at least one side (described in this file as
    /// a null-byte heuristic).
    NullByte,
    /// At least one side passed the binary heuristic but did not decode as
    /// UTF-8.
    NotUtf8,
}

/// Per-line tag: deletion, insertion, or unchanged context line.
///
/// Serialized in snake_case (`"delete"`, `"insert"`, `"equal"`). There are
/// no separate variants for hunk boundaries; gaps between hunks are implied
/// by the start/count fields of each [`DiffHunk`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DiffLineKind {
    /// Line in `a` only — rendered with `-`.
    Delete,
    /// Line in `b` only — rendered with `+`.
    Insert,
    /// Line present in both — rendered with ` `.
    Equal,
}

/// One line in a hunk, together with its position on each side.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffLine {
    /// Whether this is `+`, `-`, or context.
    pub kind: DiffLineKind,
    /// 1-based line number on the `a` side; `None` for inserts.
    /// Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub a_line: Option<u32>,
    /// 1-based line number on the `b` side; `None` for deletes.
    /// Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub b_line: Option<u32>,
    /// Line content WITHOUT the trailing `\n` / `\r\n` (we re-add a newline
    /// on render).
    pub content: String,
}

/// A contiguous run of changes plus surrounding context.
///
/// The four numeric fields mirror the `@@ -a_start,a_count +b_start,b_count @@`
/// header of a unified diff.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffHunk {
    /// Starting line on the `a` side (1-based).
    pub a_start: u32,
    /// Number of `a` lines in this hunk (context lines plus deletions).
    pub a_count: u32,
    /// Starting line on the `b` side (1-based).
    pub b_start: u32,
    /// Number of `b` lines in this hunk (context lines plus insertions).
    pub b_count: u32,
    /// Lines of the hunk, in order.
    pub lines: Vec<DiffLine>,
}

/// File-level status of a [`FileDiff`].
///
/// Variant names are serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileDiffStatus {
    /// The file is identical on both sides (this includes the case where
    /// it is absent on both sides).
    Unchanged,
    /// File added (only present on `b`).
    Added,
    /// File deleted (only present on `a`).
    Deleted,
    /// File present on both sides with content changes.
    Modified,
    /// At least one present side is binary or not valid UTF-8; no
    /// line-diff is produced. This takes precedence over
    /// `Added`/`Deleted`/`Modified`.
    Binary {
        /// Why the rendering was skipped.
        reason: BinaryReason,
    },
}

/// A diff for a single path. The shape carries enough information to
/// render git-style unified output, group changes per-hunk in a UI,
/// and surface summary stats.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileDiff {
    /// Path on the `a` side, stored exactly as the caller supplied it.
    /// May differ from `b_path` for renames; `None` when the caller gives
    /// no path for this side (the tests do so for an added file).
    pub a_path: Option<String>,
    /// Path on the `b` side, stored exactly as the caller supplied it;
    /// `None` when the caller gives no path for this side.
    pub b_path: Option<String>,
    /// File-level status.
    pub status: FileDiffStatus,
    /// Hunks; empty when `status` is `Unchanged`/`Binary`.
    pub hunks: Vec<DiffHunk>,
    /// Total `+` lines across all hunks.
    pub additions: u32,
    /// Total `-` lines across all hunks.
    pub deletions: u32,
}

/// Build a [`FileDiff`] from two optional byte blobs.
///
/// `None` on a side means the file is absent there. The steps, in order:
///
/// 1. Derive the initial status from which sides are present
///    (`Unchanged` / `Added` / `Deleted` / `Modified`).
/// 2. If `looks_binary` flags either present side, return
///    `Binary { reason: NullByte }` with no hunks.
/// 3. If either present side is not valid UTF-8, return
///    `Binary { reason: NotUtf8 }` with no hunks.
/// 4. If both sides decode to the same text (or are both absent), return
///    `Unchanged` with no hunks.
/// 5. Otherwise run a line diff (an absent side is diffed as the empty
///    string) and emit one [`DiffHunk`] per group of changes, each padded
///    with up to `context_lines` unchanged lines.
///
/// `a_path` / `b_path` are copied into the result verbatim.
///
/// Hunk start numbers are 1-based. When a hunk contains no lines from one
/// side (a pure insertion or deletion with `context_lines == 0`, or a
/// whole-file add/delete), that side's start is the number of the line
/// just before the hunk, `0` at the top of the file. This matches the
/// unified-diff `@@ -N,0 +M,K @@` convention.
pub fn diff_blobs(
    a: Option<&[u8]>,
    b: Option<&[u8]>,
    a_path: Option<&str>,
    b_path: Option<&str>,
    context_lines: usize,
) -> FileDiff {
    let status = match (a, b) {
        (None, None) => FileDiffStatus::Unchanged,
        (None, Some(_)) => FileDiffStatus::Added,
        (Some(_), None) => FileDiffStatus::Deleted,
        // May still become `Unchanged` or `Binary` below.
        (Some(_), Some(_)) => FileDiffStatus::Modified,
    };

    let mut out = FileDiff {
        a_path: a_path.map(str::to_string),
        b_path: b_path.map(str::to_string),
        status,
        hunks: Vec::new(),
        additions: 0,
        deletions: 0,
    };

    // Binary checks — both sides. Either side being binary disqualifies
    // the diff from line-rendering.
    if a.map_or(false, looks_binary) || b.map_or(false, looks_binary) {
        out.status = FileDiffStatus::Binary {
            reason: BinaryReason::NullByte,
        };
        return out;
    }

    // Borrow the inputs as text; no copy is needed because the diff only
    // reads them.
    let (a_text, b_text) = match (
        a.map(std::str::from_utf8).transpose(),
        b.map(std::str::from_utf8).transpose(),
    ) {
        (Ok(a_text), Ok(b_text)) => (a_text, b_text),
        _ => {
            out.status = FileDiffStatus::Binary {
                reason: BinaryReason::NotUtf8,
            };
            return out;
        }
    };

    if a_text == b_text {
        out.status = FileDiffStatus::Unchanged;
        return out;
    }

    let diff = TextDiff::from_lines(a_text.unwrap_or(""), b_text.unwrap_or(""));

    for group in diff.grouped_ops(context_lines) {
        // 0-based position at which this hunk begins on each side. For a
        // side that contributes no lines to the hunk, this index equals
        // the 1-based number of the line immediately before the hunk,
        // which is what a unified-diff header expects for a zero count.
        let (a_anchor, b_anchor) = group
            .first()
            .map_or((0, 0), |op| (op.old_range().start, op.new_range().start));

        let mut lines: Vec<DiffLine> = Vec::new();
        let mut first_a_line: Option<u32> = None;
        let mut first_b_line: Option<u32> = None;
        let mut a_count: u32 = 0;
        let mut b_count: u32 = 0;

        for op in &group {
            for change in diff.iter_changes(op) {
                // Line numbers are stored as u32; the cast truncates for
                // inputs with more lines than u32 can represent.
                let a_line = change.old_index().map(|i| (i as u32) + 1);
                let b_line = change.new_index().map(|i| (i as u32) + 1);
                first_a_line = first_a_line.or(a_line);
                first_b_line = first_b_line.or(b_line);

                let kind = match change.tag() {
                    ChangeTag::Delete => {
                        a_count += 1;
                        out.deletions += 1;
                        DiffLineKind::Delete
                    }
                    ChangeTag::Insert => {
                        b_count += 1;
                        out.additions += 1;
                        DiffLineKind::Insert
                    }
                    ChangeTag::Equal => {
                        a_count += 1;
                        b_count += 1;
                        DiffLineKind::Equal
                    }
                };

                lines.push(DiffLine {
                    kind,
                    a_line,
                    b_line,
                    content: strip_trailing_newline(change.value()),
                });
            }
        }

        out.hunks.push(DiffHunk {
            // Fall back to the anchor only when the side has no lines in
            // this hunk (count is 0).
            a_start: first_a_line.unwrap_or(a_anchor as u32),
            a_count,
            b_start: first_b_line.unwrap_or(b_anchor as u32),
            b_count,
            lines,
        });
    }

    out
}

/// Text-string front end for [`diff_blobs`].
///
/// Each present side is handed over as its UTF-8 bytes; a `None` side is
/// forwarded as `None`, so an added or deleted file is reported the same
/// way as through the byte-slice entry point. Handy for tests and for
/// callers that already hold `&str` values.
pub fn file_diff(
    a: Option<&str>,
    b: Option<&str>,
    a_path: Option<&str>,
    b_path: Option<&str>,
    context_lines: usize,
) -> FileDiff {
    let a_bytes = a.map(|text| text.as_bytes());
    let b_bytes = b.map(|text| text.as_bytes());
    diff_blobs(a_bytes, b_bytes, a_path, b_path, context_lines)
}

/// Return `s` as an owned string with one trailing line terminator removed.
///
/// A final `\r\n` or a final `\n` is dropped; anything else (including a
/// lone trailing `\r`) is returned unchanged. At most one terminator is
/// removed.
fn strip_trailing_newline(s: &str) -> String {
    let body = match s.strip_suffix('\n') {
        // The line ended in `\n`; also drop a `\r` directly before it.
        Some(without_lf) => without_lf.strip_suffix('\r').unwrap_or(without_lf),
        None => s,
    };
    body.to_owned()
}

// Unit tests for the structured diff builders. Each test drives
// `file_diff` / `diff_blobs` and checks status, counters, and hunk shape.
#[cfg(test)]
mod tests {
    use super::*;

    // Identical text on both sides: `Unchanged`, no hunks, zero counters.
    #[test]
    fn unchanged_files_produce_no_hunks() {
        let d = file_diff(
            Some("a\nb\nc\n"),
            Some("a\nb\nc\n"),
            Some("x.txt"),
            Some("x.txt"),
            3,
        );
        assert_eq!(d.status, FileDiffStatus::Unchanged);
        assert!(d.hunks.is_empty());
        assert_eq!(d.additions, 0);
        assert_eq!(d.deletions, 0);
    }

    // No `a` side: status is `Added` and every emitted line is an insert.
    #[test]
    fn added_file_yields_only_inserts() {
        let d = file_diff(None, Some("hello\nworld\n"), None, Some("greet.txt"), 3);
        assert_eq!(d.status, FileDiffStatus::Added);
        assert_eq!(d.additions, 2);
        assert_eq!(d.deletions, 0);
        let lines: Vec<_> = d
            .hunks
            .iter()
            .flat_map(|h| h.lines.iter().map(|l| l.kind))
            .collect();
        assert!(lines.iter().all(|k| matches!(k, DiffLineKind::Insert)));
    }

    // No `b` side: status is `Deleted` and only the deletion counter moves.
    #[test]
    fn deleted_file_yields_only_deletes() {
        let d = file_diff(Some("a\nb\n"), None, Some("gone.txt"), None, 3);
        assert_eq!(d.status, FileDiffStatus::Deleted);
        assert_eq!(d.additions, 0);
        assert_eq!(d.deletions, 2);
    }

    // One changed line with `context_lines = 1`: a single hunk shaped
    // context / delete / insert / context.
    #[test]
    fn modified_file_groups_hunks_with_context() {
        let a = "alpha\nbeta\ngamma\ndelta\nepsilon\n";
        let b = "alpha\nBETA\ngamma\ndelta\nepsilon\n";
        let d = file_diff(Some(a), Some(b), Some("greek.txt"), Some("greek.txt"), 1);
        assert_eq!(d.status, FileDiffStatus::Modified);
        assert_eq!(d.additions, 1);
        assert_eq!(d.deletions, 1);
        // single hunk with 1 line of context above and below
        assert_eq!(d.hunks.len(), 1);
        let kinds: Vec<_> = d.hunks[0].lines.iter().map(|l| l.kind).collect();
        assert_eq!(
            kinds,
            vec![
                DiffLineKind::Equal,
                DiffLineKind::Delete,
                DiffLineKind::Insert,
                DiffLineKind::Equal,
            ]
        );
    }

    // A null byte on one side is enough to report the whole diff as
    // `Binary { reason: NullByte }`.
    #[test]
    fn binary_blobs_surface_as_binary() {
        let a = b"hello\n".to_vec();
        let mut b = b"hello".to_vec();
        b.push(0); // null byte → binary
        let d = diff_blobs(Some(&a), Some(&b), Some("x"), Some("x"), 3);
        assert!(matches!(
            d.status,
            FileDiffStatus::Binary {
                reason: BinaryReason::NullByte
            }
        ));
    }

    // Serializing to JSON and back must reproduce an equal `FileDiff`.
    #[test]
    fn round_trips_through_json() {
        let d = file_diff(Some("a\n"), Some("b\n"), Some("p"), Some("p"), 3);
        let s = serde_json::to_string(&d).unwrap();
        let back: FileDiff = serde_json::from_str(&s).unwrap();
        assert_eq!(d, back);
    }
}