Skip to main content

crw_diff/
git_diff.rs

1//! Git-diff (markdown) surface: a unified text diff plus a parse-diff-style
2//! AST, BOTH derived from the same `similar` op stream so they can never
3//! disagree. There is no `parse-diff` crate in Rust; the AST is synthesized
4//! directly from `similar`'s `DiffOp`/`ChangeTag` stream.
5
6use crw_core::types::{DiffAst, DiffChange, DiffChunk, DiffFile};
7use similar::{ChangeTag, TextDiff};
8
9const CONTEXT_RADIUS: usize = 3;
10
11/// Output of a git-diff computation: the unified `text` surface and the typed
12/// AST. Both come from one op stream over the same normalized inputs.
13pub struct GitDiff {
14    pub text: String,
15    pub ast: DiffAst,
16}
17
18/// Compute the unified text + AST between two already-normalized markdown
19/// strings. `max_changes` caps the number of AST change-lines; on overflow the
20/// AST is marked `truncated` (the full snapshot is retained by the caller, so
21/// the change is recoverable). The `text` surface is always complete.
22pub fn compute(previous: &str, current: &str, max_changes: usize) -> GitDiff {
23    let diff = TextDiff::from_lines(previous, current);
24
25    // Unified text surface (always complete, independent of the AST cap).
26    let text = diff
27        .unified_diff()
28        .context_radius(CONTEXT_RADIUS)
29        .header("previous", "current")
30        .to_string();
31
32    // AST surface, synthesized from the same op stream.
33    let mut chunks: Vec<DiffChunk> = Vec::new();
34    let mut additions = 0usize;
35    let mut deletions = 0usize;
36    let mut emitted = 0usize;
37    let mut truncated = false;
38
39    'outer: for group in diff.grouped_ops(CONTEXT_RADIUS).iter() {
40        let (Some(first), Some(last)) = (group.first(), group.last()) else {
41            continue;
42        };
43        let old_start = first.old_range().start;
44        let new_start = first.new_range().start;
45        let old_lines = last.old_range().end - old_start;
46        let new_lines = last.new_range().end - new_start;
47        let header = format!(
48            "@@ -{},{} +{},{} @@",
49            old_start + 1,
50            old_lines,
51            new_start + 1,
52            new_lines
53        );
54
55        let mut changes: Vec<DiffChange> = Vec::new();
56        for op in group {
57            for change in diff.iter_changes(op) {
58                if emitted >= max_changes {
59                    truncated = true;
60                    break 'outer;
61                }
62                let content = change.value().trim_end_matches('\n').to_string();
63                let dc = match change.tag() {
64                    ChangeTag::Delete => {
65                        deletions += 1;
66                        DiffChange {
67                            change_type: "del".into(),
68                            content,
69                            ln: change.old_index().map(|i| i + 1),
70                            ln1: None,
71                            ln2: None,
72                        }
73                    }
74                    ChangeTag::Insert => {
75                        additions += 1;
76                        DiffChange {
77                            change_type: "add".into(),
78                            content,
79                            ln: change.new_index().map(|i| i + 1),
80                            ln1: None,
81                            ln2: None,
82                        }
83                    }
84                    ChangeTag::Equal => DiffChange {
85                        change_type: "normal".into(),
86                        content,
87                        ln: None,
88                        ln1: change.old_index().map(|i| i + 1),
89                        ln2: change.new_index().map(|i| i + 1),
90                    },
91                };
92                emitted += 1;
93                changes.push(dc);
94            }
95        }
96
97        chunks.push(DiffChunk {
98            content: header,
99            changes,
100            old_start: old_start + 1,
101            old_lines,
102            new_start: new_start + 1,
103            new_lines,
104        });
105    }
106
107    let file = DiffFile {
108        from: "previous".into(),
109        to: "current".into(),
110        additions,
111        deletions,
112        chunks,
113    };
114    let ast = DiffAst {
115        files: vec![file],
116        additions,
117        deletions,
118        truncated,
119    };
120
121    GitDiff { text, ast }
122}
123
#[cfg(test)]
mod tests {
    use super::*;

    /// Build `count` newline-joined lines of the form `"<prefix> <index>"`.
    fn numbered_lines(prefix: &str, count: usize) -> String {
        let lines: Vec<String> = (0..count).map(|i| format!("{prefix} {i}")).collect();
        lines.join("\n")
    }

    /// Diffing a text against itself reports no changes and no chunks.
    #[test]
    fn identical_input_yields_empty_diff() {
        let same = "a\nb\nc";
        let result = compute(same, same, 5000);

        assert_eq!(result.ast.additions, 0);
        assert_eq!(result.ast.deletions, 0);
        assert!(result.ast.files[0].chunks.is_empty());
    }

    /// Replacing one line yields one addition and one deletion, visible in
    /// both the text surface and the AST.
    #[test]
    fn single_line_change_counts() {
        let before = "# Pricing\nStarter $19";
        let after = "# Pricing\nStarter $24";
        let result = compute(before, after, 5000);

        assert_eq!(result.ast.additions, 1);
        assert_eq!(result.ast.deletions, 1);
        assert!(result.text.contains("-Starter $19"));
        assert!(result.text.contains("+Starter $24"));

        // text and AST agree on counts
        let mut adds_in_chunks = 0usize;
        for chunk in &result.ast.files[0].chunks {
            for change in &chunk.changes {
                if change.change_type == "add" {
                    adds_in_chunks += 1;
                }
            }
        }
        assert_eq!(adds_in_chunks, result.ast.additions);
    }

    /// A diff larger than the cap flags the AST as truncated.
    #[test]
    fn cap_marks_truncated() {
        let before = numbered_lines("line", 100);
        let after = numbered_lines("changed", 100);

        let result = compute(&before, &after, 10);

        assert!(result.ast.truncated);
    }
}