Skip to main content

fallow_output/
diff.rs

1use std::path::Path;
2
3use rustc_hash::{FxHashMap, FxHashSet};
4
/// Size ceiling, in bytes, for a unified diff that should be parsed: 10 MiB.
///
/// `DiffIndex::from_unified_diff` does not check this itself.
/// NOTE(review): presumably callers compare the diff's size against it before
/// parsing — confirm where it is enforced.
pub const MAX_DIFF_BYTES: u64 = 10 << 20;

/// Maximum number of added lines that will be indexed.
///
/// Added lines beyond this count are still walked so line numbering stays
/// correct, but they are not recorded.
pub const MAX_ADDED_LINES: usize = 1_000_000;
10
11/// Parsed, command-neutral index of files and added lines in a unified diff.
12#[derive(Debug, Default, Clone)]
13pub struct DiffIndex {
14    added_lines: FxHashMap<String, FxHashSet<u64>>,
15    touched_files: FxHashSet<String>,
16    added_line_count: usize,
17    rename_pairs: FxHashMap<String, String>,
18}
19
/// Cursor carried from line to line while a unified diff is being parsed.
#[derive(Default)]
struct DiffParseState {
    /// Path from the most recent `+++ b/` header, or `None` before the first
    /// header and after a `+++ /dev/null` header.
    current_file: Option<String>,
    /// Source path of a `rename from` line still waiting for its `rename to`.
    pending_rename_from: Option<String>,
    /// New-file line number that the next context or added line will occupy.
    new_line: u64,
}
27
28impl DiffIndex {
29    #[must_use]
30    pub fn from_unified_diff(diff: &str) -> Self {
31        let mut index = Self::default();
32        let mut state = DiffParseState::default();
33
34        for line in diff.lines() {
35            if index.handle_diff_header_line(line, &mut state) {
36                continue;
37            }
38            index.handle_diff_content_line(line, &mut state);
39        }
40
41        index
42    }
43
44    fn handle_diff_header_line(&mut self, line: &str, state: &mut DiffParseState) -> bool {
45        if line.starts_with("diff --git ") {
46            state.pending_rename_from = None;
47            return true;
48        }
49        if let Some(rest) = line.strip_prefix("rename from ") {
50            state.pending_rename_from = Some(rest.to_owned());
51            return true;
52        }
53        if let Some(rest) = line.strip_prefix("rename to ") {
54            if let Some(from) = state.pending_rename_from.take() {
55                self.rename_pairs.insert(rest.to_owned(), from);
56                self.touched_files.insert(rest.to_owned());
57            }
58            return true;
59        }
60        if let Some(path) = line.strip_prefix("+++ b/") {
61            state.current_file = Some(path.to_string());
62            self.touched_files.insert(path.to_string());
63            return true;
64        }
65        if line.starts_with("+++ /dev/null") {
66            state.current_file = None;
67            return true;
68        }
69        if let Some(header) = line.strip_prefix("@@ ") {
70            if let Some(start) = parse_new_hunk_start(header) {
71                state.new_line = start;
72            }
73            return true;
74        }
75        false
76    }
77
78    fn handle_diff_content_line(&mut self, line: &str, state: &mut DiffParseState) {
79        let Some(path) = state.current_file.as_ref() else {
80            return;
81        };
82        if line.starts_with('+') && !line.starts_with("+++") {
83            if self.added_line_count < MAX_ADDED_LINES {
84                self.added_lines
85                    .entry(path.clone())
86                    .or_default()
87                    .insert(state.new_line);
88                self.added_line_count += 1;
89            }
90            state.new_line += 1;
91        } else if !line.starts_with('-') {
92            state.new_line += 1;
93        }
94    }
95
96    #[must_use]
97    pub fn old_path_for(&self, head_path: &str) -> Option<&str> {
98        self.rename_pairs.get(head_path).map(String::as_str)
99    }
100
101    #[must_use]
102    pub fn added_line_count(&self) -> usize {
103        self.added_line_count
104    }
105
106    #[must_use]
107    pub fn touches_file(&self, path: &str) -> bool {
108        self.touched_files.contains(path)
109    }
110
111    #[must_use]
112    pub fn range_overlaps_added(&self, path: &str, start: u64, end: u64) -> bool {
113        if end < start {
114            return false;
115        }
116        let Some(added) = self.added_lines.get(path) else {
117            return false;
118        };
119        let lo = start.max(1);
120        added.iter().any(|&line| line >= lo && line <= end)
121    }
122
123    #[must_use]
124    pub fn line_is_added(&self, path: &str, line: u64) -> bool {
125        self.added_lines
126            .get(path)
127            .is_some_and(|lines| lines.contains(&line))
128    }
129
130    #[must_use]
131    pub fn line_within_added_context(&self, path: &str, line: u64, radius: u64) -> bool {
132        self.added_lines
133            .get(path)
134            .is_some_and(|lines| lines.iter().any(|added| line.abs_diff(*added) <= radius))
135    }
136
137    #[must_use]
138    pub fn added_lines_in(&self, path: &str) -> Option<&FxHashSet<u64>> {
139        self.added_lines.get(path)
140    }
141}
142
143#[must_use]
144pub fn relative_to_diff_path(path: &Path, root: &Path) -> Option<String> {
145    if let Ok(stripped) = path.strip_prefix(root) {
146        return Some(stripped.to_string_lossy().replace('\\', "/"));
147    }
148    if fallow_types::path_util::is_absolute_path_any_platform(path) {
149        return None;
150    }
151    Some(path.to_string_lossy().replace('\\', "/"))
152}
153
/// Extracts the new-file starting line number from a hunk header.
///
/// `header` is scanned for its first `+`; the text after it, up to the next
/// `,` or ASCII whitespace (or the end of the string), is parsed as a `u64`.
/// For `-114,1 +114,2 @@` this yields `Some(114)`.
///
/// Returns `None` when there is no `+` or the text after it is not a number.
pub fn parse_new_hunk_start(header: &str) -> Option<u64> {
    let (_, after_plus) = header.split_once('+')?;
    after_plus
        .split(|ch: char| ch == ',' || ch.is_ascii_whitespace())
        .next()?
        .parse()
        .ok()
}
162
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeding more added lines than the cap must index exactly the cap.
    #[test]
    fn from_unified_diff_caps_added_lines_at_threshold() {
        let header =
            "diff --git a/big.txt b/big.txt\n--- a/big.txt\n+++ b/big.txt\n@@ -0,0 +1,100 @@\n";
        let added = "+x\n";
        let line_total = MAX_ADDED_LINES + 100;

        // Build the diff in one right-sized buffer instead of an oversized
        // body that is then copied into a second string.
        let mut diff = String::with_capacity(header.len() + added.len() * line_total);
        diff.push_str(header);
        for _ in 0..line_total {
            diff.push_str(added);
        }

        let index = DiffIndex::from_unified_diff(&diff);
        // Equality, not `<=`: an index that recorded nothing must fail too.
        assert_eq!(
            index.added_line_count(),
            MAX_ADDED_LINES,
            "indexed {} lines, cap is {MAX_ADDED_LINES}",
            index.added_line_count()
        );
    }

    /// A range that starts before the hunk but ends inside it overlaps.
    #[test]
    fn range_overlaps_added_hotspot_starting_before_diff_touches_inside() {
        let diff = "\
diff --git a/src/big.ts b/src/big.ts
--- a/src/big.ts
+++ b/src/big.ts
@@ -114,1 +114,2 @@
 ctx
+touched
";
        let index = DiffIndex::from_unified_diff(diff);
        assert!(index.range_overlaps_added("src/big.ts", 10, 120));
        assert!(!index.range_overlaps_added("src/other.ts", 10, 120));
        assert!(!index.range_overlaps_added("src/big.ts", 10, 100));
        assert!(!index.range_overlaps_added("src/big.ts", 200, 100));
    }

    /// `rename from` / `rename to` headers map the new path to the old one.
    #[test]
    fn rename_header_records_old_path() {
        let diff = "\
diff --git a/src/old.ts b/src/new.ts
similarity index 90%
rename from src/old.ts
rename to src/new.ts
--- a/src/old.ts
+++ b/src/new.ts
@@ -1,1 +1,1 @@
-old
+new
";
        let index = DiffIndex::from_unified_diff(diff);
        assert_eq!(index.old_path_for("src/new.ts"), Some("src/old.ts"));
        // The mapping is keyed by the new path only.
        assert_eq!(index.old_path_for("src/old.ts"), None);
        assert!(index.touches_file("src/new.ts"));
    }

    #[test]
    fn empty_diff_has_zero_added_lines_and_no_touched_files() {
        let index = DiffIndex::from_unified_diff("");
        assert_eq!(index.added_line_count(), 0);
        assert!(!index.touches_file("src/a.ts"));
    }

    /// A deletion (`+++ /dev/null`) neither adds lines nor touches the file.
    #[test]
    fn delete_only_diff_records_no_added_lines() {
        let diff = "\
diff --git a/src/a.ts b/src/a.ts
--- a/src/a.ts
+++ /dev/null
@@ -1,1 +0,0 @@
-old
";
        let index = DiffIndex::from_unified_diff(diff);
        assert_eq!(index.added_line_count(), 0);
        assert!(!index.touches_file("src/a.ts"));
    }

    #[test]
    fn relative_to_diff_path_strips_absolute_root() {
        let root = Path::new("/project");
        let path = Path::new("/project/src/a.ts");
        assert_eq!(
            relative_to_diff_path(path, root).as_deref(),
            Some("src/a.ts")
        );
    }

    #[test]
    fn relative_to_diff_path_passes_through_relative() {
        let root = Path::new("/project");
        let path = Path::new("src/a.ts");
        assert_eq!(
            relative_to_diff_path(path, root).as_deref(),
            Some("src/a.ts")
        );
    }

    #[test]
    fn relative_to_diff_path_returns_none_for_path_outside_root() {
        let root = Path::new("/project");
        let path = Path::new("/elsewhere/src/a.ts");
        assert!(relative_to_diff_path(path, root).is_none());
    }
}