1use std::path::Path;
2
3use rustc_hash::{FxHashMap, FxHashSet};
4
/// Ten mebibytes, expressed in bytes.
///
/// NOTE(review): nothing in the visible part of this file reads this value;
/// it is presumably the size limit callers apply to raw diff text before
/// parsing — confirm against the call sites.
pub const MAX_DIFF_BYTES: u64 = 10 << 20;

/// Upper bound on the number of added lines a single [`DiffIndex`] records.
///
/// Once the running count reaches this value the parser keeps tracking line
/// numbers but stops storing further added lines.
pub const MAX_ADDED_LINES: usize = 1_000_000;
10
11#[derive(Debug, Default, Clone)]
13pub struct DiffIndex {
14 added_lines: FxHashMap<String, FxHashSet<u64>>,
15 touched_files: FxHashSet<String>,
16 added_line_count: usize,
17 rename_pairs: FxHashMap<String, String>,
18}
19
/// Cursor carried from one diff line to the next while parsing.
#[derive(Default)]
struct DiffParseState {
    /// Post-change path of the file whose hunks are being read. `None` before
    /// the first `+++ b/…` header and after a `+++ /dev/null` header.
    current_file: Option<String>,
    /// New-side line number that the next context or added line occupies.
    new_line: u64,
    /// Path from the latest `rename from` header that has not yet been paired
    /// with a `rename to` header.
    pending_rename_from: Option<String>,
}
27
28impl DiffIndex {
29 #[must_use]
30 pub fn from_unified_diff(diff: &str) -> Self {
31 let mut index = Self::default();
32 let mut state = DiffParseState::default();
33
34 for line in diff.lines() {
35 if index.handle_diff_header_line(line, &mut state) {
36 continue;
37 }
38 index.handle_diff_content_line(line, &mut state);
39 }
40
41 index
42 }
43
44 fn handle_diff_header_line(&mut self, line: &str, state: &mut DiffParseState) -> bool {
45 if line.starts_with("diff --git ") {
46 state.pending_rename_from = None;
47 return true;
48 }
49 if let Some(rest) = line.strip_prefix("rename from ") {
50 state.pending_rename_from = Some(rest.to_owned());
51 return true;
52 }
53 if let Some(rest) = line.strip_prefix("rename to ") {
54 if let Some(from) = state.pending_rename_from.take() {
55 self.rename_pairs.insert(rest.to_owned(), from);
56 self.touched_files.insert(rest.to_owned());
57 }
58 return true;
59 }
60 if let Some(path) = line.strip_prefix("+++ b/") {
61 state.current_file = Some(path.to_string());
62 self.touched_files.insert(path.to_string());
63 return true;
64 }
65 if line.starts_with("+++ /dev/null") {
66 state.current_file = None;
67 return true;
68 }
69 if let Some(header) = line.strip_prefix("@@ ") {
70 if let Some(start) = parse_new_hunk_start(header) {
71 state.new_line = start;
72 }
73 return true;
74 }
75 false
76 }
77
78 fn handle_diff_content_line(&mut self, line: &str, state: &mut DiffParseState) {
79 let Some(path) = state.current_file.as_ref() else {
80 return;
81 };
82 if line.starts_with('+') && !line.starts_with("+++") {
83 if self.added_line_count < MAX_ADDED_LINES {
84 self.added_lines
85 .entry(path.clone())
86 .or_default()
87 .insert(state.new_line);
88 self.added_line_count += 1;
89 }
90 state.new_line += 1;
91 } else if !line.starts_with('-') {
92 state.new_line += 1;
93 }
94 }
95
96 #[must_use]
97 pub fn old_path_for(&self, head_path: &str) -> Option<&str> {
98 self.rename_pairs.get(head_path).map(String::as_str)
99 }
100
101 #[must_use]
102 pub fn added_line_count(&self) -> usize {
103 self.added_line_count
104 }
105
106 #[must_use]
107 pub fn touches_file(&self, path: &str) -> bool {
108 self.touched_files.contains(path)
109 }
110
111 #[must_use]
112 pub fn range_overlaps_added(&self, path: &str, start: u64, end: u64) -> bool {
113 if end < start {
114 return false;
115 }
116 let Some(added) = self.added_lines.get(path) else {
117 return false;
118 };
119 let lo = start.max(1);
120 added.iter().any(|&line| line >= lo && line <= end)
121 }
122
123 #[must_use]
124 pub fn line_is_added(&self, path: &str, line: u64) -> bool {
125 self.added_lines
126 .get(path)
127 .is_some_and(|lines| lines.contains(&line))
128 }
129
130 #[must_use]
131 pub fn line_within_added_context(&self, path: &str, line: u64, radius: u64) -> bool {
132 self.added_lines
133 .get(path)
134 .is_some_and(|lines| lines.iter().any(|added| line.abs_diff(*added) <= radius))
135 }
136
137 #[must_use]
138 pub fn added_lines_in(&self, path: &str) -> Option<&FxHashSet<u64>> {
139 self.added_lines.get(path)
140 }
141}
142
143#[must_use]
144pub fn relative_to_diff_path(path: &Path, root: &Path) -> Option<String> {
145 if let Ok(stripped) = path.strip_prefix(root) {
146 return Some(stripped.to_string_lossy().replace('\\', "/"));
147 }
148 if fallow_types::path_util::is_absolute_path_any_platform(path) {
149 return None;
150 }
151 Some(path.to_string_lossy().replace('\\', "/"))
152}
153
/// Extracts the new-side starting line number from a hunk header.
///
/// `header` is the text following the leading `@@ ` of a hunk line, for
/// example `-114,1 +114,2 @@`. The number is read from just after the first
/// `+` up to the next comma or ASCII whitespace (or the end of the string).
///
/// Returns `None` when `header` contains no `+` or when the text after it
/// does not parse as a `u64`.
pub fn parse_new_hunk_start(header: &str) -> Option<u64> {
    let (_, after_plus) = header.split_once('+')?;
    let digits = after_plus
        .split(|c: char| c == ',' || c.is_ascii_whitespace())
        .next()
        .unwrap_or(after_plus);
    digits.parse().ok()
}
162
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeding more added lines than the cap must never record more than
    /// `MAX_ADDED_LINES` of them.
    #[test]
    fn from_unified_diff_caps_added_lines_at_threshold() {
        const HEADER: &str =
            "diff --git a/big.txt b/big.txt\n--- a/big.txt\n+++ b/big.txt\n@@ -0,0 +1,100 @@\n";
        let mut diff = String::from(HEADER);
        diff.push_str(&"+x\n".repeat(MAX_ADDED_LINES + 100));

        let index = DiffIndex::from_unified_diff(&diff);
        assert!(
            index.added_line_count() <= MAX_ADDED_LINES,
            "indexed {} lines, cap is {MAX_ADDED_LINES}",
            index.added_line_count()
        );
    }

    /// A range that begins before the hunk still overlaps when it reaches the
    /// added line; unrelated files, earlier ranges and inverted ranges do not.
    #[test]
    fn range_overlaps_added_hotspot_starting_before_diff_touches_inside() {
        let diff = concat!(
            "diff --git a/src/big.ts b/src/big.ts\n",
            "--- a/src/big.ts\n",
            "+++ b/src/big.ts\n",
            "@@ -114,1 +114,2 @@\n",
            " ctx\n",
            "+touched\n",
        );
        let index = DiffIndex::from_unified_diff(diff);
        assert!(index.range_overlaps_added("src/big.ts", 10, 120));
        assert!(!index.range_overlaps_added("src/other.ts", 10, 120));
        assert!(!index.range_overlaps_added("src/big.ts", 10, 100));
        assert!(!index.range_overlaps_added("src/big.ts", 200, 100));
    }

    /// `rename from` / `rename to` headers map the new path back to the old one.
    #[test]
    fn rename_header_records_old_path() {
        let diff = concat!(
            "diff --git a/src/old.ts b/src/new.ts\n",
            "similarity index 90%\n",
            "rename from src/old.ts\n",
            "rename to src/new.ts\n",
            "--- a/src/old.ts\n",
            "+++ b/src/new.ts\n",
            "@@ -1,1 +1,1 @@\n",
            "-old\n",
            "+new\n",
        );
        let index = DiffIndex::from_unified_diff(diff);
        assert_eq!(index.old_path_for("src/new.ts"), Some("src/old.ts"));
        assert!(index.touches_file("src/new.ts"));
    }

    /// An empty diff produces an empty index.
    #[test]
    fn empty_diff_has_zero_added_lines_and_no_touched_files() {
        let index = DiffIndex::from_unified_diff("");
        assert_eq!(index.added_line_count(), 0);
        assert!(!index.touches_file("src/a.ts"));
    }

    /// A deletion (`+++ /dev/null`) adds no lines and touches no file.
    #[test]
    fn delete_only_diff_records_no_added_lines() {
        let diff = concat!(
            "diff --git a/src/a.ts b/src/a.ts\n",
            "--- a/src/a.ts\n",
            "+++ /dev/null\n",
            "@@ -1,1 +0,0 @@\n",
            "-old\n",
        );
        let index = DiffIndex::from_unified_diff(diff);
        assert_eq!(index.added_line_count(), 0);
        assert!(!index.touches_file("src/a.ts"));
    }

    /// An absolute path under the root loses the root prefix.
    #[test]
    fn relative_to_diff_path_strips_absolute_root() {
        let relative = relative_to_diff_path(Path::new("/project/src/a.ts"), Path::new("/project"));
        assert_eq!(relative.as_deref(), Some("src/a.ts"));
    }

    /// A path that is already relative is returned as given.
    #[test]
    fn relative_to_diff_path_passes_through_relative() {
        let relative = relative_to_diff_path(Path::new("src/a.ts"), Path::new("/project"));
        assert_eq!(relative.as_deref(), Some("src/a.ts"));
    }

    /// An absolute path outside the root has no diff-relative form.
    #[test]
    fn relative_to_diff_path_returns_none_for_path_outside_root() {
        let relative =
            relative_to_diff_path(Path::new("/elsewhere/src/a.ts"), Path::new("/project"));
        assert!(relative.is_none());
    }
}