semantic_diff/diff/
parser.rs1use super::{DiffData, DiffFile, DiffLine, DiffSegment, Hunk, LineType, SegmentTag};
2use similar::{ChangeTag, TextDiff};
3
4pub fn parse(raw: &str) -> DiffData {
6 let mut binary_files = Vec::new();
8 for line in raw.lines() {
9 if line.starts_with("Binary files ") && line.ends_with(" differ") {
10 if let Some(path) = extract_binary_path(line) {
11 binary_files.push(path);
12 }
13 }
14 }
15
16 let mut patch = unidiff::PatchSet::new();
18 let _ = patch.parse(raw);
19
20 let files = patch
22 .files()
23 .iter()
24 .filter_map(|pf| {
25 let source = validate_diff_path(&pf.source_file).unwrap_or_default();
26 let target = validate_diff_path(&pf.target_file).unwrap_or_default();
27
28 if target.is_empty() {
30 return None;
31 }
32
33 let target = resolve_if_symlink(&target);
35
36 let is_rename = is_rename_file(&source, &target);
37
38 let hunks = pf
39 .hunks()
40 .iter()
41 .map(|h| {
42 let lines = h
43 .lines()
44 .iter()
45 .filter_map(|line| {
46 let content = line.value.clone();
47 if content.starts_with("\\ No newline") {
49 return None;
50 }
51 let line_type = match line.line_type.as_str() {
52 "+" => LineType::Added,
53 "-" => LineType::Removed,
54 _ => LineType::Context,
55 };
56 Some(DiffLine {
57 line_type,
58 content,
59 inline_segments: None,
60 })
61 })
62 .collect();
63
64 let mut hunk = Hunk {
65 header: format!(
66 "@@ -{},{} +{},{} @@",
67 h.source_start,
68 h.source_length,
69 h.target_start,
70 h.target_length
71 ),
72 source_start: h.source_start,
73 target_start: h.target_start,
74 lines,
75 };
76 compute_inline_diffs(&mut hunk);
77 hunk
78 })
79 .collect();
80
81 Some(DiffFile {
82 source_file: source,
83 target_file: target,
84 is_rename,
85 is_untracked: false,
86 hunks,
87 added_count: pf.added(),
88 removed_count: pf.removed(),
89 })
90 })
91 .collect();
92
93 DiffData {
94 files,
95 binary_files,
96 }
97}
98
/// Reports whether a diff entry moves a file from `source` to a different
/// `target` path.
///
/// At most one leading `a/` is removed from `source` and one leading `b/` from
/// `target` before comparing, so real directories named `a` or `b` stay part
/// of the path. An entry is never a rename when either side is `/dev/null`
/// (file added or deleted) or empty (no usable path for that side).
fn is_rename_file(source: &str, target: &str) -> bool {
    const DEV_NULL: &str = "/dev/null";

    let side_missing = |path: &str| path.is_empty() || path == DEV_NULL;
    if side_missing(source) || side_missing(target) {
        return false;
    }

    // `strip_prefix` removes the prefix once; `trim_start_matches` would keep
    // stripping and turn "a/a/x.rs" into "x.rs".
    let old_path = source.strip_prefix("a/").unwrap_or(source);
    let new_path = target.strip_prefix("b/").unwrap_or(target);
    old_path != new_path
}
105
106pub fn compute_inline_diffs(hunk: &mut Hunk) {
112 let len = hunk.lines.len();
113 let mut i = 0;
114
115 while i < len {
116 let removed_start = i;
118 while i < len && hunk.lines[i].line_type == LineType::Removed {
119 i += 1;
120 }
121 let removed_end = i;
122
123 let added_start = i;
125 while i < len && hunk.lines[i].line_type == LineType::Added {
126 i += 1;
127 }
128 let added_end = i;
129
130 let removed_count = removed_end - removed_start;
131 let added_count = added_end - added_start;
132
133 if removed_count > 0 && added_count > 0 {
135 let pairs = removed_count.min(added_count);
136 for p in 0..pairs {
137 let ri = removed_start + p;
138 let ai = added_start + p;
139
140 let old_content = &hunk.lines[ri].content;
141 let new_content = &hunk.lines[ai].content;
142
143 if old_content.len() > 500 || new_content.len() > 500 {
145 continue;
146 }
147
148 let diff = TextDiff::from_words(old_content.as_str(), new_content.as_str());
149
150 let mut old_segments = Vec::new();
151 let mut new_segments = Vec::new();
152
153 for change in diff.iter_all_changes() {
154 let text = change.value().to_string();
155 match change.tag() {
156 ChangeTag::Equal => {
157 old_segments.push(DiffSegment {
158 tag: SegmentTag::Equal,
159 text: text.clone(),
160 });
161 new_segments.push(DiffSegment {
162 tag: SegmentTag::Equal,
163 text,
164 });
165 }
166 ChangeTag::Delete => {
167 old_segments.push(DiffSegment {
168 tag: SegmentTag::Changed,
169 text,
170 });
171 }
172 ChangeTag::Insert => {
173 new_segments.push(DiffSegment {
174 tag: SegmentTag::Changed,
175 text,
176 });
177 }
178 }
179 }
180
181 hunk.lines[ri].inline_segments = Some(old_segments);
182 hunk.lines[ai].inline_segments = Some(new_segments);
183 }
184 }
185
186 if i == removed_start {
188 i += 1;
189 }
190 }
191}
192
193fn extract_binary_path(line: &str) -> Option<String> {
195 let rest = line.strip_prefix("Binary files ")?;
197 let rest = rest.strip_suffix(" differ")?;
198 let parts: Vec<&str> = rest.splitn(2, " and ").collect();
200 if parts.len() == 2 {
201 let target = parts[1].trim_start_matches("b/");
203 validate_diff_path(target)
205 } else {
206 None
207 }
208}
209
210fn validate_diff_path(path: &str) -> Option<String> {
212 let path = path.trim_start_matches("a/").trim_start_matches("b/");
214 if path.starts_with('/') {
216 tracing::warn!("Rejected absolute path from diff: {}", path);
217 return None;
218 }
219 if path.split('/').any(|component| component == "..") {
221 tracing::warn!("Rejected traversal path from diff: {}", path);
222 return None;
223 }
224 if path.contains('\0') {
226 tracing::warn!("Rejected path with null byte from diff");
227 return None;
228 }
229 Some(path.to_string())
230}
231
232fn resolve_if_symlink(path: &str) -> String {
235 let p = std::path::Path::new(path);
236 match std::fs::symlink_metadata(p) {
238 Ok(meta) if meta.file_type().is_symlink() => {
239 match std::fs::canonicalize(p) {
240 Ok(resolved) => {
241 if let Ok(cwd) = std::env::current_dir() {
243 let canonical_cwd = std::fs::canonicalize(&cwd).unwrap_or(cwd);
244 if resolved.starts_with(&canonical_cwd) {
245 resolved.to_string_lossy().to_string()
246 } else {
247 tracing::warn!(
248 "Symlink {} resolves outside repo root to {}, using original path",
249 path,
250 resolved.display()
251 );
252 path.to_string()
253 }
254 } else {
255 path.to_string()
256 }
257 }
258 Err(_) => path.to_string(),
259 }
260 }
261 _ => path.to_string(),
262 }
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain relative path passes through untouched.
    #[test]
    fn test_validate_diff_path_normal() {
        let cleaned = validate_diff_path("src/main.rs");
        assert_eq!(cleaned.as_deref(), Some("src/main.rs"));
    }

    /// Leading `..` components are refused.
    #[test]
    fn test_validate_diff_path_traversal_rejected() {
        assert!(validate_diff_path("../../../etc/passwd").is_none());
    }

    /// A `..` component in the middle of the path is refused as well.
    #[test]
    fn test_validate_diff_path_embedded_traversal_rejected() {
        assert!(validate_diff_path("src/../lib.rs").is_none());
    }

    /// Absolute paths are refused.
    #[test]
    fn test_validate_diff_path_absolute_rejected() {
        assert!(validate_diff_path("/etc/passwd").is_none());
    }

    /// Nested relative paths pass through untouched.
    #[test]
    fn test_validate_diff_path_normal_nested() {
        let cleaned = validate_diff_path("normal/path/file.rs");
        assert_eq!(cleaned.as_deref(), Some("normal/path/file.rs"));
    }

    /// Both diff prefixes (`a/` and `b/`) are removed.
    #[test]
    fn test_validate_diff_path_strips_prefix() {
        let from_target = validate_diff_path("b/src/main.rs");
        assert_eq!(from_target.as_deref(), Some("src/main.rs"));

        let from_source = validate_diff_path("a/src/main.rs");
        assert_eq!(from_source.as_deref(), Some("src/main.rs"));
    }

    /// Paths containing a NUL byte are refused.
    #[test]
    fn test_validate_diff_path_null_byte_rejected() {
        assert!(validate_diff_path("src/\0evil.rs").is_none());
    }

    /// A binary-file line whose target escapes the tree yields nothing.
    #[test]
    fn test_extract_binary_path_with_traversal_returns_none() {
        let line = "Binary files a/normal.png and b/../../../etc/shadow differ";
        assert!(extract_binary_path(line).is_none());
    }

    /// A well-formed binary-file line yields the prefix-stripped target.
    #[test]
    fn test_extract_binary_path_valid() {
        let line = "Binary files a/icon.png and b/icon.png differ";
        assert_eq!(extract_binary_path(line).as_deref(), Some("icon.png"));
    }

    /// A whole diff targeting a traversal path must not surface that path.
    #[test]
    fn test_parse_with_traversal_path_skipped() {
        let raw = concat!(
            "diff --git a/../../../etc/passwd b/../../../etc/passwd\n",
            "--- a/../../../etc/passwd\n",
            "+++ b/../../../etc/passwd\n",
            "@@ -0,0 +1 @@\n",
            "+malicious content\n",
        );
        let result = parse(raw);
        let leaked = result.files.iter().any(|f| f.target_file.contains(".."));
        assert!(!leaked, "Traversal paths should be rejected");
    }

    /// A path that does not exist on disk is returned verbatim.
    #[test]
    fn test_resolve_if_symlink_nonexistent() {
        let missing = "nonexistent/path/file.rs";
        assert_eq!(resolve_if_symlink(missing), missing);
    }
}