Skip to main content

diffguard_diff/
unified.rs

1use std::path::Path;
2
3use diffguard_types::Scope;
4
/// Classification attached to every line extracted by `parse_unified_diff`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeKind {
    /// A `+` line with no `-` line seen since the last context line or hunk header.
    Added,
    /// A `+` line that follows at least one `-` line with no context line in between.
    Changed,
    /// A `-` line (only emitted when parsing with the deleted scope).
    Deleted,
}
11
12// ============================================================================
13// Detection functions for special diff content
14// ============================================================================
15
/// Reports whether `line` is the marker Git prints instead of a hunk for a binary file.
///
/// Typical markers:
/// - "Binary files a/foo.png and b/foo.png differ"
/// - "Binary files /dev/null and b/foo.png differ"
///
/// The line must begin with `Binary files ` and contain ` differ` somewhere.
///
/// Requirements: 4.1
pub fn is_binary_file(line: &str) -> bool {
    if !line.starts_with("Binary files ") {
        return false;
    }
    // Search the whole line (not just the remainder) so matching stays as lenient as before.
    line.find(" differ").is_some()
}
26
/// Reports whether `line` is a submodule pointer line.
///
/// Submodule pointers look like:
/// - "Subproject commit abc123..."
///
/// Requirements: 4.2
pub fn is_submodule(line: &str) -> bool {
    line.strip_prefix("Subproject commit ").is_some()
}
36
/// Reports whether `line` is the extended header announcing a deleted file.
///
/// Example:
/// - "deleted file mode 100644"
///
/// Requirements: 4.5
pub fn is_deleted_file(line: &str) -> bool {
    const MARKER: &str = "deleted file mode ";
    line.starts_with(MARKER)
}
46
/// Reports whether `line` is the extended header announcing a newly created file.
///
/// Example:
/// - "new file mode 100644"
pub fn is_new_file(line: &str) -> bool {
    line.strip_prefix("new file mode ").is_some()
}
54
/// Reports whether `line` is one of the two headers Git prints for a permission change.
///
/// Mode changes are announced by a pair of lines:
/// - "old mode 100644"
/// - "new mode 100755"
///
/// Either line matches. This inspects a single line only; it does not verify that the
/// surrounding file section is free of content hunks.
///
/// Requirements: 4.4
pub fn is_mode_change_only(line: &str) -> bool {
    const MODE_MARKERS: [&str; 2] = ["old mode ", "new mode "];
    MODE_MARKERS.iter().any(|marker| line.starts_with(*marker))
}
68
69/// Parses a rename line and extracts the source path.
70///
71/// Rename lines look like:
72/// - "rename from path/to/old/file.rs"
73///
74/// Returns the path after "rename from " if the line matches, None otherwise.
75///
76/// Requirements: 4.3
77pub fn parse_rename_from(line: &str) -> Option<String> {
78    let rest = line.strip_prefix("rename from ")?;
79    parse_rename_path(rest)
80}
81
82/// Parses a rename line and extracts the destination path.
83///
84/// Rename lines look like:
85/// - "rename to path/to/new/file.rs"
86///
87/// Returns the path after "rename to " if the line matches, None otherwise.
88///
89/// Requirements: 4.3
90pub fn parse_rename_to(line: &str) -> Option<String> {
91    let rest = line.strip_prefix("rename to ")?;
92    parse_rename_path(rest)
93}
94
/// One line extracted from a unified diff by `parse_unified_diff`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffLine {
    /// Path of the file the line belongs to, as tracked by the parser for the current file.
    pub path: String,
    /// Line number: new-file numbering for added/changed lines, old-file numbering for
    /// deleted lines.
    pub line: u32,
    /// Line text with the leading `+` / `-` marker removed.
    pub content: String,
    /// How the line was classified.
    pub kind: ChangeKind,
}
102
/// Summary counts computed over the lines returned by `parse_unified_diff`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct DiffStats {
    /// Number of distinct paths among the returned lines.
    pub files: u32,
    /// Number of returned lines.
    pub lines: u32,
}
108
/// Errors produced while parsing unified diff text.
#[derive(Debug, thiserror::Error)]
pub enum DiffParseError {
    /// A `@@` hunk header could not be parsed; carries the full offending header line.
    #[error("malformed hunk header: {0}")]
    MalformedHunkHeader(String),
}
114
115/// Parse a unified diff (git-style) and return scoped lines in diff order.
116///
117/// `scope` controls whether we return:
118/// - `Scope::Added`: all added lines
119/// - `Scope::Changed`: only added lines that directly follow at least one removed line in the same hunk
120/// - `Scope::Modified`: alias of `Scope::Changed`
121/// - `Scope::Deleted`: removed lines
122///
123/// Special cases handled:
124/// - Binary files: skipped (no lines extracted)
125/// - Submodule changes: skipped (no lines extracted)
126/// - Deleted files: skipped unless `scope = deleted`
127/// - Mode-only changes: skipped (no lines extracted)
128/// - Renamed files: uses the new (destination) path
129/// - Malformed content: continues processing subsequent files
130pub fn parse_unified_diff(
131    diff_text: &str,
132    scope: Scope,
133) -> Result<(Vec<DiffLine>, DiffStats), DiffParseError> {
134    let mut out: Vec<DiffLine> = Vec::new();
135    let mut current_path: Option<String> = None;
136
137    let mut old_line_no: u32 = 0;
138    let mut new_line_no: u32 = 0;
139    let mut in_hunk = false;
140
141    // For "changed"/"modified" scopes: treat '+' lines as changed if a '-' was seen
142    // since the last context line.
143    let mut pending_removed = false;
144
145    // Track special file status for the current file
146    let mut skip_current_file = false;
147    let mut rename_to_path: Option<String> = None;
148
149    for raw in diff_text.lines() {
150        if raw.starts_with("diff --git ") {
151            // Reset state for new file
152            in_hunk = false;
153            pending_removed = false;
154            skip_current_file = false;
155            rename_to_path = None;
156
157            // Example: diff --git a/foo b/foo
158            if let Some(p) = parse_diff_git_line(raw) {
159                current_path = Some(p);
160            }
161            continue;
162        }
163
164        // Detect binary files (Requirements 4.1)
165        if is_binary_file(raw) {
166            skip_current_file = true;
167            continue;
168        }
169
170        // Detect submodule changes (Requirements 4.2)
171        if is_submodule(raw) {
172            skip_current_file = true;
173            continue;
174        }
175
176        // Detect deleted files (Requirements 4.5)
177        if is_deleted_file(raw) {
178            skip_current_file = !matches!(scope, Scope::Deleted);
179            continue;
180        }
181
182        // Detect mode changes (Requirements 4.4)
183        // Mode-only changes are skipped - they have no content to scan
184        if is_mode_change_only(raw) {
185            continue;
186        }
187
188        // Detect renamed files (Requirements 4.3)
189        if let Some(to_path) = parse_rename_to(raw) {
190            rename_to_path = Some(to_path);
191            continue;
192        }
193
194        // Skip "rename from" lines (we only care about the destination)
195        if parse_rename_from(raw).is_some() {
196            continue;
197        }
198
199        if raw.starts_with("+++ ") {
200            // Prefer the +++ path if present, unless we have a rename_to path
201            if rename_to_path.is_none() {
202                if let Some(p) = parse_plus_plus_plus(raw) {
203                    current_path = Some(p);
204                }
205            } else {
206                // Use the rename_to path for renamed files
207                current_path = rename_to_path.take();
208            }
209            continue;
210        }
211
212        if raw.starts_with("@@") {
213            if skip_current_file {
214                continue;
215            }
216
217            // Try to parse the hunk header, but continue processing on error (Requirements 4.6)
218            match parse_hunk_header(raw) {
219                Ok(hdr) => {
220                    old_line_no = hdr.old_start;
221                    new_line_no = hdr.new_start;
222                    in_hunk = true;
223                    pending_removed = false;
224                }
225                Err(_) => {
226                    // Malformed hunk header - skip this hunk but continue processing
227                    // This allows subsequent files to be processed (Requirements 4.6)
228                    in_hunk = false;
229                }
230            }
231            continue;
232        }
233
234        // Skip if we're not in a hunk or if the current file should be skipped
235        if !in_hunk || skip_current_file {
236            continue;
237        }
238
239        let Some(path) = current_path.as_deref() else {
240            continue;
241        };
242
243        // Skip file marker lines
244        if raw.starts_with("+++") || raw.starts_with("---") {
245            continue;
246        }
247
248        if raw.starts_with('\\') {
249            // "\\ No newline at end of file"
250            continue;
251        }
252
253        let first = raw.as_bytes().first().copied();
254        match first {
255            Some(b'+') => {
256                // Check if this is a submodule content line (Requirements 4.2)
257                let content = &raw[1..];
258                if is_submodule(content) {
259                    skip_current_file = true;
260                    in_hunk = false;
261                    continue;
262                }
263
264                // Added line.
265                let is_changed = pending_removed;
266                let include = match scope {
267                    Scope::Added => true,
268                    Scope::Changed | Scope::Modified => is_changed,
269                    Scope::Deleted => false,
270                };
271
272                if include {
273                    out.push(DiffLine {
274                        path: path.to_string(),
275                        line: new_line_no,
276                        content: content.to_string(),
277                        kind: if is_changed {
278                            ChangeKind::Changed
279                        } else {
280                            ChangeKind::Added
281                        },
282                    });
283                }
284
285                new_line_no = new_line_no.saturating_add(1);
286            }
287            Some(b'-') => {
288                // Check if this is a submodule content line (Requirements 4.2)
289                let content = &raw[1..];
290                if is_submodule(content) {
291                    skip_current_file = true;
292                    in_hunk = false;
293                    continue;
294                }
295
296                // Removed line.
297                if matches!(scope, Scope::Deleted) {
298                    out.push(DiffLine {
299                        path: path.to_string(),
300                        line: old_line_no,
301                        content: content.to_string(),
302                        kind: ChangeKind::Deleted,
303                    });
304                }
305                pending_removed = true;
306                old_line_no = old_line_no.saturating_add(1);
307            }
308            Some(b' ') => {
309                // Context line.
310                pending_removed = false;
311                old_line_no = old_line_no.saturating_add(1);
312                new_line_no = new_line_no.saturating_add(1);
313            }
314            _ => {}
315        }
316    }
317
318    let mut files = std::collections::BTreeSet::<String>::new();
319    for l in &out {
320        files.insert(l.path.clone());
321    }
322
323    let stats = DiffStats {
324        files: files.len() as u32,
325        lines: out.len() as u32,
326    };
327
328    Ok((out, stats))
329}
330
/// Starting line numbers taken from a `@@ -old[,count] +new[,count] @@` hunk header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct HunkHeader {
    /// First line number of the hunk in the old file (from the `-` range).
    old_start: u32,
    /// First line number of the hunk in the new file (from the `+` range).
    new_start: u32,
}
336
337fn parse_hunk_header(line: &str) -> Result<HunkHeader, DiffParseError> {
338    // Formats:
339    // @@ -1,2 +3,4 @@
340    // @@ -1 +3 @@
341    let minus = line
342        .split_whitespace()
343        .nth(1)
344        .ok_or_else(|| DiffParseError::MalformedHunkHeader(line.to_string()))?;
345    let plus = line
346        .split_whitespace()
347        .nth(2)
348        .ok_or_else(|| DiffParseError::MalformedHunkHeader(line.to_string()))?;
349
350    let old_start = parse_hunk_range_start(minus, '-', line)?;
351    let new_start = parse_hunk_range_start(plus, '+', line)?;
352
353    Ok(HunkHeader {
354        old_start,
355        new_start,
356    })
357}
358
359fn parse_hunk_range_start(
360    token: &str,
361    expected_prefix: char,
362    full_line: &str,
363) -> Result<u32, DiffParseError> {
364    let range = token
365        .strip_prefix(expected_prefix)
366        .ok_or_else(|| DiffParseError::MalformedHunkHeader(full_line.to_string()))?;
367    let start_str = range.split(',').next().unwrap_or(range);
368    start_str
369        .parse()
370        .map_err(|_| DiffParseError::MalformedHunkHeader(full_line.to_string()))
371}
372
373fn parse_diff_git_line(line: &str) -> Option<String> {
374    // diff --git a/foo b/foo
375    let rest = line.strip_prefix("diff --git ")?;
376    let tokens = tokenize_git_paths(rest, 2);
377    if tokens.len() < 2 {
378        return None;
379    }
380    let b = unquote_git_token(&tokens[1]);
381    strip_prefix_path(&b)
382}
383
384fn parse_plus_plus_plus(line: &str) -> Option<String> {
385    // +++ b/foo
386    let rest = line.strip_prefix("+++ ")?;
387    let token = parse_single_git_path(rest)?;
388    if token == "/dev/null" {
389        return None;
390    }
391    strip_prefix_path(&token)
392}
393
/// Strips a leading `a/` or `b/` from a diff path and normalizes separators to `/`.
///
/// The input is trimmed first. The path is then rebuilt from its `Path` components, so
/// repeated separators collapse. A root component is emitted as a single `/`, which keeps
/// absolute paths such as `/abs/file.rs` intact instead of doubling the leading slash.
///
/// Returns `None` when nothing is left after trimming and prefix removal.
fn strip_prefix_path(p: &str) -> Option<String> {
    // strips a/ or b/
    let p = p.trim();
    let p = p
        .strip_prefix("a/")
        .or_else(|| p.strip_prefix("b/"))
        .unwrap_or(p);

    // Normalize to forward slashes for receipts.
    let mut normalized = String::with_capacity(p.len());
    for component in Path::new(p).components() {
        match component {
            // The root is itself the separator; joining it with "/" would produce "//".
            std::path::Component::RootDir => normalized.push('/'),
            other => {
                if !normalized.is_empty() && !normalized.ends_with('/') {
                    normalized.push('/');
                }
                normalized.push_str(&other.as_os_str().to_string_lossy());
            }
        }
    }

    if normalized.is_empty() {
        None
    } else {
        Some(normalized)
    }
}
415
/// One path token produced by `tokenize_git_paths`.
#[derive(Debug, Clone)]
struct GitPathToken {
    /// Token text with the surrounding double quotes removed; backslash escapes inside
    /// quotes are kept verbatim (they are decoded later by `unquote_git_token`).
    value: String,
    /// Whether any part of the token was enclosed in double quotes.
    quoted: bool,
}
421
422fn tokenize_git_paths(input: &str, limit: usize) -> Vec<GitPathToken> {
423    let mut tokens = Vec::new();
424    let mut buf = String::new();
425    let mut quoted = false;
426    let mut in_quote = false;
427    let mut chars = input.chars().peekable();
428
429    while let Some(ch) = chars.next() {
430        if in_quote {
431            if ch == '\\' {
432                if let Some(next) = chars.next() {
433                    buf.push('\\');
434                    buf.push(next);
435                } else {
436                    buf.push('\\');
437                }
438                continue;
439            }
440
441            if ch == '"' {
442                in_quote = false;
443                continue;
444            }
445
446            buf.push(ch);
447            continue;
448        }
449
450        if ch.is_whitespace() {
451            if !buf.is_empty() {
452                tokens.push(GitPathToken {
453                    value: buf.clone(),
454                    quoted,
455                });
456                buf.clear();
457                quoted = false;
458                if tokens.len() >= limit {
459                    return tokens;
460                }
461            }
462            continue;
463        }
464
465        if ch == '"' {
466            in_quote = true;
467            quoted = true;
468            continue;
469        }
470
471        buf.push(ch);
472    }
473
474    if !buf.is_empty() && tokens.len() < limit {
475        tokens.push(GitPathToken { value: buf, quoted });
476    }
477
478    tokens
479}
480
481fn parse_single_git_path(input: &str) -> Option<String> {
482    let tokens = tokenize_git_paths(input, 1);
483    tokens.first().map(unquote_git_token)
484}
485
486fn parse_rename_path(input: &str) -> Option<String> {
487    let trimmed = input.trim();
488    if trimmed.is_empty() {
489        return None;
490    }
491
492    if trimmed.starts_with('"') {
493        return parse_single_git_path(trimmed);
494    }
495
496    Some(trimmed.to_string())
497}
498
499fn unquote_git_token(token: &GitPathToken) -> String {
500    if token.quoted {
501        unescape_git_path(&token.value)
502    } else {
503        token.value.clone()
504    }
505}
506
/// Decodes the C-style escapes Git uses inside quoted path names.
///
/// Handled escapes:
/// - `\\`, `\"` and `\ ` produce the escaped character itself
/// - `\a`, `\b`, `\f`, `\n`, `\r`, `\t`, `\v` produce the corresponding control byte
/// - `\N`, `\NN`, `\NNN` (octal digits) produce one byte, keeping only the low 8 bits
///
/// An unrecognised escape is kept verbatim (backslash plus character), as is a lone
/// trailing backslash. Decoding works on bytes so that multi-byte UTF-8 sequences written
/// as consecutive octal escapes are reassembled; invalid UTF-8 in the result is replaced
/// lossily.
fn unescape_git_path(s: &str) -> String {
    let mut out: Vec<u8> = Vec::with_capacity(s.len());
    let mut bytes = s.bytes().peekable();

    while let Some(b) = bytes.next() {
        if b != b'\\' {
            out.push(b);
            continue;
        }

        let Some(escape) = bytes.next() else {
            // Trailing backslash with nothing to escape.
            out.push(b'\\');
            break;
        };

        match escape {
            b'\\' | b'"' | b' ' => out.push(escape),
            b'a' => out.push(0x07),
            b'b' => out.push(0x08),
            b'f' => out.push(0x0c),
            b'n' => out.push(b'\n'),
            b'r' => out.push(b'\r'),
            b't' => out.push(b'\t'),
            b'v' => out.push(0x0b),
            b'0'..=b'7' => {
                // Up to three octal digits in total; stop at the first non-octal byte.
                let mut val = u32::from(escape - b'0');
                for _ in 0..2 {
                    match bytes.peek().copied() {
                        Some(d @ b'0'..=b'7') => {
                            val = val * 8 + u32::from(d - b'0');
                            bytes.next();
                        }
                        _ => break,
                    }
                }
                out.push((val & 0xFF) as u8);
            }
            other => {
                out.push(b'\\');
                out.push(other);
            }
        }
    }

    String::from_utf8_lossy(&out).into_owned()
}
551
552#[cfg(test)]
553mod tests {
554    use super::*;
555
556    #[test]
557    fn parses_added_lines() {
558        let diff = r#"
559
560diff --git a/src/lib.rs b/src/lib.rs
561index 0000000..1111111 100644
562--- a/src/lib.rs
563+++ b/src/lib.rs
564@@ -1,1 +1,2 @@
565 fn a() {}
566+fn b() { let _ = 1; }
567"#;
568
569        let (lines, stats) = parse_unified_diff(diff, Scope::Added).unwrap();
570        assert_eq!(stats.files, 1);
571        assert_eq!(stats.lines, 1);
572        assert_eq!(lines[0].path, "src/lib.rs");
573        assert_eq!(lines[0].line, 2);
574        assert_eq!(lines[0].kind, ChangeKind::Added);
575    }
576
577    #[test]
578    fn parses_changed_lines_only_when_requested() {
579        let diff = r#"
580
581diff --git a/src/lib.rs b/src/lib.rs
582--- a/src/lib.rs
583+++ b/src/lib.rs
584@@ -1,1 +1,1 @@
585-fn a() { 1 }
586+fn a() { 2 }
587"#;
588
589        let (added, _) = parse_unified_diff(diff, Scope::Added).unwrap();
590        assert_eq!(added.len(), 1);
591
592        let (changed, _) = parse_unified_diff(diff, Scope::Changed).unwrap();
593        assert_eq!(changed.len(), 1);
594        assert_eq!(changed[0].kind, ChangeKind::Changed);
595    }
596
597    #[test]
598    fn modified_scope_behaves_like_changed_scope() {
599        let diff = r#"
600
601diff --git a/src/lib.rs b/src/lib.rs
602--- a/src/lib.rs
603+++ b/src/lib.rs
604@@ -1,1 +1,1 @@
605-fn a() { 1 }
606+fn a() { 2 }
607"#;
608
609        let (changed, changed_stats) = parse_unified_diff(diff, Scope::Changed).unwrap();
610        let (modified, modified_stats) = parse_unified_diff(diff, Scope::Modified).unwrap();
611
612        assert_eq!(changed, modified);
613        assert_eq!(changed_stats, modified_stats);
614    }
615
616    #[test]
617    fn does_not_treat_pure_additions_as_changed() {
618        let diff = r#"
619
620diff --git a/a.txt b/a.txt
621--- a/a.txt
622+++ b/a.txt
623@@ -0,0 +1,1 @@
624+hello
625"#;
626
627        let (changed, _) = parse_unified_diff(diff, Scope::Changed).unwrap();
628        assert_eq!(changed.len(), 0);
629    }
630
631    #[test]
632    fn parses_deleted_lines_when_requested() {
633        let diff = r#"
634
635diff --git a/src/lib.rs b/src/lib.rs
636--- a/src/lib.rs
637+++ b/src/lib.rs
638@@ -1,3 +1,2 @@
639 fn a() {}
640-fn b() {}
641-fn c() {}
642+fn c() { println!("updated"); }
643"#;
644
645        let (lines, stats) = parse_unified_diff(diff, Scope::Deleted).unwrap();
646        assert_eq!(stats.files, 1);
647        assert_eq!(stats.lines, 2);
648
649        assert_eq!(lines[0].path, "src/lib.rs");
650        assert_eq!(lines[0].line, 2);
651        assert_eq!(lines[0].content, "fn b() {}");
652        assert_eq!(lines[0].kind, ChangeKind::Deleted);
653
654        assert_eq!(lines[1].path, "src/lib.rs");
655        assert_eq!(lines[1].line, 3);
656        assert_eq!(lines[1].content, "fn c() {}");
657        assert_eq!(lines[1].kind, ChangeKind::Deleted);
658    }
659
660    #[test]
661    fn skips_submodule_marker_lines() {
662        let diff = r#"
663diff --git a/submodule b/submodule
664Subproject commit abc123
665"#;
666
667        let (lines, stats) = parse_unified_diff(diff, Scope::Added).unwrap();
668        assert_eq!(stats.files, 0);
669        assert!(lines.is_empty());
670    }
671
672    #[test]
673    fn skips_hunks_without_current_path() {
674        let diff = r#"
675@@ -0,0 +1 @@
676+hello
677"#;
678
679        let (lines, stats) = parse_unified_diff(diff, Scope::Added).unwrap();
680        assert_eq!(stats.files, 0);
681        assert!(lines.is_empty());
682    }
683
684    #[test]
685    fn skips_no_newline_marker() {
686        let diff = r#"
687diff --git a/a.txt b/a.txt
688--- a/a.txt
689+++ b/a.txt
690@@ -1 +1 @@
691+hello
692\\ No newline at end of file
693"#;
694
695        let (lines, _) = parse_unified_diff(diff, Scope::Added).unwrap();
696        assert_eq!(lines.len(), 1);
697    }
698
699    #[test]
700    fn skips_submodule_line_inside_hunk() {
701        let diff = r#"
702diff --git a/submodule b/submodule
703--- a/submodule
704+++ b/submodule
705@@ -0,0 +1 @@
706+Subproject commit abc123
707"#;
708
709        let (lines, stats) = parse_unified_diff(diff, Scope::Added).unwrap();
710        assert_eq!(stats.files, 0);
711        assert!(lines.is_empty());
712    }
713
714    #[test]
715    fn strip_prefix_path_empty_returns_none() {
716        assert!(strip_prefix_path("a/").is_none());
717        assert!(strip_prefix_path("").is_none());
718    }
719
720    #[test]
721    fn tokenize_git_paths_trailing_escape_in_quote() {
722        let tokens = tokenize_git_paths(r#""path\"#, 1);
723        assert_eq!(tokens.len(), 1);
724        assert_eq!(tokens[0].value, "path\\");
725    }
726
727    #[test]
728    fn parse_single_git_path_empty_returns_none() {
729        assert!(parse_single_git_path("   ").is_none());
730    }
731
732    // ========================================================================
733    // Tests for detection functions (Requirements 4.1-4.5)
734    // ========================================================================
735
736    #[test]
737    fn is_binary_file_detects_binary_markers() {
738        // Standard binary file marker
739        assert!(is_binary_file(
740            "Binary files a/image.png and b/image.png differ"
741        ));
742        // Binary file added from /dev/null
743        assert!(is_binary_file(
744            "Binary files /dev/null and b/new.bin differ"
745        ));
746        // Binary file deleted to /dev/null
747        assert!(is_binary_file(
748            "Binary files a/old.bin and /dev/null differ"
749        ));
750    }
751
752    #[test]
753    fn is_binary_file_rejects_non_binary_lines() {
754        assert!(!is_binary_file("diff --git a/foo b/foo"));
755        assert!(!is_binary_file("+++ b/foo"));
756        assert!(!is_binary_file("Binary files")); // Missing " differ"
757        assert!(!is_binary_file("Some binary files differ")); // Wrong prefix
758        assert!(!is_binary_file("")); // Empty line
759    }
760
761    #[test]
762    fn is_submodule_detects_submodule_commits() {
763        assert!(is_submodule("Subproject commit abc123def456"));
764        assert!(is_submodule(
765            "Subproject commit 0000000000000000000000000000000000000000"
766        ));
767    }
768
769    #[test]
770    fn is_submodule_rejects_non_submodule_lines() {
771        assert!(!is_submodule("diff --git a/foo b/foo"));
772        assert!(!is_submodule("Subproject")); // Incomplete
773        assert!(!is_submodule("commit abc123")); // Wrong prefix
774        assert!(!is_submodule("")); // Empty line
775    }
776
777    #[test]
778    fn is_deleted_file_detects_deleted_mode() {
779        assert!(is_deleted_file("deleted file mode 100644"));
780        assert!(is_deleted_file("deleted file mode 100755"));
781        assert!(is_deleted_file("deleted file mode 120000")); // Symlink
782    }
783
784    #[test]
785    fn is_deleted_file_rejects_non_deleted_lines() {
786        assert!(!is_deleted_file("new file mode 100644"));
787        assert!(!is_deleted_file("diff --git a/foo b/foo"));
788        assert!(!is_deleted_file("deleted file")); // Incomplete
789        assert!(!is_deleted_file("")); // Empty line
790    }
791
792    #[test]
793    fn is_new_file_detects_new_mode() {
794        assert!(is_new_file("new file mode 100644"));
795        assert!(is_new_file("new file mode 100755"));
796        assert!(is_new_file("new file mode 120000")); // Symlink
797    }
798
799    #[test]
800    fn is_new_file_rejects_non_new_lines() {
801        assert!(!is_new_file("deleted file mode 100644"));
802        assert!(!is_new_file("diff --git a/foo b/foo"));
803        assert!(!is_new_file("new file")); // Incomplete
804        assert!(!is_new_file("")); // Empty line
805    }
806
807    #[test]
808    fn is_mode_change_only_detects_mode_changes() {
809        assert!(is_mode_change_only("old mode 100644"));
810        assert!(is_mode_change_only("new mode 100755"));
811        assert!(is_mode_change_only("old mode 100755"));
812        assert!(is_mode_change_only("new mode 100644"));
813    }
814
815    #[test]
816    fn is_mode_change_only_rejects_non_mode_lines() {
817        assert!(!is_mode_change_only("diff --git a/foo b/foo"));
818        assert!(!is_mode_change_only("deleted file mode 100644"));
819        assert!(!is_mode_change_only("new file mode 100644"));
820        assert!(!is_mode_change_only("mode 100644")); // Missing old/new prefix
821        assert!(!is_mode_change_only("")); // Empty line
822    }
823
824    #[test]
825    fn parse_rename_from_extracts_source_path() {
826        assert_eq!(
827            parse_rename_from("rename from src/old/path.rs"),
828            Some("src/old/path.rs".to_string())
829        );
830        assert_eq!(
831            parse_rename_from("rename from file.txt"),
832            Some("file.txt".to_string())
833        );
834        assert_eq!(
835            parse_rename_from("rename from path/with spaces/file.rs"),
836            Some("path/with spaces/file.rs".to_string())
837        );
838        assert_eq!(
839            parse_rename_from("rename from \"path/with spaces/file.rs\""),
840            Some("path/with spaces/file.rs".to_string())
841        );
842    }
843
    /// Lines that do not carry the exact `rename from ` prefix must yield `None`.
    #[test]
    fn parse_rename_from_returns_none_for_non_rename_lines() {
        assert_eq!(parse_rename_from("rename to src/new/path.rs"), None);
        assert_eq!(parse_rename_from("diff --git a/foo b/foo"), None);
        assert_eq!(parse_rename_from("rename from"), None); // No trailing space, so the prefix does not match
        assert_eq!(parse_rename_from(""), None);
    }
851
852    #[test]
853    fn parse_rename_path_empty_returns_none() {
854        assert_eq!(parse_rename_path("   "), None);
855    }
856
857    #[test]
858    fn parse_rename_to_extracts_destination_path() {
859        assert_eq!(
860            parse_rename_to("rename to src/new/path.rs"),
861            Some("src/new/path.rs".to_string())
862        );
863        assert_eq!(
864            parse_rename_to("rename to file.txt"),
865            Some("file.txt".to_string())
866        );
867        assert_eq!(
868            parse_rename_to("rename to path/with spaces/file.rs"),
869            Some("path/with spaces/file.rs".to_string())
870        );
871        assert_eq!(
872            parse_rename_to("rename to \"path/with spaces/file.rs\""),
873            Some("path/with spaces/file.rs".to_string())
874        );
875    }
876
    /// Lines that do not carry the exact `rename to ` prefix must yield `None`.
    #[test]
    fn parse_rename_to_returns_none_for_non_rename_lines() {
        assert_eq!(parse_rename_to("rename from src/old/path.rs"), None);
        assert_eq!(parse_rename_to("diff --git a/foo b/foo"), None);
        assert_eq!(parse_rename_to("rename to"), None); // No trailing space, so the prefix does not match
        assert_eq!(parse_rename_to(""), None);
    }
884
885    #[test]
886    fn parse_diff_git_line_parses_paths() {
887        assert_eq!(
888            parse_diff_git_line("diff --git a/src/lib.rs b/src/lib.rs"),
889            Some("src/lib.rs".to_string())
890        );
891        assert_eq!(
892            parse_diff_git_line(r#"diff --git "a/dir name/file.rs" "b/dir name/file.rs""#),
893            Some("dir name/file.rs".to_string())
894        );
895        assert_eq!(
896            parse_diff_git_line(
897                r#"diff --git "a/dir\ name/\"file\".rs" "b/dir\ name/\"file\".rs""#
898            ),
899            Some("dir name/\"file\".rs".to_string())
900        );
901        assert_eq!(parse_diff_git_line("diff --git a/only"), None);
902    }
903
904    #[test]
905    fn parse_diff_git_line_rejects_missing_prefix() {
906        assert_eq!(
907            parse_diff_git_line("diff --gi a/src/lib.rs b/src/lib.rs"),
908            None
909        );
910        assert_eq!(parse_diff_git_line("not a diff header"), None);
911    }
912
913    #[test]
914    fn parse_unified_diff_skips_malformed_diff_git_line() {
915        let diff = r#"
916diff --git a/only
917@@ -1,1 +1,1 @@
918+line
919"#;
920
921        let (lines, stats) = parse_unified_diff(diff, Scope::Added).unwrap();
922        assert!(lines.is_empty());
923        assert_eq!(stats.files, 0);
924        assert_eq!(stats.lines, 0);
925    }
926
927    #[test]
928    fn parse_plus_plus_plus_parses_paths() {
929        assert_eq!(
930            parse_plus_plus_plus("+++ b/src/lib.rs"),
931            Some("src/lib.rs".to_string())
932        );
933        assert_eq!(parse_plus_plus_plus("+++ /dev/null"), None);
934        assert_eq!(
935            parse_plus_plus_plus(r#"+++ "b/dir name/file.rs""#),
936            Some("dir name/file.rs".to_string())
937        );
938        assert_eq!(
939            parse_plus_plus_plus(r#"+++ "b/dir\ name/\"file\".rs""#),
940            Some("dir name/\"file\".rs".to_string())
941        );
942    }
943
944    #[test]
945    fn parse_plus_plus_plus_rejects_invalid_lines() {
946        assert_eq!(parse_plus_plus_plus("++ b/src/lib.rs"), None);
947        assert_eq!(parse_plus_plus_plus("+++ "), None);
948    }
949
950    #[test]
951    fn parse_hunk_header_rejects_non_numeric_start() {
952        let err = parse_hunk_header("@@ -1,2 +x,4 @@").unwrap_err();
953        assert!(matches!(err, DiffParseError::MalformedHunkHeader(_)));
954    }
955
956    #[test]
957    fn parse_hunk_header_rejects_missing_plus_section() {
958        let err = parse_hunk_header("@@ -1,2").unwrap_err();
959        assert!(matches!(err, DiffParseError::MalformedHunkHeader(_)));
960    }
961
962    #[test]
963    fn tokenize_git_paths_respects_quotes_and_limits() {
964        let tokens = tokenize_git_paths(r#"a/one "b/two two" c/three"#, 2);
965        assert_eq!(tokens.len(), 2);
966        assert_eq!(tokens[0].value, "a/one");
967        assert!(!tokens[0].quoted);
968        assert_eq!(tokens[1].value, "b/two two");
969        assert!(tokens[1].quoted);
970
971        let tokens = tokenize_git_paths("   a b", 2);
972        assert_eq!(tokens.len(), 2);
973        assert_eq!(tokens[0].value, "a");
974        assert_eq!(tokens[1].value, "b");
975
976        let tokens = tokenize_git_paths("a ", 2);
977        assert_eq!(tokens.len(), 1);
978        assert_eq!(tokens[0].value, "a");
979
980        let tokens = tokenize_git_paths("a", 0);
981        assert!(tokens.is_empty());
982
983        let tokens = tokenize_git_paths("a b c", 1);
984        assert_eq!(tokens.len(), 1);
985        assert_eq!(tokens[0].value, "a");
986    }
987
#[test]
fn unescape_git_path_handles_common_escapes() {
    // Each pair is (escaped input, expected unescaped output).
    let cases = [
        // Space, quote, backslash, tab, newline and carriage-return escapes together.
        (
            r#"dir\ name\"quote\"\\tab\tnewline\ncarriage\rend"#,
            "dir name\"quote\"\\tab\tnewline\ncarriage\rend",
        ),
        // Three-digit octal escapes.
        (r#"octal\141\040space"#, "octala space"),
        // An unrecognised escape is kept verbatim.
        (r#"weird\q"#, "weird\\q"),
        // A trailing lone backslash is kept verbatim.
        ("endswith\\", "endswith\\"),
    ];

    for &(escaped, expected) in &cases {
        assert_eq!(unescape_git_path(escaped), expected);
    }
}
998
#[test]
fn unescape_git_path_handles_octal_limits_and_control_chars() {
    // A single octal digit decodes to that byte value.
    let bell = unescape_git_path(r#"\7"#);
    assert_eq!(bell.as_bytes(), &[7]);

    // At most three octal digits are consumed; the fourth digit stays literal.
    let capped = unescape_git_path(r#"\1234"#);
    assert_eq!(capped, "S4");

    // `\r` becomes a carriage-return byte.
    let carriage = unescape_git_path(r#"a\rb"#);
    assert_eq!(carriage.as_bytes(), b"a\rb");

    // A two-digit octal escape ends at the first non-octal character.
    let short_octal = unescape_git_path(r#"\12x"#);
    assert_eq!(short_octal.as_bytes(), b"\nx");
}
1006
1007    // ========================================================================
1008    // Tests for parse_unified_diff special case handling (Requirements 4.1-4.6)
1009    // ========================================================================
1010
#[test]
fn skips_binary_files() {
    // The binary entry contributes nothing; the text file after it is still parsed.
    let patch = r#"
diff --git a/image.png b/image.png
index 0000000..1111111 100644
Binary files a/image.png and b/image.png differ
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,1 +1,2 @@
 fn a() {}
+fn b() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let only = &found[0];
    assert_eq!(only.path, "src/lib.rs");
    assert_eq!(only.content, "fn b() {}");
}
1032
#[test]
fn skips_submodule_changes() {
    // The submodule pointer bump is ignored; the regular file after it is still parsed.
    let patch = r#"
diff --git a/vendor/lib b/vendor/lib
index abc1234..def5678 160000
--- a/vendor/lib
+++ b/vendor/lib
@@ -1 +1 @@
-Subproject commit abc1234567890abcdef1234567890abcdef123456
+Subproject commit def5678901234567890abcdef1234567890abcdef
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,1 +1,2 @@
 fn main() {}
+fn helper() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let only = &found[0];
    assert_eq!(only.path, "src/main.rs");
    assert_eq!(only.content, "fn helper() {}");
}
1058
#[test]
fn skips_deleted_files_for_added_scope() {
    // Under `Scope::Added` the deleted file yields nothing, while the new file
    // that follows it is still reported.
    let patch = r#"
diff --git a/old_file.rs b/old_file.rs
deleted file mode 100644
index abc1234..0000000
--- a/old_file.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn old() {}
-fn deprecated() {}
-fn removed() {}
diff --git a/new_file.rs b/new_file.rs
new file mode 100644
--- /dev/null
+++ b/new_file.rs
@@ -0,0 +1,1 @@
+fn new() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let only = &found[0];
    assert_eq!(only.path, "new_file.rs");
    assert_eq!(only.content, "fn new() {}");
}
1086
#[test]
fn deleted_scope_includes_deleted_files() {
    // Under `Scope::Deleted` every removed line of a deleted file is reported,
    // numbered 1..=3 and tagged as `ChangeKind::Deleted`.
    let patch = r#"
diff --git a/old_file.rs b/old_file.rs
deleted file mode 100644
index abc1234..0000000
--- a/old_file.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn old() {}
-fn deprecated() {}
-fn removed() {}
"#;

    let (removed, totals) = parse_unified_diff(patch, Scope::Deleted).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 3));

    // (expected line number, expected content) in output order.
    let expected = [
        (1, "fn old() {}"),
        (2, "fn deprecated() {}"),
        (3, "fn removed() {}"),
    ];
    for (idx, (line_no, text)) in expected.iter().enumerate() {
        assert_eq!(removed[idx].line, *line_no);
        assert_eq!(removed[idx].content, *text);
    }

    assert!(removed.iter().all(|entry| entry.kind == ChangeKind::Deleted));
}
1112
#[test]
fn skips_mode_only_changes() {
    // A permissions-only entry (old mode / new mode, no hunks) contributes
    // nothing; the file after it is still parsed.
    let patch = r#"
diff --git a/script.sh b/script.sh
old mode 100644
new mode 100755
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,1 +1,2 @@
 fn a() {}
+fn b() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let only = &found[0];
    assert_eq!(only.path, "src/lib.rs");
    assert_eq!(only.content, "fn b() {}");
}
1134
#[test]
fn uses_new_path_for_renamed_files() {
    // Lines from a renamed file are attributed to the destination path.
    let patch = r#"
diff --git a/old/path.rs b/new/path.rs
similarity index 95%
rename from old/path.rs
rename to new/path.rs
--- a/old/path.rs
+++ b/new/path.rs
@@ -1,1 +1,2 @@
 fn existing() {}
+fn added() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let only = &found[0];
    assert_eq!(only.path, "new/path.rs");
    assert_eq!(only.content, "fn added() {}");
}
1156
#[test]
fn parses_quoted_paths_in_headers() {
    // Quoted header paths containing a space are unquoted in the reported path.
    let patch = r#"
diff --git "a/dir name/file.rs" "b/dir name/file.rs"
--- "a/dir name/file.rs"
+++ "b/dir name/file.rs"
@@ -1,1 +1,2 @@
 fn a() {}
+fn b() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let only = &found[0];
    assert_eq!(only.path, "dir name/file.rs");
    assert_eq!(only.content, "fn b() {}");
}
1174
#[test]
fn ignores_lines_outside_hunks() {
    // A `+` line that appears before the first hunk header is not reported.
    let patch = r#"
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
+fn should_not_be_seen()
@@ -1,1 +1,2 @@
 fn a() {}
+fn b() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));
    assert_eq!(found[0].content, "fn b() {}");
}
1192
#[test]
fn skips_file_markers_inside_hunks() {
    // A line starting with `+++` inside a hunk is not reported as an added line;
    // only the ordinary `+` line is.
    let patch = r#"
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,1 +1,3 @@
 fn a() {}
+++not_a_marker
+fn b() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));
    assert_eq!(found[0].content, "fn b() {}");
}
1210
#[test]
fn continues_after_malformed_hunk_header() {
    // A bad hunk header in one file does not abort parsing: that file's lines
    // are dropped and the next file is processed normally.
    let patch = r#"
diff --git a/bad.rs b/bad.rs
--- a/bad.rs
+++ b/bad.rs
@@ malformed hunk header
+this line should be skipped
diff --git a/good.rs b/good.rs
--- a/good.rs
+++ b/good.rs
@@ -1,1 +1,2 @@
 fn a() {}
+fn b() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let only = &found[0];
    assert_eq!(only.path, "good.rs");
    assert_eq!(only.content, "fn b() {}");
}
1234
#[test]
fn handles_multiple_special_cases_in_one_diff() {
    // Binary, submodule, deleted and mode-only entries are all skipped in the
    // same diff; only the renamed file and the ordinary file contribute lines.
    let patch = r#"
diff --git a/image.png b/image.png
Binary files a/image.png and b/image.png differ
diff --git a/vendor/lib b/vendor/lib
--- a/vendor/lib
+++ b/vendor/lib
@@ -1 +1 @@
-Subproject commit abc123
+Subproject commit def456
diff --git a/old.rs b/old.rs
deleted file mode 100644
--- a/old.rs
+++ /dev/null
@@ -1 +0,0 @@
-fn old() {}
diff --git a/script.sh b/script.sh
old mode 100644
new mode 100755
diff --git a/renamed.rs b/newname.rs
rename from renamed.rs
rename to newname.rs
--- a/renamed.rs
+++ b/newname.rs
@@ -1,1 +1,2 @@
 fn existing() {}
+fn in_renamed() {}
diff --git a/normal.rs b/normal.rs
--- a/normal.rs
+++ b/normal.rs
@@ -1,1 +1,2 @@
 fn a() {}
+fn in_normal() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (2, 2));

    // (added content, path it must be attributed to): the renamed file must use
    // its new name, and the ordinary file its own path.
    let expectations = [
        ("fn in_renamed() {}", "newname.rs"),
        ("fn in_normal() {}", "normal.rs"),
    ];
    for &(text, path) in &expectations {
        let entry = found.iter().find(|entry| entry.content == text).unwrap();
        assert_eq!(entry.path, path);
    }
}
1286
#[test]
fn binary_file_added_from_dev_null() {
    // A newly added binary file (old side is /dev/null) is skipped as well.
    let patch = r#"
diff --git a/new_image.png b/new_image.png
new file mode 100644
Binary files /dev/null and b/new_image.png differ
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,1 +1,2 @@
 fn a() {}
+fn b() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!(totals.files, 1);

    let leading = &found[0];
    assert_eq!(leading.path, "src/lib.rs");
}
1306
#[test]
fn renamed_file_with_no_content_changes() {
    // A pure rename (100% similarity, no hunks) produces no lines and is not
    // counted as a file.
    let patch = r#"
diff --git a/old.rs b/new.rs
similarity index 100%
rename from old.rs
rename to new.rs
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert!(found.is_empty());
    assert_eq!((totals.files, totals.lines), (0, 0));
}
1323
1324    // ========================================================================
1325    // Edge case tests (Requirements 9.1, 9.2, 9.6)
1326    // ========================================================================
1327
/// Empty hunks, i.e. hunks without any added or removed lines (Requirement 9.1).
#[test]
fn handles_empty_hunk_context_only() {
    // The single hunk holds context lines only, so nothing is reported.
    let patch = r#"
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,3 @@
 fn a() {}
 fn b() {}
 fn c() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert!(found.is_empty());
    assert_eq!((totals.files, totals.lines), (0, 0));
}
1347
#[test]
fn handles_empty_hunk_zero_lines() {
    // The hunk header declares zero lines on both sides and no body follows.
    let patch = r#"
diff --git a/empty.rs b/empty.rs
new file mode 100644
--- /dev/null
+++ b/empty.rs
@@ -0,0 +0,0 @@
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert!(found.is_empty());
    assert_eq!((totals.files, totals.lines), (0, 0));
}
1364
#[test]
fn handles_multiple_empty_hunks() {
    // Two hunks in one file, both context-only: still nothing to report.
    let patch = r#"
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,2 +1,2 @@
 fn a() {}
 fn b() {}
@@ -10,2 +10,2 @@
 fn x() {}
 fn y() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert!(found.is_empty());
    assert_eq!((totals.files, totals.lines), (0, 0));
}
1385
/// Several files carried by one diff (Requirement 9.2).
#[test]
fn parses_multiple_files_in_single_diff() {
    let patch = r#"
diff --git a/src/first.rs b/src/first.rs
--- a/src/first.rs
+++ b/src/first.rs
@@ -1,1 +1,2 @@
 fn first_existing() {}
+fn first_added() {}
diff --git a/src/second.rs b/src/second.rs
--- a/src/second.rs
+++ b/src/second.rs
@@ -1,1 +1,2 @@
 fn second_existing() {}
+fn second_added() {}
diff --git a/src/third.rs b/src/third.rs
--- a/src/third.rs
+++ b/src/third.rs
@@ -1,1 +1,2 @@
 fn third_existing() {}
+fn third_added() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (3, 3));

    // Every file contributes exactly one added line, and it sits on line 2.
    let per_file = [
        ("src/first.rs", "fn first_added() {}"),
        ("src/second.rs", "fn second_added() {}"),
        ("src/third.rs", "fn third_added() {}"),
    ];
    for &(path, text) in &per_file {
        let entry = found.iter().find(|entry| entry.path == path).unwrap();
        assert_eq!(entry.content, text);
        assert_eq!(entry.line, 2);
    }
}
1430
#[test]
fn parses_multiple_files_with_multiple_hunks_each() {
    // Two files with two hunks each; line numbers must follow each hunk's `+` start.
    let patch = r#"
diff --git a/src/a.rs b/src/a.rs
--- a/src/a.rs
+++ b/src/a.rs
@@ -1,1 +1,2 @@
 fn a1() {}
+fn a2() {}
@@ -10,1 +11,2 @@
 fn a10() {}
+fn a11() {}
diff --git a/src/b.rs b/src/b.rs
--- a/src/b.rs
+++ b/src/b.rs
@@ -1,1 +1,2 @@
 fn b1() {}
+fn b2() {}
@@ -20,1 +21,2 @@
 fn b20() {}
+fn b21() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (2, 4));

    // File a: one added line per hunk, at lines 2 and 12.
    let from_a: Vec<_> = found
        .iter()
        .filter(|entry| entry.path == "src/a.rs")
        .collect();
    assert_eq!(from_a.len(), 2);
    for (text, at) in [("fn a2() {}", 2), ("fn a11() {}", 12)] {
        assert!(
            from_a
                .iter()
                .any(|entry| entry.content == text && entry.line == at)
        );
    }

    // File b: one added line per hunk, at lines 2 and 22.
    let from_b: Vec<_> = found
        .iter()
        .filter(|entry| entry.path == "src/b.rs")
        .collect();
    assert_eq!(from_b.len(), 2);
    for (text, at) in [("fn b2() {}", 2), ("fn b21() {}", 22)] {
        assert!(
            from_b
                .iter()
                .any(|entry| entry.content == text && entry.line == at)
        );
    }
}
1486
#[test]
fn parses_multiple_files_preserves_order() {
    let patch = r#"
diff --git a/z.rs b/z.rs
--- a/z.rs
+++ b/z.rs
@@ -1,1 +1,2 @@
 fn z() {}
+fn z_added() {}
diff --git a/a.rs b/a.rs
--- a/a.rs
+++ b/a.rs
@@ -1,1 +1,2 @@
 fn a() {}
+fn a_added() {}
diff --git a/m.rs b/m.rs
--- a/m.rs
+++ b/m.rs
@@ -1,1 +1,2 @@
 fn m() {}
+fn m_added() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (3, 3));

    // Output follows diff order (z, a, m) rather than alphabetical order.
    let diff_order = ["z.rs", "a.rs", "m.rs"];
    for (idx, path) in diff_order.iter().enumerate() {
        assert_eq!(found[idx].path, *path);
    }
}
1519
/// Unicode text inside diff lines (Requirement 9.6).
#[test]
fn handles_unicode_in_added_lines() {
    let patch = r#"
diff --git a/src/i18n.rs b/src/i18n.rs
--- a/src/i18n.rs
+++ b/src/i18n.rs
@@ -1,1 +1,4 @@
 fn greet() {}
+let hello_jp = "こんにちは";
+let hello_cn = "你好";
+let hello_kr = "안녕하세요";
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 3));

    // Added lines come back verbatim and in order.
    let expected = [
        "let hello_jp = \"こんにちは\";",
        "let hello_cn = \"你好\";",
        "let hello_kr = \"안녕하세요\";",
    ];
    for (idx, text) in expected.iter().enumerate() {
        assert_eq!(found[idx].content, *text);
    }
}
1542
#[test]
fn handles_unicode_emojis_in_diff() {
    // Covers a plain emoji and an emoji followed by a skin-tone modifier.
    let patch = r#"
diff --git a/src/emoji.rs b/src/emoji.rs
--- a/src/emoji.rs
+++ b/src/emoji.rs
@@ -1,1 +1,3 @@
 fn emoji() {}
+let rocket = "🚀";
+let thumbs_up = "👍🏽";
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 2));

    let expected = ["let rocket = \"🚀\";", "let thumbs_up = \"👍🏽\";"];
    for (idx, text) in expected.iter().enumerate() {
        assert_eq!(found[idx].content, *text);
    }
}
1562
#[test]
fn handles_unicode_in_file_paths() {
    // Non-ASCII characters appear both in the path and in the added line.
    let patch = r#"
diff --git a/src/日本語.rs b/src/日本語.rs
--- a/src/日本語.rs
+++ b/src/日本語.rs
@@ -1,1 +1,2 @@
 fn existing() {}
+fn 新しい関数() {}
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let only = &found[0];
    assert_eq!(only.path, "src/日本語.rs");
    assert_eq!(only.content, "fn 新しい関数() {}");
}
1581
#[test]
fn handles_unicode_special_characters() {
    // Symbols from several Unicode blocks: math operators, arrows,
    // box-drawing characters and currency signs.
    let patch = r#"
diff --git a/src/symbols.rs b/src/symbols.rs
--- a/src/symbols.rs
+++ b/src/symbols.rs
@@ -1,1 +1,5 @@
 fn symbols() {}
+let math = "∑∏∫∂∇";
+let arrows = "→←↑↓↔";
+let box_drawing = "┌─┐│└─┘";
+let currency = "€£¥₹₽";
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 4));

    let expected = [
        "let math = \"∑∏∫∂∇\";",
        "let arrows = \"→←↑↓↔\";",
        "let box_drawing = \"┌─┐│└─┘\";",
        "let currency = \"€£¥₹₽\";",
    ];
    for (idx, text) in expected.iter().enumerate() {
        assert_eq!(found[idx].content, *text);
    }
}
1606
#[test]
fn handles_mixed_unicode_and_ascii() {
    // One added line that mixes ASCII, CJK characters and an emoji.
    let patch = r#"
diff --git a/src/mixed.rs b/src/mixed.rs
--- a/src/mixed.rs
+++ b/src/mixed.rs
@@ -1,1 +1,2 @@
 fn mixed() {}
+let message = "Hello 世界! Welcome to Rust 🦀";
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Added).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let expected = "let message = \"Hello 世界! Welcome to Rust 🦀\";";
    assert_eq!(found[0].content, expected);
}
1627
#[test]
fn handles_unicode_in_changed_lines() {
    // Under `Scope::Changed`, a replaced line with Cyrillic text is reported
    // verbatim and tagged as `ChangeKind::Changed`.
    let patch = r#"
diff --git a/src/i18n.rs b/src/i18n.rs
--- a/src/i18n.rs
+++ b/src/i18n.rs
@@ -1,1 +1,1 @@
-let greeting = "Hello";
+let greeting = "Привет";
"#;

    let (found, totals) = parse_unified_diff(patch, Scope::Changed).unwrap();
    assert_eq!((totals.files, totals.lines), (1, 1));

    let only = &found[0];
    assert_eq!(only.content, "let greeting = \"Привет\";");
    assert_eq!(only.kind, ChangeKind::Changed);
}
1647}