diffscope 0.5.3

A composable code review engine with smart analysis, confidence scoring, and professional reporting
use anyhow::Result;
use serde::{Deserialize, Serialize};
use similar::TextDiff;
use std::path::PathBuf;

/// The parsed changes for a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedDiff {
    /// Path of the file the diff applies to.
    pub file_path: PathBuf,
    /// Full text of the old version, when the diff was built from file contents;
    /// `None` when it was parsed from diff text.
    pub old_content: Option<String>,
    /// Full text of the new version, when the diff was built from file contents;
    /// `None` when it was parsed from diff text.
    pub new_content: Option<String>,
    /// Hunks in the order they were produced by the parser.
    pub hunks: Vec<DiffHunk>,
    /// Marks the diff as belonging to a binary file.
    pub is_binary: bool,
}

/// One contiguous group of changed lines together with its surrounding context.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiffHunk {
    /// Line number in the old file at which the hunk starts.
    pub old_start: usize,
    /// Number of old-file lines the hunk covers.
    pub old_lines: usize,
    /// Line number in the new file at which the hunk starts.
    pub new_start: usize,
    /// Number of new-file lines the hunk covers.
    pub new_lines: usize,
    /// The hunk header text (the `@@ -a,b +c,d @@` line).
    pub context: String,
    /// The individual lines of the hunk, in order.
    pub changes: Vec<DiffLine>,
}

/// A single line inside a [`DiffHunk`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiffLine {
    /// Line number in the old file; `None` for added lines.
    pub old_line_no: Option<usize>,
    /// Line number in the new file; `None` for removed lines.
    pub new_line_no: Option<usize>,
    /// Whether the line was added, removed, or is unchanged context.
    pub change_type: ChangeType,
    /// Line text without the diff marker character.
    pub content: String,
}

/// How a [`DiffLine`] relates to the old and new versions of a file.
///
/// The enum carries no data, so it is `Copy` and has a total equality (`Eq`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum ChangeType {
    /// The line exists only in the new version.
    Added,
    /// The line exists only in the old version.
    Removed,
    /// The line is unchanged and present in both versions.
    Context,
}

/// Stateless namespace type; all parsing entry points are associated functions.
pub struct DiffParser;

impl DiffParser {
    /// Parses git-style unified diff text into one [`UnifiedDiff`] per file.
    ///
    /// Only sections introduced by a `diff --git` line are recognised; any
    /// other text between sections is skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if a `diff --git` line or a hunk header is malformed.
    pub fn parse_unified_diff(diff_content: &str) -> Result<Vec<UnifiedDiff>> {
        let lines: Vec<&str> = diff_content.lines().collect();
        let mut diffs = Vec::new();
        let mut i = 0;

        while i < lines.len() {
            if lines[i].starts_with("diff --git") {
                // The helper advances `i` past everything it consumed.
                diffs.push(Self::parse_single_file_diff(&lines, &mut i)?);
            } else {
                i += 1;
            }
        }

        Ok(diffs)
    }

    /// Computes a line-based diff between two texts and groups it into hunks
    /// with three lines of context.
    ///
    /// Line numbers are 1-based. `content` holds each line without its line
    /// terminator, matching what [`DiffParser::parse_unified_diff`] produces.
    /// A side that contributes no lines to a hunk is reported with start `0`,
    /// the same convention git uses (`-0,0`).
    pub fn parse_text_diff(old_content: &str, new_content: &str, file_path: PathBuf) -> Result<UnifiedDiff> {
        let diff = TextDiff::from_lines(old_content, new_content);
        let old_slices = diff.old_slices();
        let new_slices = diff.new_slices();
        let mut hunks = Vec::new();

        for group in diff.grouped_ops(3) {
            let mut changes = Vec::new();
            let mut old_start: Option<usize> = None;
            let mut new_start: Option<usize> = None;
            let mut old_count = 0usize;
            let mut new_count = 0usize;

            for op in group {
                match op.tag() {
                    similar::DiffTag::Equal => {
                        for (old_idx, new_idx) in op.old_range().zip(op.new_range()) {
                            old_start = old_start.or(Some(old_idx + 1));
                            new_start = new_start.or(Some(new_idx + 1));
                            old_count += 1;
                            new_count += 1;
                            changes.push(DiffLine {
                                old_line_no: Some(old_idx + 1),
                                new_line_no: Some(new_idx + 1),
                                change_type: ChangeType::Context,
                                content: Self::strip_line_ending(old_slices[old_idx]).to_string(),
                            });
                        }
                    }
                    // A delete has an empty new range and an insert an empty old
                    // range, so all three tags reduce to "removed lines first,
                    // then added lines".
                    similar::DiffTag::Delete | similar::DiffTag::Insert | similar::DiffTag::Replace => {
                        for old_idx in op.old_range() {
                            old_start = old_start.or(Some(old_idx + 1));
                            old_count += 1;
                            changes.push(DiffLine {
                                old_line_no: Some(old_idx + 1),
                                new_line_no: None,
                                change_type: ChangeType::Removed,
                                content: Self::strip_line_ending(old_slices[old_idx]).to_string(),
                            });
                        }
                        for new_idx in op.new_range() {
                            new_start = new_start.or(Some(new_idx + 1));
                            new_count += 1;
                            changes.push(DiffLine {
                                old_line_no: None,
                                new_line_no: Some(new_idx + 1),
                                change_type: ChangeType::Added,
                                content: Self::strip_line_ending(new_slices[new_idx]).to_string(),
                            });
                        }
                    }
                }
            }

            if changes.is_empty() {
                continue;
            }

            // A start is only missing when that side has no lines in the hunk.
            let old_start = old_start.unwrap_or(0);
            let new_start = new_start.unwrap_or(0);
            hunks.push(DiffHunk {
                old_start,
                old_lines: old_count,
                new_start,
                new_lines: new_count,
                context: format!("@@ -{},{} +{},{} @@", old_start, old_count, new_start, new_count),
                changes,
            });
        }

        Ok(UnifiedDiff {
            file_path,
            old_content: Some(old_content.to_string()),
            new_content: Some(new_content.to_string()),
            hunks,
            is_binary: false,
        })
    }

    /// Removes one trailing line terminator (`\r\n`, `\n` or `\r`), if present.
    fn strip_line_ending(line: &str) -> &str {
        line.strip_suffix("\r\n")
            .or_else(|| line.strip_suffix('\n'))
            .or_else(|| line.strip_suffix('\r'))
            .unwrap_or(line)
    }

    /// Parses one file section starting at the `diff --git` line `lines[*i]`.
    ///
    /// On return `*i` points at the first line that does not belong to this
    /// section (the next `diff --git` line, unrelated text, or end of input).
    fn parse_single_file_diff(lines: &[&str], i: &mut usize) -> Result<UnifiedDiff> {
        let file_path = Self::extract_file_path(lines[*i])?;
        *i += 1;

        // Skip the extended header (index, mode, ---/+++ lines, ...) while
        // watching for the markers git emits for binary files.
        let mut is_binary = false;
        while *i < lines.len() && !lines[*i].starts_with("@@") && !lines[*i].starts_with("diff --git") {
            if lines[*i].starts_with("Binary files ") || lines[*i].starts_with("GIT binary patch") {
                is_binary = true;
            }
            *i += 1;
        }

        let mut hunks = Vec::new();
        while *i < lines.len() && lines[*i].starts_with("@@") {
            hunks.push(Self::parse_hunk(lines, i)?);
        }

        Ok(UnifiedDiff {
            file_path: PathBuf::from(file_path),
            old_content: None,
            new_content: None,
            hunks,
            is_binary,
        })
    }

    /// Extracts the old-side path from a `diff --git a/<path> b/<path>` line.
    ///
    /// Exactly one leading `a/` is removed, so a file living in a directory
    /// named `a` keeps its directory. Paths containing whitespace are not
    /// supported because the line is split on whitespace.
    ///
    /// # Errors
    ///
    /// Returns an error if the line has fewer than four whitespace-separated
    /// tokens.
    fn extract_file_path(line: &str) -> Result<String> {
        let mut tokens = line.split_whitespace();
        // Tokens: "diff", "--git", old path, new path.
        match (tokens.nth(2), tokens.next()) {
            (Some(old_path), Some(_new_path)) => {
                Ok(old_path.strip_prefix("a/").unwrap_or(old_path).to_string())
            }
            _ => anyhow::bail!("Invalid diff header: {}", line),
        }
    }

    /// Parses one hunk whose `@@` header is at `lines[*i]`.
    ///
    /// Consumes lines up to, but not including, the next `@@` or `diff --git`
    /// line and leaves `*i` there.
    ///
    /// # Errors
    ///
    /// Returns an error if the hunk header is malformed.
    fn parse_hunk(lines: &[&str], i: &mut usize) -> Result<DiffHunk> {
        let header = lines[*i];
        let (old_start, old_lines, new_start, new_lines) = Self::parse_hunk_header(header)?;
        *i += 1;

        let mut changes = Vec::new();
        let mut old_line = old_start;
        let mut new_line = new_start;

        while *i < lines.len() {
            let line = lines[*i];
            if line.starts_with("@@") || line.starts_with("diff --git") {
                break;
            }
            *i += 1;

            // "\ No newline at end of file" annotates the previous line; it is
            // not file content and must not advance the line counters.
            if line.starts_with('\\') {
                continue;
            }

            let (change_type, content) = if let Some(rest) = line.strip_prefix('+') {
                (ChangeType::Added, rest)
            } else if let Some(rest) = line.strip_prefix('-') {
                (ChangeType::Removed, rest)
            } else if let Some(rest) = line.strip_prefix(' ') {
                (ChangeType::Context, rest)
            } else if line.is_empty() {
                // An empty context line whose single leading space was stripped.
                // Only accept it while the header still expects lines on both
                // sides; otherwise it is stray whitespace after the hunk.
                let expected = old_line - old_start < old_lines && new_line - new_start < new_lines;
                if !expected {
                    continue;
                }
                (ChangeType::Context, line)
            } else {
                // Unknown marker: keep the whole line as context.
                (ChangeType::Context, line)
            };

            let (old_line_no, new_line_no) = match change_type {
                ChangeType::Added => (None, Some(new_line)),
                ChangeType::Removed => (Some(old_line), None),
                ChangeType::Context => (Some(old_line), Some(new_line)),
            };
            if old_line_no.is_some() {
                old_line += 1;
            }
            if new_line_no.is_some() {
                new_line += 1;
            }

            changes.push(DiffLine {
                old_line_no,
                new_line_no,
                change_type,
                content: content.to_string(),
            });
        }

        Ok(DiffHunk {
            old_start,
            old_lines,
            new_start,
            new_lines,
            context: header.to_string(),
            changes,
        })
    }

    /// Parses `@@ -old_start[,old_lines] +new_start[,new_lines] @@` and returns
    /// `(old_start, old_lines, new_start, new_lines)`.
    ///
    /// An omitted line count defaults to `1`.
    ///
    /// # Errors
    ///
    /// Returns an error if the header does not match the expected shape or a
    /// number does not fit in `usize`.
    fn parse_hunk_header(header: &str) -> Result<(usize, usize, usize, usize)> {
        // Compiled once and reused for every hunk.
        static HUNK_HEADER: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
        let re = HUNK_HEADER.get_or_init(|| {
            regex::Regex::new(r"@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@")
                .expect("hunk header pattern is a valid regex")
        });

        let caps = re.captures(header)
            .ok_or_else(|| anyhow::anyhow!("Invalid hunk header: {}", header))?;

        let digits = |group: usize| caps.get(group).map_or("", |m| m.as_str());
        let count = |group: usize| -> Result<usize> {
            match digits(group) {
                "" => Ok(1),
                text => Ok(text.parse()?),
            }
        };

        let old_start: usize = digits(1).parse()?;
        let old_lines = count(2)?;
        let new_start: usize = digits(3).parse()?;
        let new_lines = count(4)?;

        Ok((old_start, old_lines, new_start, new_lines))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Diffing two short texts keeps the supplied path and yields at least one hunk.
    #[test]
    fn test_parse_text_diff() {
        let path = PathBuf::from("test.txt");
        let before = ["line1", "line2", "line3"].join("\n");
        let after = ["line1", "modified", "line3", "line4"].join("\n");

        let parsed = DiffParser::parse_text_diff(&before, &after, path.clone()).unwrap();

        assert_eq!(parsed.file_path, path);
        assert!(!parsed.hunks.is_empty());
    }
}