cached-context 0.3.0

File cache with diff tracking for AI coding agents
Documentation
//! Diff module - computes unified diffs between old and new file content

use similar::{ChangeTag, TextDiff};
use std::collections::HashSet;

/// Result of computing a diff between two file contents
#[derive(Debug, Clone)]
pub struct DiffResult {
    /// Unified diff string
    pub diff: String,
    /// Whether there are any changes
    pub has_changes: bool,
    /// Number of lines changed (added + removed)
    pub lines_changed: usize,
    /// Line numbers in the NEW file that were added or modified
    pub changed_new_lines: Vec<usize>,
}

/// Compute a unified diff between old and new content
///
/// # Arguments
/// * `old_content` - The original file content
/// * `new_content` - The new file content
/// * `file_path` - Path to the file (used in diff headers)
///
/// # Returns
/// A `DiffResult` containing the unified diff string and metadata about changes
pub fn compute_diff(old_content: &str, new_content: &str, file_path: &str) -> DiffResult {
    // Handle edge cases: empty files
    if old_content.is_empty() && new_content.is_empty() {
        return DiffResult {
            diff: String::new(),
            has_changes: false,
            lines_changed: 0,
            changed_new_lines: vec![],
        };
    }

    // Create diff using similar crate
    let diff = TextDiff::from_lines(old_content, new_content);

    // Collect raw diff lines with their line numbers
    let mut raw_lines: Vec<RawDiffLine> = vec![];
    let mut old_line: usize = 1;
    let mut new_line: usize = 1;
    let mut lines_changed: usize = 0;

    for change in diff.iter_all_changes() {
        let tag = change.tag();
        let content = change.value();

        // Remove trailing newline for consistency
        let line_content = content.trim_end_matches('\n');

        match tag {
            ChangeTag::Equal => {
                raw_lines.push(RawDiffLine {
                    line_type: LineType::Keep,
                    content: line_content.to_string(),
                    old_line,
                    new_line,
                });
                old_line += 1;
                new_line += 1;
            }
            ChangeTag::Delete => {
                raw_lines.push(RawDiffLine {
                    line_type: LineType::Remove,
                    content: line_content.to_string(),
                    old_line,
                    new_line,
                });
                old_line += 1;
                lines_changed += 1;
            }
            ChangeTag::Insert => {
                raw_lines.push(RawDiffLine {
                    line_type: LineType::Add,
                    content: line_content.to_string(),
                    old_line,
                    new_line,
                });
                new_line += 1;
                lines_changed += 1;
            }
        }
    }

    // Early return if no changes
    if lines_changed == 0 {
        return DiffResult {
            diff: String::new(),
            has_changes: false,
            lines_changed: 0,
            changed_new_lines: vec![],
        };
    }

    // Collect which lines in the new file were changed
    let mut changed_new_lines_set: HashSet<usize> = HashSet::new();
    for rl in &raw_lines {
        match rl.line_type {
            LineType::Add => {
                changed_new_lines_set.insert(rl.new_line);
            }
            LineType::Remove => {
                // For removals, mark the adjacent new line as affected
                if rl.new_line > 0 {
                    changed_new_lines_set.insert(rl.new_line);
                }
            }
            LineType::Keep => {}
        }
    }

    // Group into hunks with 3 lines of context
    let hunks = format_hunks(&raw_lines);

    // Build the final diff string
    let header = format!("--- a/{}\n+++ b/{}", file_path, file_path);
    let diff = if hunks.is_empty() {
        header
    } else {
        format!("{}\n{}", header, hunks)
    };

    // Sort the changed new lines
    let mut changed_new_lines: Vec<usize> = changed_new_lines_set.into_iter().collect();
    changed_new_lines.sort();

    DiffResult {
        diff,
        has_changes: true,
        lines_changed,
        changed_new_lines,
    }
}

/// Internal representation of a line in the diff
#[derive(Debug, Clone, PartialEq)]
struct RawDiffLine {
    line_type: LineType,
    content: String,
    old_line: usize,
    new_line: usize,
}

/// Type of diff line
#[derive(Debug, Clone, PartialEq)]
enum LineType {
    Keep,
    Add,
    Remove,
}

/// Format raw diff lines into hunks with context
fn format_hunks(raw_lines: &[RawDiffLine]) -> String {
    const CONTEXT: usize = 3;
    let mut hunks: Vec<String> = vec![];
    let mut current_hunk: Vec<&RawDiffLine> = vec![];
    let mut last_change_idx: isize = -999;
    // Track the highest raw_lines index already added to the current hunk.
    // This replaces the O(n) contains() check with an O(1) comparison,
    // fixing the O(n^2) behavior for large diffs.
    let mut max_added_idx: usize = 0;

    for (i, line) in raw_lines.iter().enumerate() {
        if line.line_type != LineType::Keep {
            // If this change is far from the last, start a new hunk
            if i as isize - last_change_idx > (CONTEXT * 2 + 1) as isize && !current_hunk.is_empty()
            {
                hunks.push(format_hunk(&current_hunk));
                current_hunk = vec![];
                // Add leading context
                let ctx_start = i.saturating_sub(CONTEXT);
                current_hunk.extend(&raw_lines[ctx_start..i]);
                max_added_idx = i; // leading context covers up to i-1, next push is i
            } else if current_hunk.is_empty() {
                // First hunk, add leading context
                let ctx_start = i.saturating_sub(CONTEXT);
                current_hunk.extend(&raw_lines[ctx_start..i]);
                max_added_idx = i; // leading context covers up to i-1, next push is i
            }
            // Add all lines between last change context end and this change.
            // Use max_added_idx to skip lines already in the hunk (O(1) instead of O(n)).
            let context_end = (last_change_idx as usize).saturating_add(CONTEXT + 1);
            let fill_start = std::cmp::max(context_end, max_added_idx);
            if fill_start < i {
                current_hunk.extend(&raw_lines[fill_start..i]);
            }
            current_hunk.push(line);
            max_added_idx = i + 1;
            last_change_idx = i as isize;
        } else if (i as isize - last_change_idx) <= (CONTEXT as isize) && !current_hunk.is_empty() {
            current_hunk.push(line);
            max_added_idx = i + 1;
        }
    }

    if !current_hunk.is_empty() {
        hunks.push(format_hunk(&current_hunk));
    }

    hunks.join("\n")
}

/// Format a single hunk
fn format_hunk(lines: &[&RawDiffLine]) -> String {
    if lines.is_empty() {
        return String::new();
    }

    let first_line = lines.first().unwrap();

    // Count lines present in each side of the diff.
    // old_count = lines that exist in old file (Keep + Remove)
    // new_count = lines that exist in new file (Keep + Add)
    // Line-number arithmetic is incorrect for pure insertions/deletions because
    // old_line is not incremented for Add entries and new_line is not incremented
    // for Remove entries, causing the formula `last - first + 1` to return 1
    // instead of 0 for the empty side.
    let old_count = lines
        .iter()
        .filter(|l| l.line_type != LineType::Add)
        .count();
    let new_count = lines
        .iter()
        .filter(|l| l.line_type != LineType::Remove)
        .count();

    let mut hunk = format!(
        "@@ -{},{} +{},{} @@",
        first_line.old_line, old_count, first_line.new_line, new_count
    );

    for line in lines {
        let prefix = match line.line_type {
            LineType::Add => "+",
            LineType::Remove => "-",
            LineType::Keep => " ",
        };
        hunk.push_str(&format!("\n{}{}", prefix, line.content));
    }

    hunk
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_identical_content() {
        let content = "line1\nline2\nline3\n";
        let result = compute_diff(content, content, "test.txt");
        assert!(!result.has_changes);
        assert_eq!(result.lines_changed, 0);
        assert!(result.changed_new_lines.is_empty());
        assert!(result.diff.is_empty());
    }

    #[test]
    fn test_empty_to_content() {
        let old = "";
        let new = "line1\nline2\n";
        let result = compute_diff(old, new, "test.txt");
        assert!(result.has_changes);
        // similar crate uses -1,1 format for empty old content
        assert!(result.diff.contains("+line1"));
        assert!(result.diff.contains("+line2"));
    }

    #[test]
    fn test_content_to_empty() {
        let old = "line1\nline2\n";
        let new = "";
        let result = compute_diff(old, new, "test.txt");
        assert!(result.has_changes);
        // similar crate uses +1,0 format for empty new content
        assert!(result.diff.contains("-line1"));
        assert!(result.diff.contains("-line2"));
    }

    #[test]
    fn test_single_line_change() {
        let old = "hello world\n";
        let new = "hello rust\n";
        let result = compute_diff(old, new, "test.txt");
        assert!(result.has_changes);
        assert!(result.diff.contains("-hello world"));
        assert!(result.diff.contains("+hello rust"));
    }

    #[test]
    fn test_add_lines() {
        let old = "line1\nline3\n";
        let new = "line1\nline2\nline3\n";
        let result = compute_diff(old, new, "test.txt");
        assert!(result.has_changes);
        assert!(result.diff.contains("+line2"));
        // Line 2 is added, so it should be in changed_new_lines
        assert!(result.changed_new_lines.contains(&2));
    }

    #[test]
    fn test_remove_lines() {
        let old = "line1\nline2\nline3\n";
        let new = "line1\nline3\n";
        let result = compute_diff(old, new, "test.txt");
        assert!(result.has_changes);
        assert!(result.diff.contains("-line2"));
    }

    #[test]
    fn test_both_empty() {
        let result = compute_diff("", "", "test.txt");
        assert!(!result.has_changes);
        assert_eq!(result.lines_changed, 0);
    }

    #[test]
    fn test_hunk_header_pure_deletion() {
        // All old lines deleted, nothing added — new side must show count 0
        let result = compute_diff("line1\nline2\n", "", "test.txt");
        assert!(result.has_changes);
        assert!(
            result.diff.contains("+1,0") || result.diff.contains("+0,0"),
            "Pure deletion must show 0 new lines in @@ header, got:\n{}",
            result.diff
        );
    }

    #[test]
    fn test_hunk_header_pure_insertion() {
        // Nothing deleted, all new lines added — old side must show count 0
        let result = compute_diff("", "line1\nline2\n", "test.txt");
        assert!(result.has_changes);
        assert!(
            result.diff.contains("-1,0") || result.diff.contains("-0,0"),
            "Pure insertion must show 0 old lines in @@ header, got:\n{}",
            result.diff
        );
    }

    #[test]
    fn test_hunk_header_single_line_replacement() {
        // 1 old line replaced by 1 new line — must be @@ -1,1 +1,1 @@
        let result = compute_diff("hello world\n", "hello rust\n", "test.txt");
        assert!(result.has_changes);
        assert!(
            result.diff.contains("-1,1") && result.diff.contains("+1,1"),
            "1-for-1 replacement must produce @@ -1,1 +1,1 @@, got:\n{}",
            result.diff
        );
    }

    #[test]
    fn test_changed_new_lines_tracking() {
        let old = "a\nb\nc\nd\ne\n";
        let new = "a\nb\nX\nd\nY\n";
        let result = compute_diff(old, new, "test.txt");
        assert!(result.has_changes);
        // Lines 3 and 5 changed (new positions)
        assert!(result.changed_new_lines.contains(&3));
        assert!(result.changed_new_lines.contains(&5));
    }
}