lumen 2.22.0

lumen is a command-line tool that uses AI to generate commit messages, summarise git diffs or past commits, and more.
use similar::{ChangeTag, TextDiff};

use super::types::{expand_tabs, ChangeType, DiffLine, InlineSegment};

/// Check if a string contains meaningful (non-whitespace) content.
fn has_meaningful_content(s: &str) -> bool {
    s.chars().any(|c| !c.is_whitespace())
}

/// Compute word-level diff segments for a pair of modified lines.
/// Returns Some((old_segments, new_segments)) if word-level highlighting is useful,
/// or None if the lines are too different to benefit from word-level highlighting.
///
/// Following Git's diff-highlight approach, word-level highlighting is only shown
/// when a significant portion of the line is unchanged. This prevents noisy
/// highlighting when unrelated lines are paired together.
fn compute_word_diff(
    old_text: &str,
    new_text: &str,
) -> Option<(Vec<InlineSegment>, Vec<InlineSegment>)> {
    let diff = TextDiff::configure().diff_unicode_words(old_text, new_text);

    let mut old_segments = Vec::new();
    let mut new_segments = Vec::new();

    // Track lengths for ratio calculation
    let mut unchanged_len = 0usize;

    for change in diff.iter_all_changes() {
        let text = change.value().to_string();
        match change.tag() {
            ChangeTag::Equal => {
                // Track meaningful (non-whitespace) unchanged content length
                if has_meaningful_content(&text) {
                    unchanged_len += text.trim().len();
                }
                // Unchanged text goes to both sides, not emphasized
                old_segments.push(InlineSegment {
                    text: text.clone(),
                    emphasized: false,
                });
                new_segments.push(InlineSegment {
                    text,
                    emphasized: false,
                });
            }
            ChangeTag::Delete => {
                // Deleted text only goes to old side, emphasized
                old_segments.push(InlineSegment {
                    text,
                    emphasized: true,
                });
            }
            ChangeTag::Insert => {
                // Inserted text only goes to new side, emphasized
                new_segments.push(InlineSegment {
                    text,
                    emphasized: true,
                });
            }
        }
    }

    // Calculate ratio of unchanged content vs total content
    // Use the longer line as the baseline to be conservative
    let old_trimmed_len = old_text.trim().len();
    let new_trimmed_len = new_text.trim().len();
    let total_len = old_trimmed_len.max(new_trimmed_len);

    // Only show word-level diff if at least 20% of content is unchanged
    // This prevents noisy highlighting when unrelated lines are paired
    const MIN_UNCHANGED_RATIO: f64 = 0.20;
    if total_len == 0 || (unchanged_len as f64 / total_len as f64) < MIN_UNCHANGED_RATIO {
        return None;
    }

    Some((old_segments, new_segments))
}

/// Computes a side-by-side diff using GitHub-style pairing.
///
/// This algorithm pairs consecutive deletions with consecutive insertions,
/// showing them on the same row. This avoids the visual offset where a modified
pub fn compute_side_by_side(old: &str, new: &str, tab_width: usize) -> Vec<DiffLine> {
    let diff = TextDiff::from_lines(old, new);
    let mut lines = Vec::new();
    let mut old_num = 1usize;
    let mut new_num = 1usize;

    // Collect all changes first
    let changes: Vec<_> = diff.iter_all_changes().collect();
    let mut i = 0;

    while i < changes.len() {
        let change = &changes[i];

        match change.tag() {
            ChangeTag::Equal => {
                let text = expand_tabs(change.value().trim_end(), tab_width);
                lines.push(DiffLine {
                    old_line: Some((old_num, text.clone())),
                    new_line: Some((new_num, text)),
                    change_type: ChangeType::Equal,
                    old_segments: None,
                    new_segments: None,
                });
                old_num += 1;
                new_num += 1;
                i += 1;
            }
            ChangeTag::Delete => {
                // Collect consecutive deletions
                let mut deletions = Vec::new();
                while i < changes.len() && changes[i].tag() == ChangeTag::Delete {
                    deletions.push((
                        old_num,
                        expand_tabs(changes[i].value().trim_end(), tab_width),
                    ));
                    old_num += 1;
                    i += 1;
                }

                // Collect consecutive insertions that follow
                let mut insertions = Vec::new();
                while i < changes.len() && changes[i].tag() == ChangeTag::Insert {
                    insertions.push((
                        new_num,
                        expand_tabs(changes[i].value().trim_end(), tab_width),
                    ));
                    new_num += 1;
                    i += 1;
                }

                // Pair deletions with insertions
                let max_len = deletions.len().max(insertions.len());
                for j in 0..max_len {
                    let old_line = deletions.get(j).cloned();
                    let new_line = insertions.get(j).cloned();

                    let change_type = match (&old_line, &new_line) {
                        (Some(_), Some(_)) => ChangeType::Modified,
                        (Some(_), None) => ChangeType::Delete,
                        (None, Some(_)) => ChangeType::Insert,
                        (None, None) => unreachable!(),
                    };

                    // Compute word-level diff for modified lines (if similar enough)
                    let (old_segments, new_segments) =
                        if matches!(change_type, ChangeType::Modified) {
                            let old_text = old_line.as_ref().map(|(_, t)| t.as_str()).unwrap_or("");
                            let new_text = new_line.as_ref().map(|(_, t)| t.as_str()).unwrap_or("");
                            // compute_word_diff returns None if lines are too different
                            if let Some((old_segs, new_segs)) =
                                compute_word_diff(old_text, new_text)
                            {
                                (Some(old_segs), Some(new_segs))
                            } else {
                                (None, None)
                            }
                        } else {
                            (None, None)
                        };

                    lines.push(DiffLine {
                        old_line,
                        new_line,
                        change_type,
                        old_segments,
                        new_segments,
                    });
                }
            }
            ChangeTag::Insert => {
                // Handle insertions that aren't preceded by deletions
                lines.push(DiffLine {
                    old_line: None,
                    new_line: Some((new_num, expand_tabs(change.value().trim_end(), tab_width))),
                    change_type: ChangeType::Insert,
                    old_segments: None,
                    new_segments: None,
                });
                new_num += 1;
                i += 1;
            }
        }
    }
    lines
}

pub fn find_hunk_starts(lines: &[DiffLine]) -> Vec<usize> {
    let mut hunks = Vec::new();
    let mut in_hunk = false;

    for (i, line) in lines.iter().enumerate() {
        let is_change = !matches!(line.change_type, ChangeType::Equal);
        if is_change && !in_hunk {
            hunks.push(i);
            in_hunk = true;
        } else if !is_change {
            in_hunk = false;
        }
    }
    hunks
}