// liboxen/repositories/diffs/utf8_diff.rs

1use crate::error::OxenError;
2use crate::model::diff::change_type::ChangeType;
3use crate::model::diff::text_diff::LineDiff;
4use crate::model::diff::text_diff::TextDiff;
5
6use difference::{Changeset, Difference};
7use std::path::PathBuf;
8
9/// Adds a slice of lines from a text block to the result vector with a given modification type.
10fn add_lines_to_diff(
11    result: &mut TextDiff,
12    text_block: &str,
13    modification: ChangeType,
14    lines_to_take: Option<(usize, usize)>,
15) {
16    let lines: Vec<&str> = text_block.split('\n').collect();
17    let (start, end) = lines_to_take.unwrap_or((0, lines.len()));
18
19    // Ensure start and end are within bounds
20    let start = start.min(lines.len());
21    let end = end.min(lines.len());
22
23    if start >= end {
24        return;
25    }
26
27    for line in &lines[start..end] {
28        result.lines.push(LineDiff {
29            modification,
30            text: line.to_string(),
31        });
32    }
33}
34
35pub fn diff(
36    original_data: Option<String>,
37    version_file_1: Option<PathBuf>,
38    compare_data: Option<String>,
39    version_file_2: Option<PathBuf>,
40) -> Result<TextDiff, OxenError> {
41    let mut result = TextDiff {
42        filename1: version_file_1
43            .clone()
44            .map(|p| p.to_string_lossy().to_string()),
45        filename2: version_file_2
46            .clone()
47            .map(|p| p.to_string_lossy().to_string()),
48        ..Default::default()
49    };
50
51    let original_data = original_data.unwrap_or_default();
52    let compare_data = compare_data.unwrap_or_default();
53
54    let Changeset { diffs, .. } = Changeset::new(&original_data, &compare_data, "\n");
55    log::debug!("Changeset created with {} diffs", diffs.len());
56
57    // Find the indices of all Add or Rem changes
58    let change_indices: Vec<usize> = diffs
59        .iter()
60        .enumerate()
61        .filter(|(_, d)| !matches!(d, Difference::Same(_)))
62        .map(|(i, _)| i)
63        .collect();
64
65    // If there are no changes, return an empty diff
66    if change_indices.is_empty() {
67        log::debug!("No changes detected, returning empty TextDiff.");
68        return Ok(result);
69    }
70
71    let mut last_processed_diff_idx: i32 = -1;
72    let mut post_context_lines_from_prev_chunk = 0;
73    let mut is_first_chunk = true;
74
75    for &change_idx in &change_indices {
76        if (change_idx as i32) <= last_processed_diff_idx {
77            continue;
78        }
79        log::debug!("Processing change at index: {change_idx}");
80
81        if !is_first_chunk {
82            result.lines.push(LineDiff {
83                modification: ChangeType::Unchanged,
84                text: "...".to_string(),
85            });
86        }
87        is_first_chunk = false;
88
89        let context_diff_idx = change_idx.saturating_sub(1);
90        let mut pre_context_lines_to_skip = 0;
91
92        if (context_diff_idx as i32) == last_processed_diff_idx {
93            pre_context_lines_to_skip = post_context_lines_from_prev_chunk;
94        }
95
96        if change_idx > 0
97            && let Some(Difference::Same(text)) = diffs.get(context_diff_idx)
98        {
99            let lines: Vec<_> = text.split('\n').collect();
100            let desired_start = lines.len().saturating_sub(3);
101            let actual_start = desired_start.max(pre_context_lines_to_skip);
102            log::debug!(
103                "Adding pre-context from diff [{context_diff_idx}], lines [{actual_start}..]"
104            );
105            add_lines_to_diff(
106                &mut result,
107                text,
108                ChangeType::Unchanged,
109                Some((actual_start, lines.len())),
110            );
111        }
112        post_context_lines_from_prev_chunk = 0;
113
114        let mut current_idx = change_idx;
115        while let Some(diff) = diffs.get(current_idx) {
116            match diff {
117                Difference::Add(text) => {
118                    log::debug!("Adding Added block at index {current_idx}");
119                    add_lines_to_diff(&mut result, text, ChangeType::Added, None);
120                }
121                Difference::Rem(text) => {
122                    log::debug!("Adding Removed block at index {current_idx}");
123                    add_lines_to_diff(&mut result, text, ChangeType::Removed, None);
124                }
125                Difference::Same(_) => {
126                    break;
127                }
128            }
129            last_processed_diff_idx = current_idx as i32;
130            current_idx += 1;
131        }
132
133        if let Some(Difference::Same(text)) = diffs.get(current_idx) {
134            let lines: Vec<_> = text.split('\n').collect();
135            let count = 2.min(lines.len());
136            log::debug!("Adding post-context from diff [{current_idx}], lines [..{count}]");
137            add_lines_to_diff(&mut result, text, ChangeType::Unchanged, Some((0, count)));
138
139            last_processed_diff_idx = current_idx as i32;
140            post_context_lines_from_prev_chunk = count;
141        }
142    }
143
144    log::debug!(
145        "contextual_diff returning result with {} lines",
146        result.lines.len()
147    );
148    Ok(result)
149}