use std::borrow::Cow;
use std::collections::HashMap;
use ropey::{Rope, RopeSlice};
use super::trimnl;
const MIN_SIZE: usize = 32;
#[derive(Clone, Debug)]
pub enum DeltaElement<'a> {
Copy(usize, usize), Insert(RopeSlice<'a>),
}
#[derive(Clone, Debug)]
pub struct Delta<'a> {
pub els: Vec<DeltaElement<'a>>,
}
fn find_ne_char_back(
base: &Rope,
base_off: usize,
target: &Rope,
targ_off: usize,
stop: Option<usize>,
) -> usize {
let stop = stop.unwrap_or(usize::MAX);
let mut start = 0;
while base_off >= start && targ_off >= start {
let base_idx = base_off - start;
let targ_idx = targ_off - start;
if start >= stop {
return start;
}
let base_char = base.get_char(base_idx);
let targ_char = target.get_char(targ_idx);
if let (Some(c1), Some(c2)) = (base_char, targ_char) {
if c1 == c2 {
start += 1;
continue;
}
}
break;
}
return start;
}
fn find_ne_char(base: &Rope, base_off: usize, target: &Rope, targ_off: usize) -> usize {
let mut end = 0;
loop {
let base_idx = base_off + end;
let targ_idx = targ_off + end;
if let (Some(c1), Some(c2)) = (base.get_char(base_idx), target.get_char(targ_idx)) {
if c1 == c2 {
end += 1;
continue;
}
}
break;
}
return end;
}
fn find_min_diff_range(base: &Rope, target: &Rope) -> (usize, usize) {
let b_end = base.len_chars();
let t_end = target.len_chars();
let start = find_ne_char(base, 0, target, 0);
let unscanned = b_end.min(t_end) - start;
let end = match unscanned {
0 => 0,
n => {
let b_idx = b_end.saturating_sub(1);
let t_idx = t_end.saturating_sub(1);
find_ne_char_back(base, b_idx, target, t_idx, Some(n))
},
};
return (start, end);
}
pub fn compute_delta<'a>(base: &Rope, target: &'a Rope) -> Delta<'a> {
let mut builder = DiffBuilder::default();
let (start_offset, diff_end) = find_min_diff_range(base, target);
let target_end = target.len_chars() - diff_end;
if start_offset > 0 {
builder.copy(0, 0, start_offset);
}
if start_offset == base.len_chars() && target.len_chars() == base.len_chars() {
return builder.into_delta(target);
}
let line_hashes = make_line_hashes(base, MIN_SIZE);
let line_count = target.len_lines();
let mut matches = Vec::with_capacity(line_count);
let mut targ_line_offset = 0;
let mut prev_base = 0;
let mut needs_subseq = false;
for line in target.slice(start_offset..target_end).lines().map(trimnl) {
let line = Cow::from(line);
let non_ws = non_ws_offset(&line);
if line.len() - non_ws >= MIN_SIZE {
if let Some(base_off) = line_hashes.get(&line[non_ws..]) {
let targ_off = targ_line_offset + non_ws;
matches.push((start_offset + targ_off, *base_off));
if *base_off < prev_base {
needs_subseq = true;
}
prev_base = *base_off;
}
}
targ_line_offset += line.len();
}
let longest_subseq = if needs_subseq {
longest_increasing_region_set(&matches)
} else {
matches
};
let mut prev_end = start_offset;
for (targ_off, base_off) in longest_subseq {
if targ_off <= prev_end {
continue;
}
let (left_dist, mut right_dist) = expand_match(base, target, base_off, targ_off, prev_end);
right_dist = right_dist.min(target_end - targ_off);
let targ_start = targ_off - left_dist;
let base_start = base_off - left_dist;
let len = left_dist + right_dist;
prev_end = targ_start + len;
builder.copy(base_start, targ_start, len);
}
if diff_end > 0 {
builder.copy(base.len_chars() - diff_end, target.len_chars() - diff_end, diff_end);
}
builder.into_delta(target)
}
fn expand_match(
base: &Rope,
target: &Rope,
base_off: usize,
targ_off: usize,
prev_match_targ_end: usize,
) -> (usize, usize) {
let max_left = targ_off - prev_match_targ_end;
let start = find_ne_char_back(base, base_off, target, targ_off, Some(max_left));
let end = find_ne_char(base, base_off, target, targ_off);
(start.min(max_left), end)
}
fn longest_increasing_region_set(items: &[(usize, usize)]) -> Vec<(usize, usize)> {
let mut result = vec![0];
let mut prev_chain = vec![0; items.len()];
for i in 1..items.len() {
let last_idx = *result.last().unwrap();
if items[last_idx].1 < items[i].1 {
prev_chain[i] = last_idx;
result.push(i);
continue;
}
let next_idx = match result.binary_search_by(|&j| items[j].1.cmp(&items[i].1)) {
Ok(_) => continue, Err(idx) => idx,
};
if items[i].1 < items[result[next_idx]].1 {
if next_idx > 0 {
prev_chain[i] = result[next_idx - 1];
}
result[next_idx] = i;
}
}
let mut u = result.len();
let mut v = *result.last().unwrap();
while u != 0 {
u -= 1;
result[u] = v;
v = prev_chain[v];
}
result.iter().map(|i| items[*i]).collect()
}
#[inline]
fn non_ws_offset(s: &str) -> usize {
s.as_bytes().iter().take_while(|b| **b == b' ' || **b == b'\t').count()
}
#[derive(Debug, Clone, Copy)]
struct DiffOp {
target_idx: usize,
base_idx: usize,
len: usize,
}
#[derive(Debug, Clone, Default)]
pub struct DiffBuilder {
ops: Vec<DiffOp>,
}
impl DiffBuilder {
fn copy(&mut self, base: usize, target: usize, len: usize) {
if let Some(prev) = self.ops.last_mut() {
let prev_end = prev.target_idx + prev.len;
let base_end = prev.base_idx + prev.len;
assert!(prev_end <= target, "{} <= {} prev {:?}", prev_end, target, &prev);
if prev_end == target && base_end == base {
prev.len += len;
return;
}
}
self.ops.push(DiffOp { target_idx: target, base_idx: base, len })
}
fn into_delta(self, target: &Rope) -> Delta<'_> {
let mut els = Vec::with_capacity(self.ops.len() * 2);
let mut targ_pos = 0;
for DiffOp { base_idx, target_idx, len } in self.ops {
if target_idx > targ_pos {
let slice = target.slice(targ_pos..target_idx);
els.push(DeltaElement::Insert(slice));
}
els.push(DeltaElement::Copy(base_idx, base_idx + len));
targ_pos = target_idx + len;
}
if targ_pos < target.len_chars() {
let slice = target.slice(targ_pos..target.len_chars());
els.push(DeltaElement::Insert(slice));
}
Delta { els }
}
}
fn make_line_hashes(base: &Rope, min_size: usize) -> HashMap<Cow<'_, str>, usize> {
let mut offset = 0;
let mut line_hashes = HashMap::with_capacity(base.len_chars() / 60);
for line in base.lines().map(trimnl) {
let line = Cow::from(line);
let non_ws = non_ws_offset(&line);
if line.len() - non_ws >= min_size {
let cow = match line {
Cow::Owned(ref s) => Cow::Owned(s[non_ws..].to_string()),
Cow::Borrowed(s) => Cow::Borrowed(&s[non_ws..]),
};
line_hashes.insert(cow, offset + non_ws);
}
offset += line.len();
}
line_hashes
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_ne_char() {
let r1 = Rope::from("foobar123");
let r2 = Rope::from("foobar456");
let r3 = Rope::from("afoobar123");
let res = find_ne_char(&r1, 0, &r1, 0);
assert_eq!(res, 9);
let res = find_ne_char(&r1, 0, &r2, 0);
assert_eq!(res, 6);
let res = find_ne_char(&r1, 0, &r3, 0);
assert_eq!(res, 0);
let res = find_ne_char(&r1, 0, &r3, 1);
assert_eq!(res, 9);
let res = find_ne_char(&r2, 0, &r3, 1);
assert_eq!(res, 6);
}
#[test]
fn test_ne_char_back() {
let r1 = Rope::from("foobar123");
let r2 = Rope::from("foobar456");
let r3 = Rope::from("afoobar123");
let res = find_ne_char_back(&r1, 8, &r1, 8, None);
assert_eq!(res, 9);
let res = find_ne_char_back(&r1, 8, &r2, 8, None);
assert_eq!(res, 0);
let res = find_ne_char_back(&r1, 8, &r3, 9, None);
assert_eq!(res, 9);
let res = find_ne_char_back(&r1, 5, &r2, 5, None);
assert_eq!(res, 6);
let res = find_ne_char_back(&r1, 8, &r3, 9, None);
assert_eq!(res, 9);
let res = find_ne_char_back(&r2, 5, &r3, 6, None);
assert_eq!(res, 6);
}
#[test]
fn test_min_diff_range() {
let r1 = Rope::from("hello world\nhello world\nhello\n");
let r2 = Rope::from("hello world\nhello world\nhello world\n");
let res = find_min_diff_range(&r1, &r1);
assert_eq!(res, (30, 0));
let res = find_min_diff_range(&r1, &r2);
assert_eq!(res, (29, 1));
let r1 = Rope::from("abcdef\n1234 5678\nghijkl\n");
let r2 = Rope::from("def\n1234 5678\nghijkl\n");
let res = find_min_diff_range(&r2, &r1);
assert_eq!(res, (0, 21));
}
}