use std::cmp::max;
use difference::Difference;
/// Granularity at which two texts are tokenised before being compared.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SplitType {
    /// Compare character by character (the text is split on the empty string).
    Character,
    /// Compare tokens separated by a single space (`" "`).
    Word,
    /// Compare tokens separated by a line feed (`"\n"`).
    Line,
    /// Compare the tokens produced by `smartsplit`, which separates runs of
    /// alphanumeric characters from the other characters.
    SmartWord,
}
#[allow(dead_code)]
pub fn diff(orig: &str, edit: &str, split: &SplitType) -> (i32, Vec<Difference>) {
let ch = Changeset::new(orig, edit, split);
(ch.distance, ch.diffs)
}
/// Result of comparing two texts: the list of differences, the split mode
/// that was used, and the distance between the inputs.
pub struct Changeset {
/// Sequence of chunks produced by `merge` for the two compared texts.
pub diffs: Vec<Difference>,
/// Copy of the split mode that was passed to `Changeset::new`.
pub split: SplitType,
/// Distance reported by `lcs`: the number of tokens of both inputs that
/// are not part of the common subsequence it found.
pub distance: i32,
}
impl Changeset {
    /// Builds the change set that turns `orig` into `edit`.
    ///
    /// The common subsequence and the distance are computed by `lcs`; the
    /// common part is then handed to `merge` to produce the list of chunks.
    /// `split` selects the token granularity and is stored in the result.
    pub fn new(orig: &str, edit: &str, split: &SplitType) -> Self {
        let (distance, common) = lcs(orig, edit, split);
        let diffs = merge(orig, edit, &common, split);
        Changeset {
            diffs,
            split: split.clone(),
            distance,
        }
    }
}
/// Splits `s` on the separator `split` and returns the pieces as slices of `s`.
///
/// Splitting on the empty string makes `str::split` emit an extra empty piece
/// at the front and at the back; both are removed here so that the result
/// holds exactly one piece per character.
fn strsplit<'a>(s: &'a str, split: &str) -> Vec<&'a str> {
    let pieces = s.split(split);
    if split.is_empty() {
        // Skip the leading empty piece, then drop the trailing one.
        let mut chars: Vec<&str> = pieces.skip(1).collect();
        chars.pop();
        chars
    } else {
        pieces.collect()
    }
}
/// Splits `s` into "smart word" tokens, returned as slices of `s`.
///
/// A maximal run of alphanumeric characters (`char::is_alphanumeric`) forms
/// one token; every other character is a token of its own. The tokens are
/// contiguous, so concatenating them yields `s` again. An empty input yields
/// a single empty token.
///
/// A non-alphanumeric first character is always separated from the word
/// that follows it, e.g. `" foo"` gives `[" ", "foo"]`, consistent with how
/// the same characters are tokenised anywhere else in the text.
fn smartsplit<'a>(s: &'a str) -> Vec<&'a str> {
    let mut tokens = Vec::new();
    let mut start = 0;
    let mut prev_alphanumeric = false;

    for (pos, ch) in s.char_indices() {
        let alphanumeric = ch.is_alphanumeric();
        // A token boundary lies before every character except one that
        // continues a run of alphanumeric characters.
        let continues_word = alphanumeric && prev_alphanumeric;
        if pos > 0 && !continues_word {
            // `start` and `pos` both come from `char_indices`, so they are
            // valid char boundaries.
            tokens.push(&s[start..pos]);
            start = pos;
        }
        prev_alphanumeric = alphanumeric;
    }

    // Whatever is left (possibly the empty string) is the final token.
    tokens.push(&s[start..]);
    tokens
}
/// Computes a longest common subsequence of `orig` and `edit`.
///
/// Both inputs are tokenised according to `split` (characters, space
/// separated words, `"\n"` separated lines, or `smartsplit` tokens) and the
/// subsequence is computed over those tokens.
///
/// Returns `(distance, common)` where
///
/// * `distance` is the number of tokens of both inputs that are *not* part of
///   the common subsequence (`len(orig) + len(edit) - 2 * len(common)`), and
/// * `common` is the common subsequence joined back into one string, using
///   the separator the tokens were split on (`" "` for words, `"\n"` for
///   lines, nothing otherwise) so that it can be tokenised again.
///
/// NOTE(review): for `SmartWord` the tokens are concatenated without a
/// separator, so re-tokenising `common` is not guaranteed to give back the
/// same tokens when two word tokens end up adjacent. The `String` return
/// type cannot express the token boundaries.
pub fn lcs(orig: &str, edit: &str, split: &SplitType) -> (i32, String) {
    let (orig_tokens, edit_tokens) = match split {
        SplitType::Character => (strsplit(orig, ""), strsplit(edit, "")),
        SplitType::Word => (strsplit(orig, " "), strsplit(edit, " ")),
        SplitType::Line => (strsplit(orig, "\n"), strsplit(edit, "\n")),
        SplitType::SmartWord => (smartsplit(orig), smartsplit(edit)),
    };
    let rows = orig_tokens.len();
    let cols = edit_tokens.len();

    // table[row * cols + col] holds the LCS length of orig_tokens[..=row]
    // and edit_tokens[..=col]; cells outside the table count as 0.
    let mut table = vec![0usize; rows * cols];
    for (row, orig_token) in orig_tokens.iter().enumerate() {
        for (col, edit_token) in edit_tokens.iter().enumerate() {
            let cell = if orig_token == edit_token {
                if row == 0 || col == 0 {
                    1
                } else {
                    table[(row - 1) * cols + col - 1] + 1
                }
            } else {
                let left = if col == 0 {
                    0
                } else {
                    table[row * cols + col - 1]
                };
                let above = if row == 0 {
                    0
                } else {
                    table[(row - 1) * cols + col]
                };
                max(left, above)
            };
            table[row * cols + col] = cell;
        }
    }

    // Walk back from the bottom-right cell. `row`/`col` are one past the cell
    // being inspected, so the walk ends as soon as either reaches 0.
    let mut common = Vec::new();
    let mut row = rows;
    let mut col = cols;
    while row > 0 && col > 0 {
        let (r, c) = (row - 1, col - 1);
        if orig_tokens[r] == edit_tokens[c] {
            common.push(orig_tokens[r]);
            row -= 1;
            col -= 1;
        } else if r == 0 {
            // First row: only moving left stays inside the table.
            col -= 1;
        } else if c == 0 {
            // First column: only moving up stays inside the table. Without
            // this case the comparison below would read the last cell of
            // the previous row and could step out of the table too early,
            // losing a match in the first column.
            row -= 1;
        } else if table[r * cols + c - 1] > table[(r - 1) * cols + c] {
            col -= 1;
        } else {
            row -= 1;
        }
    }
    common.reverse();

    // Join with the separator the tokens were split on, so that splitting
    // `common` again recovers the tokens.
    let separator = match split {
        SplitType::Word => " ",
        SplitType::Line => "\n",
        SplitType::Character | SplitType::SmartWord => "",
    };
    (
        (rows + cols - 2 * common.len()) as i32,
        common.join(separator),
    )
}
/// Interleaves `orig` and `edit` around their common part `common` and
/// returns the result as a list of `Same` / `Rem` / `Add` chunks.
///
/// All three strings are tokenised according to `split`. Tokens present in
/// `orig`, `edit` and `common` at the current position are collected into a
/// `Same` chunk; tokens of `orig` up to the next common token become a `Rem`
/// chunk and tokens of `edit` up to the next common token become an `Add`
/// chunk. The tokens of a chunk are joined with the separator they were split
/// on (`" "` for words, `"\n"` for lines, nothing otherwise).
///
/// `common` is expected to be a string whose tokens form a common
/// subsequence of both inputs, such as the one returned by `lcs`. A common
/// token that cannot be matched any more is skipped, so the function
/// terminates for arbitrary `common` values.
pub fn merge(orig: &str, edit: &str, common: &str, split: &SplitType) -> Vec<Difference> {
    /// Tokenises `text` as prescribed by `split`.
    fn tokens<'a>(text: &'a str, split: &SplitType) -> Vec<&'a str> {
        match split {
            SplitType::Character => text.split("").collect(),
            SplitType::Word => text.split(" ").collect(),
            SplitType::Line => text.split("\n").collect(),
            SplitType::SmartWord => smartsplit(text),
        }
    }

    let separator = match split {
        SplitType::Word => " ",
        SplitType::Line => "\n",
        SplitType::Character | SplitType::SmartWord => "",
    };

    let mut l = tokens(orig, split).into_iter().peekable();
    let mut r = tokens(edit, split).into_iter().peekable();
    let mut c = tokens(common, split).into_iter().peekable();

    // Tokenising an empty string still yields a leading empty token; skip it.
    if orig.is_empty() {
        l.next();
    }
    if edit.is_empty() {
        r.next();
    }
    if common.is_empty() {
        c.next();
    }

    let mut ret = Vec::new();
    while l.peek().is_some() || r.peek().is_some() {
        // Tokens shared by all three sequences.
        let mut same = Vec::new();
        while l.peek().is_some() && l.peek() == c.peek() && r.peek() == c.peek() {
            same.extend(l.next());
            r.next();
            c.next();
        }
        if !same.is_empty() {
            let joined = same.join(separator);
            // Character splitting produces empty boundary tokens; do not
            // report a chunk that consists only of those.
            if *split != SplitType::Character || !joined.is_empty() {
                ret.push(Difference::Same(joined));
            }
        }

        // Tokens only in `orig`, up to the next common token.
        let mut rem = Vec::new();
        while l.peek().is_some() && l.peek() != c.peek() {
            rem.extend(l.next());
        }
        if !rem.is_empty() {
            ret.push(Difference::Rem(rem.join(separator)));
        }

        // Tokens only in `edit`, up to the next common token.
        let mut add = Vec::new();
        while r.peek().is_some() && r.peek() != c.peek() {
            add.extend(r.next());
        }
        if !add.is_empty() {
            ret.push(Difference::Add(add.join(separator)));
        }

        // Nothing was consumed: one side is parked on a common token that
        // the other side can no longer reach (`common` is not a subsequence
        // of both inputs). Discard that token, otherwise the loop would
        // never end.
        if same.is_empty() && rem.is_empty() && add.is_empty() {
            c.next();
        }
    }
    ret
}