use std::mem;
use std::cmp::max;
use super::{Diff};
pub fn find_diff<S: OperationScore>(old: &str, new: &str, scorer: &S) -> Diff {
let mut diff = Diff::new();
let mut insert_index = 0;
let mut delete_index = 0;
let old_rev = old.chars().rev().collect::<String>();
let new_rev = new.chars().rev().collect::<String>();
hirschberg(old, new, &old_rev, &new_rev, scorer, &mut diff, &mut insert_index, &mut delete_index);
diff
}
macro_rules! do_insert {
($s: expr, $index: expr, $diff: expr) => (
{
let bytes = $s.bytes().collect::<Vec<_> >();
let byte_len = bytes.len();
$diff.add_insert(*$index, bytes);
*$index += byte_len;
}
);
}
macro_rules! do_delete {
($length: expr, $delete_index: expr, $insert_index: expr, $diff: expr) => (
{
$diff.add_delete(*$insert_index - *$delete_index, $length);
*$delete_index += $length;
*$insert_index += $length;
}
);
}
fn hirschberg<S: OperationScore>(old: &str, new: &str, old_rev: &str, new_rev: &str, scorer: &S, diff: &mut Diff, insert_index: &mut usize, delete_index: &mut usize) {
trace!("'{}' ({}) '{}' ({})", old, old_rev, new, new_rev);
let old_len = old.len();
let new_len = new.len();
if old_len == 0 {
do_insert!(new, insert_index, diff);
} else if new_len == 0 {
do_delete!(old_len, delete_index, insert_index, diff);
}
else if old_len == 1 {
let old_char = old.chars().next().unwrap();
match new.chars().position(|c| c == old_char) {
Some(position) => {
if position > 0 {
do_insert!(new[..position], insert_index, diff);
}
*insert_index += 1;
if new_len - position > 1 {
do_insert!(new[position + 1..], insert_index, diff);
}
} None => {
do_insert!(new, insert_index, diff);
do_delete!(1, delete_index, insert_index, diff);
}
}
}
else if new_len == 1 {
let new_char = new.chars().next().unwrap();
match old.chars().position(|c| c == new_char) {
Some(position) => {
if position > 0 {
do_delete!(position, delete_index, insert_index, diff);
}
*insert_index += 1;
if old_len - position > 1 {
let delete_len = old_len - position - 1;
do_delete!(delete_len, delete_index, insert_index, diff);
}
} None => {
do_insert!(new, insert_index, diff);
do_delete!(old_len, delete_index, insert_index, diff);
}
}
} else {
let old_mid = old_len / 2;
let score_l = nw_score(&old[..old_mid], new, scorer);
let score_r = nw_score(&old_rev[..old_len - old_mid], new_rev, scorer);
let new_mid = score_l.iter()
.zip(score_r.iter().rev())
.map(|(l, r)| l + r)
.zip(0..new_len + 1).max().unwrap().1;
hirschberg(&old[..old_mid], &new[..new_mid], &old_rev[old_len - old_mid..], &new_rev[new_len - new_mid..], scorer, diff, insert_index, delete_index);
hirschberg(&old[old_mid..], &new[new_mid..], &old_rev[..old_len - old_mid], &new_rev[..new_len - new_mid], scorer, diff, insert_index, delete_index);
}
}
pub trait OperationScore {
fn insert_score(&self, c: char) -> i32;
fn delete_score(&self, c: char) -> i32;
fn substitution_score(&self, old: char, new: char) -> i32;
fn match_score(&self, c: char) -> i32;
}
pub struct EditDistance;
impl OperationScore for EditDistance {
#[inline]
fn insert_score(&self, _: char) -> i32 {
-1
}
#[inline]
fn delete_score(&self, _: char) -> i32 {
-1
}
#[inline]
fn substitution_score(&self, _: char, _: char) -> i32 {
-2
}
#[inline]
fn match_score(&self, _: char) -> i32 {
0
}
}
fn nw_score<S: OperationScore>(old: &str, new: &str, scorer: &S) -> Vec<i32> {
trace!("nw_score for '{}' - '{}'", old, new);
let row_len = new.len() + 1;
let mut last_row = Vec::with_capacity(row_len);
let mut this_row = Vec::with_capacity(row_len);
let mut total_insert = 0;
last_row.push(0);
for new_char in new.chars() {
total_insert += scorer.insert_score(new_char);
last_row.push(total_insert);
}
trace!("{:?}", last_row);
for old_char in old.chars() {
this_row.push(last_row[0] + scorer.delete_score(old_char));
for (new_index, new_char) in new.chars().enumerate() {
let score_sub = last_row[new_index] + if old_char == new_char {
scorer.match_score(old_char)
} else {
scorer.substitution_score(old_char, new_char)
};
let score_del = last_row[new_index + 1] + scorer.delete_score(old_char);
let score_ins = this_row[new_index] + scorer.insert_score(new_char);
this_row.push(max(max(score_sub, score_del), score_ins))
}
trace!("{:?}", this_row);
last_row = mem::replace(&mut this_row, Vec::with_capacity(row_len));
}
last_row
}
#[cfg(test)]
mod test {
extern crate env_logger;
use super::{nw_score, find_diff, EditDistance, OperationScore};
use super::super::{Insert, Delete, Diff};
struct ExampleScores;
macro_rules! check_diff {
($start: tt | $new: tt | $scorer: tt | $(($insert_pos : tt, $insert_value: tt)),* | $(($delete_pos: tt, $delete_len: tt)),*) => {
{
check_diff_workaround!($start; $new; $scorer; $(($insert_pos, $insert_value)),*; $(($delete_pos, $delete_len)),*)
}
};
}
macro_rules! check_diff_workaround {
($start: expr ; $new: expr ; $scorer: expr; $(($insert_pos : tt, $insert_value: tt)),* ; $(($delete_pos: tt, $delete_len: tt)),*) => {
{
let diff = find_diff($start, $new, &$scorer);
assert_eq!(Diff {
inserts: vec![$(Insert{position: $insert_pos, data: $insert_value.bytes().collect()}),*],
deletes: vec![$(Delete{position: $delete_pos, len: $delete_len}),*]
}, diff);
assert_eq!(diff.apply_to_string($start).unwrap(), $new.to_string());
}
};
}
impl OperationScore for ExampleScores {
#[inline]
fn insert_score(&self, _: char) -> i32 {
-2
}
#[inline]
fn delete_score(&self, _: char) -> i32 {
-2
}
#[inline]
fn substitution_score(&self, _: char, _: char) -> i32 {
-1
}
#[inline]
fn match_score(&self, _: char) -> i32 {
2
}
}
#[test]
fn score() {
assert_eq!(nw_score("ACGC", "CGTAT", &EditDistance{}), vec![-4, -3, -2, -3, -4, -5]);
assert_eq!(nw_score("AGTA", "TATGC", &EditDistance{}), vec![-4, -3, -2, -3, -4, -5]);
assert_eq!(nw_score("ACGC", "CGTAT", &ExampleScores{}), vec![-8, -4, 0, 1, -1, -3]);
assert_eq!(nw_score("AGTA", "TATGC", &ExampleScores{}), vec![-8, -4, 0, -2, -1, -3]);
}
#[test]
fn do_find_diff() {
check_diff!(
"kitten" |
"kettle" |
EditDistance |
(1, "e"), (5, "l") |
(2, 1), (6, 1)
);
check_diff!(
"meadow" |
"yellowing" |
EditDistance |
(0, "y"), (3, "ll"), (9, "ing") |
(1, 1), (4, 2)
);
check_diff!(" I've" |
" I" |
EditDistance |
|
(2, 3)
);
check_diff!(" I've got a new place" |
" I found a new place" |
EditDistance |
(6, "f"), (9, "und") |
(2, 3), (4, 1), (8, 1)
);
check_diff!(
"Since my baby left me I've got a new place to dwell\nI walk down a lonely street to Heartbreak Hotel." |
"Since my baby left me I found a new place to dwell\nDown at the end of 'Lonely Street' to 'Heartbreak Hotel.'" |
EditDistance |
(27, "f"), (30, "und"), (56, "Down"), (64, "t the"), (72, "en"), (75, " "), (77, "f"), (81, "'L"), (92, "S"), (99, "'"), (104, "'"), (122, "'") |
(23, 3), (25, 1), (29, 1),(55, 1), (56, 1), (62, 2), (69, 2), (72, 3), (79, 1)
);
}
}