use std::{
cmp::min,
collections::HashSet,
mem::swap,
};
use itertools::EitherOrBoth;
use pathfinding::{
kuhn_munkres,
matrix::Matrix,
};
use crate::{
VersionAmount,
version::{
VersionComponent,
VersionPiece,
},
};
fn levenshtein<T: Eq>(from: &[T], to: &[T]) -> usize {
let (from_len, to_len) = (from.len(), to.len());
if from_len == 0 {
return to_len;
}
if to_len == 0 {
return from_len;
}
let (from, to, from_len, to_len) = if from_len > to_len {
(to, from, to_len, from_len)
} else {
(from, to, from_len, to_len)
};
let mut prev: Vec<usize> = (0..=to_len).collect();
let mut curr = vec![0; to_len + 1];
for i in 1..=from_len {
curr[0] = i;
for j in 1..=to_len {
let cost = usize::from(from[i - 1] != to[j - 1]);
curr[j] = min(min(curr[j - 1] + 1, prev[j] + 1), prev[j - 1] + cost);
}
swap(&mut prev, &mut curr);
}
prev[to_len]
}
#[must_use]
pub fn match_version_amounts<'a>(
mut from: &'a [VersionAmount],
mut to: &'a [VersionAmount],
) -> Vec<EitherOrBoth<&'a VersionAmount>> {
if from.is_empty() {
return to.iter().map(EitherOrBoth::Right).collect();
}
if to.is_empty() {
return from.iter().map(EitherOrBoth::Left).collect();
}
if from.len() == 1 && to.len() == 1 && from[0].version == to[0].version {
return vec![EitherOrBoth::Both(&from[0], &to[0])];
}
let swapped = if from.len() > to.len() {
(to, from) = (from, to);
true
} else {
false
};
let from_components: Vec<Vec<VersionComponent>> = from
.iter()
.map(|item| {
item
.version
.into_iter()
.filter_map(VersionPiece::component)
.collect()
})
.collect();
let to_components: Vec<Vec<VersionComponent>> = to
.iter()
.map(|item| {
item
.version
.into_iter()
.filter_map(VersionPiece::component)
.collect()
})
.collect();
let mut distances = Matrix::new(from.len(), to.len(), 0_i32);
for i in 0..from.len() {
for j in 0..to.len() {
distances[(i, j)] =
i32::try_from(levenshtein(&from_components[i], &to_components[j]))
.unwrap_or(i32::MAX);
}
}
let (_cost, matchings) =
kuhn_munkres::kuhn_munkres_min::<i32, Matrix<i32>>(&distances);
let mut remaining = (0..to.len()).collect::<HashSet<usize>>();
let mut pairings =
Vec::<EitherOrBoth<&VersionAmount>>::with_capacity(from.len() + to.len());
for (i, j) in matchings.into_iter().enumerate() {
pairings.push(EitherOrBoth::Both(&from[i], &to[j]));
remaining.remove(&j);
}
if !remaining.is_empty() {
let mut remaining = remaining.iter().map(|&j| &to[j]).collect::<Vec<_>>();
remaining.sort_unstable_by(|left, right| left.version.cmp(&right.version));
pairings.extend(remaining.into_iter().map(EitherOrBoth::Right));
}
if swapped {
pairings = pairings.into_iter().map(EitherOrBoth::flip).collect();
}
pairings
}
#[cfg(test)]
mod tests {
use std::num::NonZeroUsize;
use proptest::proptest;
use super::*;
use crate::{
Version,
version::VersionComponent,
};
fn version_amount(version: &str) -> VersionAmount {
VersionAmount::new(version, NonZeroUsize::MIN)
}
proptest! {
#[test]
fn no_crash_edit_dist(from in r"(\PC-)*(\PC)?", to in r"(\PC-)*(\PC)?") {
let from = Version::from(from);
let from: Vec<VersionComponent> = from
.into_iter()
.filter_map(VersionPiece::component)
.collect();
let to = Version::from(to);
let to: Vec<VersionComponent> = to
.into_iter()
.filter_map(VersionPiece::component)
.collect();
levenshtein(&from, &to);
}
#[test]
fn symmetry_edit_dist(from in r"(\PC-)*(\PC)?", to in r"(\PC-)*(\PC)?") {
let from = Version::from(from);
let from: Vec<VersionComponent> = from
.into_iter()
.filter_map(VersionPiece::component)
.collect();
let to = Version::from(to);
let to: Vec<VersionComponent> = to
.into_iter()
.filter_map(VersionPiece::component)
.collect();
let forward = levenshtein(&from, &to);
let backward = levenshtein(&to, &from);
assert_eq!(forward, backward);
}
}
#[test]
fn basic_component_edit_dist() {
let from = Version::from("foo-123.0-man-pages".to_owned());
let from: Vec<VersionComponent> = from
.into_iter()
.filter_map(VersionPiece::component)
.collect();
let to = Version::from("foo-123.4.12-man-pages".to_owned());
let to: Vec<VersionComponent> =
to.into_iter().filter_map(VersionPiece::component).collect();
assert_eq!(levenshtein(&from, &to), 2);
}
#[test]
fn levenshtein_distance_tests() {
assert_eq!(
levenshtein(
&"kitten".chars().collect::<Vec<_>>(),
&"sitting".chars().collect::<Vec<_>>()
),
3
);
assert_eq!(
levenshtein(
&"".chars().collect::<Vec<_>>(),
&"hello".chars().collect::<Vec<_>>()
),
5
);
assert_eq!(
levenshtein(
&"abcd".chars().collect::<Vec<_>>(),
&"dcba".chars().collect::<Vec<_>>()
),
4
);
assert_eq!(
levenshtein(
&"12345".chars().collect::<Vec<_>>(),
&"12345".chars().collect::<Vec<_>>()
),
0
);
assert_eq!(
levenshtein(
&"distance".chars().collect::<Vec<_>>(),
&"difference".chars().collect::<Vec<_>>()
),
5
);
}
#[test]
fn levenshtein_edge_cases() {
assert_eq!(levenshtein::<char>(&[], &[]), 0);
assert_eq!(levenshtein(&['a'], &[]), 1);
assert_eq!(levenshtein(&[], &['a']), 1);
assert_eq!(levenshtein(&['a'], &['b']), 1);
assert_eq!(levenshtein(&['a'], &['a']), 0);
assert_eq!(
levenshtein(
&"ab".chars().collect::<Vec<_>>(),
&"ba".chars().collect::<Vec<_>>()
),
2
);
assert_eq!(
levenshtein(
&"ABC".chars().collect::<Vec<_>>(),
&"abc".chars().collect::<Vec<_>>()
),
3
);
let long = "a".repeat(1000);
assert_eq!(
levenshtein(
&long.chars().collect::<Vec<_>>(),
&long.chars().collect::<Vec<_>>()
),
0
);
let long_a = "a".repeat(1000);
let long_b = "b".repeat(1000);
assert_eq!(
levenshtein(
&long_a.chars().collect::<Vec<_>>(),
&long_b.chars().collect::<Vec<_>>()
),
1000
);
assert_eq!(
levenshtein(
&"こんにちは".chars().collect::<Vec<_>>(),
&"こんばんは".chars().collect::<Vec<_>>()
),
2
);
assert_eq!(
levenshtein(
&"abc".chars().collect::<Vec<_>>(),
&"abcabc".chars().collect::<Vec<_>>()
),
3
);
assert_eq!(levenshtein(&[1, 2, 3], &[1, 2, 3, 4, 5]), 2);
}
#[test]
fn match_version_amounts_matches_similar_versions() {
let left = [version_amount("6.16.0"), version_amount("5.116.0")];
let right = [version_amount("6.17.0"), version_amount("5.116.0-bin")];
let matched = match_version_amounts(&left, &right);
assert_eq!(matched.len(), 2);
assert!(matched.iter().all(EitherOrBoth::has_left));
assert!(matched.iter().all(EitherOrBoth::has_right));
}
#[test]
fn match_version_amounts_empty() {
let empty: &[VersionAmount] = &[];
let versions = [version_amount("1.0.0")];
let result = match_version_amounts(empty, &versions);
assert_eq!(result.len(), 1);
assert!(matches!(result[0], EitherOrBoth::Right(_)));
let result = match_version_amounts(&versions, empty);
assert_eq!(result.len(), 1);
assert!(matches!(result[0], EitherOrBoth::Left(_)));
let result = match_version_amounts(empty, empty);
assert!(result.is_empty());
}
#[test]
fn match_version_amounts_exact_matches() {
let a = [version_amount("1.0.0"), version_amount("2.0.0")];
let b = [version_amount("1.0.0"), version_amount("2.0.0")];
let result = match_version_amounts(&a, &b);
let both_count = result
.iter()
.filter(|result| matches!(result, EitherOrBoth::Both(_, _)))
.count();
assert_eq!(both_count, 2);
}
#[test]
fn match_version_amounts_unequal_sizes() {
let a = [
version_amount("1.0.0"),
version_amount("2.0.0"),
version_amount("3.0.0"),
];
let b = [version_amount("1.0.0")];
assert_eq!(match_version_amounts(&a, &b).len(), 3);
let a = [version_amount("1.0.0")];
let b = [
version_amount("1.0.0"),
version_amount("2.0.0"),
version_amount("3.0.0"),
];
assert_eq!(match_version_amounts(&a, &b).len(), 3);
}
#[test]
fn match_version_amounts_prefers_exact_matches() {
let a = [version_amount("1.0.0"), version_amount("2.0.0")];
let b = [version_amount("1.0.1"), version_amount("2.0.0")];
let result = match_version_amounts(&a, &b);
assert!(result.iter().any(|result| {
matches!(
result,
EitherOrBoth::Both(left, right)
if left.version.name == "2.0.0" && right.version.name == "2.0.0"
)
}));
}
}