#[cfg(test)]
use alloc::vec;
use alloc::vec::Vec;
use core::fmt::{self, Debug};
use core::hash::{Hash, Hasher};
use core::ops::{Add, Index, Range};
use crate::types::MapType;
#[inline(always)]
#[allow(clippy::neg_cmp_op_on_partial_ord)]
pub fn is_empty_range<T: PartialOrd<T>>(range: &Range<T>) -> bool {
!(range.start < range.end)
}
#[inline(always)]
/// Hashes `value` with 64-bit FNV-1a.
///
/// Used instead of the default hasher so that hash values are deterministic
/// and identical across platforms and program runs (the std hasher is
/// randomly seeded and its algorithm is unspecified).
pub(crate) fn stable_hash<T: Hash + ?Sized>(value: &T) -> u64 {
    /// Minimal FNV-1a implementation of the `Hasher` trait.
    struct Fnv1a(u64);
    impl Hasher for Fnv1a {
        #[inline(always)]
        fn finish(&self) -> u64 {
            self.0
        }
        #[inline(always)]
        fn write(&mut self, bytes: &[u8]) {
            // FNV-1a: xor the byte in first, then multiply by the prime.
            self.0 = bytes.iter().fold(self.0, |state, &byte| {
                (state ^ u64::from(byte)).wrapping_mul(0x100_0000_01b3)
            });
        }
    }
    // Seed with the FNV-1a 64-bit offset basis.
    let mut hasher = Fnv1a(0xcbf2_9ce4_8422_2325);
    value.hash(&mut hasher);
    hasher.finish()
}
/// A handle to an element that occurred exactly once in an inspected range,
/// as produced by [`unique`].
pub struct UniqueItem<'a, Idx: ?Sized> {
    /// The borrowed indexable sequence the element lives in.
    lookup: &'a Idx,
    /// Position of the element within `lookup`.
    index: usize,
}
impl<Idx: ?Sized> UniqueItem<'_, Idx>
where
    Idx: Index<usize>,
{
    /// Returns a reference to the underlying element this entry points at.
    #[inline(always)]
    pub fn value(&self) -> &Idx::Output {
        self.lookup.index(self.index)
    }

    /// Returns the element's position in the original sequence.
    #[inline(always)]
    pub fn original_index(&self) -> usize {
        self.index
    }
}
/// Debug-formats the item as its value plus original index.
///
/// `Idx: ?Sized` is added for consistency with the `PartialEq`/`Eq`/`Hash`
/// impls and the inherent impl, so slice-backed lookups (e.g. `[T]`) are
/// debuggable too; the explicit `'a` lifetime was unnecessary and is elided.
impl<Idx> Debug for UniqueItem<'_, Idx>
where
    Idx: Index<usize> + ?Sized,
    Idx::Output: Debug,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("UniqueItem")
            .field("value", &self.value())
            .field("original_index", &self.original_index())
            .finish()
    }
}
/// Compares two unique items by the values they point at; their positions
/// and backing lookups are ignored, so items from two different sequences
/// can be compared.
impl<'a, 'b, A, B> PartialEq<UniqueItem<'a, A>> for UniqueItem<'b, B>
where
    // NOTE(review): both type params are bounded by `'b`; `A: 'a` looks like
    // the intended bound, though the impl compiles either way — confirm.
    A: Index<usize> + 'b + ?Sized,
    B: Index<usize> + 'b + ?Sized,
    B::Output: PartialEq<A::Output>,
{
    #[inline(always)]
    fn eq(&self, other: &UniqueItem<'a, A>) -> bool {
        self.value() == other.value()
    }
}
// Equality delegates to the underlying values, so it is a full equivalence
// relation whenever the values' own equality is (`Idx::Output: Eq`).
impl<'a, Idx> Eq for UniqueItem<'a, Idx>
where
    Idx: Index<usize> + ?Sized,
    Idx::Output: Eq,
{
}
/// Hashes a unique item by the value it points at, keeping `Hash`
/// consistent with the value-based `PartialEq`/`Eq` impls.
impl<Idx> Hash for UniqueItem<'_, Idx>
where
    Idx: Index<usize> + ?Sized,
    Idx::Output: Hash,
{
    #[inline(always)]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.value().hash(state);
    }
}
/// Returns the items of `lookup[range]` that occur exactly once, ordered by
/// their position in the original sequence.
///
/// Candidates are grouped by [`stable_hash`]; within a bucket, equality is
/// confirmed against a representative element so hash collisions between
/// unequal values are handled correctly.
pub fn unique<Idx>(lookup: &Idx, range: Range<usize>) -> Vec<UniqueItem<'_, Idx>>
where
    Idx: Index<usize> + ?Sized,
    Idx::Output: Hash + Eq,
{
    // hash -> [(representative index, Some(index) while still unique)]
    let mut buckets = MapType::<u64, Vec<(usize, Option<usize>)>>::new();
    for index in range {
        let bucket = buckets.entry(stable_hash(&lookup[index])).or_default();
        match bucket
            .iter_mut()
            .find(|(representative, _)| lookup[index] == lookup[*representative])
        {
            // Second (or later) sighting: the value is no longer unique.
            Some((_, seen_once)) => *seen_once = None,
            // First sighting of this value within its bucket.
            None => bucket.push((index, Some(index))),
        }
    }
    let mut result: Vec<_> = buckets
        .into_iter()
        .flat_map(|(_, bucket)| bucket)
        .filter_map(|(_, unique_index)| unique_index)
        .map(|index| UniqueItem { lookup, index })
        .collect();
    // Map iteration order is unspecified; restore original sequence order.
    result.sort_by_key(|item| item.original_index());
    result
}
/// Counts how many leading items of `old[old_range]` and `new[new_range]`
/// compare equal, stopping at the first mismatch or at the end of the
/// shorter range.
pub fn common_prefix_len<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> usize
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    // `Range::is_empty` is the same `!(start < end)` test the helper used.
    if old_range.is_empty() || new_range.is_empty() {
        return 0;
    }
    new_range
        .zip(old_range)
        .take_while(|&(new_index, old_index)| new[new_index] == old[old_index])
        .count()
}
/// Counts how many trailing items of `old[old_range]` and `new[new_range]`
/// compare equal, walking backwards from the range ends until the first
/// mismatch or the shorter range is exhausted.
pub fn common_suffix_len<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> usize
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    // `Range::is_empty` is the same `!(start < end)` test the helper used.
    if old_range.is_empty() || new_range.is_empty() {
        return 0;
    }
    new_range
        .rev()
        .zip(old_range.rev())
        .take_while(|&(new_index, old_index)| new[new_index] == old[old_index])
        .count()
}
/// A `Vec` indexed as if it started at `offset` instead of zero, so token
/// sequences keep the index space of the source ranges they were built from.
struct OffsetLookup<Int> {
    offset: usize,
    vec: Vec<Int>,
}
impl<Int> Index<usize> for OffsetLookup<Int> {
    type Output = Int;
    // Translates the external index into vec space. Assumes
    // `offset <= index < offset + vec.len()`; anything else panics (debug:
    // on the subtraction underflow, release: on the slice access).
    #[inline(always)]
    fn index(&self, index: usize) -> &Self::Output {
        &self.vec[index - self.offset]
    }
}
/// Re-encodes two sequences into compact integer tokens such that equal
/// items (within and across the sequences) share the same token, letting a
/// diff algorithm compare cheap `Int`s instead of the original items.
pub struct IdentifyDistinct<Int> {
    old: OffsetLookup<Int>,
    new: OffsetLookup<Int>,
}
impl<Int> IdentifyDistinct<Int>
where
    Int: Add<Output = Int> + From<u8> + Default + Copy,
{
    /// Tokenizes `old[old_range]` and `new[new_range]` into `Int` ids so
    /// that equal items receive equal ids. Ids start at `Int::default()`
    /// and are handed out in first-encounter order: the whole old range
    /// first, then the new range.
    pub fn new<Old, New>(
        old: &Old,
        old_range: Range<usize>,
        new: &New,
        new_range: Range<usize>,
    ) -> Self
    where
        Old: Index<usize> + ?Sized,
        Old::Output: Eq + Hash,
        New: Index<usize> + ?Sized,
        New::Output: Eq + Hash + PartialEq<Old::Output>,
    {
        // Records which sequence the first occurrence of a value came from,
        // so later candidates know which lookup to compare against.
        #[derive(Clone, Copy)]
        enum Representative {
            Old(usize),
            New(usize),
        }
        // hash -> [(first occurrence, id)]; a bucket holds more than one
        // entry only when unequal values collide on the same hash.
        let mut buckets = MapType::<u64, Vec<(Representative, Int)>>::new();
        let mut old_tokens = Vec::new();
        let mut new_tokens = Vec::new();
        let mut next_id = Int::default();
        let step = Int::from(1);
        let old_offset = old_range.start;
        let new_offset = new_range.start;
        for index in old_range {
            let bucket = buckets.entry(stable_hash(&old[index])).or_default();
            let known = bucket.iter().find(|(rep, _)| match *rep {
                Representative::Old(first) => old[index] == old[first],
                // Only old-side representatives exist during this pass.
                Representative::New(_) => false,
            });
            let id = match known {
                Some(&(_, id)) => id,
                None => {
                    let fresh = next_id;
                    next_id = next_id + step;
                    bucket.push((Representative::Old(index), fresh));
                    fresh
                }
            };
            old_tokens.push(id);
        }
        for index in new_range {
            let bucket = buckets.entry(stable_hash(&new[index])).or_default();
            let known = bucket.iter().find(|(rep, _)| match *rep {
                Representative::Old(first) => new[index] == old[first],
                Representative::New(first) => new[index] == new[first],
            });
            let id = match known {
                Some(&(_, id)) => id,
                None => {
                    let fresh = next_id;
                    next_id = next_id + step;
                    bucket.push((Representative::New(index), fresh));
                    fresh
                }
            };
            new_tokens.push(id);
        }
        IdentifyDistinct {
            old: OffsetLookup {
                offset: old_offset,
                vec: old_tokens,
            },
            new: OffsetLookup {
                offset: new_offset,
                vec: new_tokens,
            },
        }
    }

    /// Tokenized view of the old sequence, indexable over `old_range()`.
    pub fn old_lookup(&self) -> &impl Index<usize, Output = Int> {
        &self.old
    }

    /// Tokenized view of the new sequence, indexable over `new_range()`.
    pub fn new_lookup(&self) -> &impl Index<usize, Output = Int> {
        &self.new
    }

    /// The index range covered by the old token lookup.
    pub fn old_range(&self) -> Range<usize> {
        let start = self.old.offset;
        start..start + self.old.vec.len()
    }

    /// The index range covered by the new token lookup.
    pub fn new_range(&self) -> Range<usize> {
        let start = self.new.offset;
        start..start + self.new.vec.len()
    }
}
#[test]
fn test_unique() {
    // 'b' and 'd' each appear twice, so only 'a' (index 0) and 'c'
    // (index 2) survive, reported in original order.
    let u = unique(&vec!['a', 'b', 'c', 'd', 'd', 'b'], 0..6)
        .into_iter()
        .map(|x| (*x.value(), x.original_index()))
        .collect::<Vec<_>>();
    assert_eq!(u, vec![('a', 0), ('c', 2)]);
}
#[test]
fn test_int_hasher() {
    // Ranges start at 1, so index 0 ("" on both sides) is never tokenized.
    // Ids are assigned in first-encounter order over the old range first
    // ("foo"=0, "bar"=1, "baz"=2), then the new range, where only "blah"
    // is unseen and gets the next id (3).
    let ih = IdentifyDistinct::<u8>::new(
        &["", "foo", "bar", "baz"][..],
        1..4,
        &["", "foo", "blah", "baz"][..],
        1..4,
    );
    assert_eq!(ih.old_lookup()[1], 0);
    assert_eq!(ih.old_lookup()[2], 1);
    assert_eq!(ih.old_lookup()[3], 2);
    assert_eq!(ih.new_lookup()[1], 0);
    assert_eq!(ih.new_lookup()[2], 3);
    assert_eq!(ih.new_lookup()[3], 2);
    assert_eq!(ih.old_range(), 1..4);
    assert_eq!(ih.new_range(), 1..4);
}
#[test]
fn test_common_prefix_len() {
    // Cases: empty input; shared prefix "foobarb" (7); no shared prefix;
    // and sub-ranges starting at 3, where "barb" gives 4.
    assert_eq!(
        common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
        0
    );
    assert_eq!(
        common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10),
        7
    );
    assert_eq!(
        common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9),
        0
    );
    assert_eq!(
        common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10),
        4
    );
}
#[test]
fn test_common_suffix_len() {
    // Cases: empty input; "1234" fully matches the tail of "X0001234";
    // no common suffix; and sub-ranges comparing "34" against "234" (2).
    assert_eq!(
        common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
        0
    );
    assert_eq!(
        common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8),
        4
    );
    assert_eq!(
        common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4),
        0
    );
    assert_eq!(
        common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5),
        2
    );
}