#![cfg(test)]
use alloc::format;
use crate::internals::compare::FuzzyHashCompareTarget;
use crate::internals::compare::position_array::{
BlockHashPositionArray,
BlockHashPositionArrayRef,
BlockHashPositionArrayMutRef,
BlockHashPositionArrayData,
BlockHashPositionArrayImpl,
BlockHashPositionArrayImplMut,
BlockHashPositionArrayImplInternal,
BlockHashPositionArrayImplMutInternal,
block_hash_position_array_element,
};
use crate::internals::hash::block::block_hash;
use crate::internals::hash::test_utils::test_blockhash_content_all;
use crate::internals::test_utils::test_recommended_default;
use crate::internals::utils::u64_lsb_ones;
#[cfg(feature = "unchecked")]
use crate::internals::compare::position_array::BlockHashPositionArrayImplUnchecked;
/// Checks the prerequisite that `block_hash::FULL_SIZE` converts to `u32`
/// and does not exceed `u64::BITS`.
///
/// NOTE(review): presumably required because each position-array element is a
/// `u64` holding one bit per block hash position -- confirm against the
/// implementation.
#[test]
fn prerequisite_64bit() {
    // Both conditions must hold: the conversion succeeds AND the converted
    // value is within range.  Mapping the comparison into the `Result` and
    // then calling `is_ok()` would discard the comparison result entirely.
    assert!(matches!(
        u32::try_from(block_hash::FULL_SIZE),
        Ok(x) if x <= u64::BITS
    ));
}
/// Exercises `block_hash_position_array_element::has_sequences` with
/// hand-constructed 64-bit patterns.
///
/// The expectations below treat `has_sequences(bits, len)` as "does `bits`
/// contain a run of at least `len` consecutive one bits": a zero-length
/// query is expected to succeed for every pattern and a query longer than
/// the longest run is expected to fail.
#[test]
fn test_has_sequences() {
// Pattern: all zeros -- only the zero-length query is expected to succeed.
assert!(block_hash_position_array_element::has_sequences(0, 0));
for len in 1..=100 {
assert!(!block_hash_position_array_element::has_sequences(0, len),
"failed on len={}", len);
}
// Pattern: all ones -- every length up to 64 succeeds, 65 does not.
assert!(block_hash_position_array_element::has_sequences(u64::MAX, 0));
for len in 1..=64 {
assert!(block_hash_position_array_element::has_sequences(u64::MAX, len),
"failed on len={}", len);
}
assert!(!block_hash_position_array_element::has_sequences(u64::MAX, 65));
// Pattern: alternating bits (two complementary stripes).
const STRIPE_1: u64 = 0b_1010_1010_1010_1010_1010_1010_1010_1010_1010_1010_1010_1010_1010_1010_1010_1010;
const STRIPE_2: u64 = 0b_0101_0101_0101_0101_0101_0101_0101_0101_0101_0101_0101_0101_0101_0101_0101_0101;
// Sanity-check the stripe constants themselves: a stripe and its 1-bit
// rotation must be exact complements of each other.
for stripe in [STRIPE_1, STRIPE_2] {
assert_eq!(u64::MAX, stripe | stripe.rotate_left(1),
"failed on stripe=0b{:064b}", stripe);
assert_eq!(u64::MAX, stripe ^ stripe.rotate_left(1),
"failed on stripe=0b{:064b}", stripe);
assert_eq!(0, stripe & stripe.rotate_left(1),
"failed on stripe=0b{:064b}", stripe);
}
// A stripe only has isolated one bits, so lengths 0 and 1 succeed...
for len in 0..=1 {
assert!(block_hash_position_array_element::has_sequences(STRIPE_1, len),
"failed on len={}", len);
assert!(block_hash_position_array_element::has_sequences(STRIPE_2, len),
"failed on len={}", len);
}
// ... and every length of 2 or more fails.
for len in 2..=100 {
assert!(!block_hash_position_array_element::has_sequences(STRIPE_1, len),
"failed on len={}", len);
assert!(!block_hash_position_array_element::has_sequences(STRIPE_2, len),
"failed on len={}", len);
}
// Pattern: a single run of exactly `len` ones (len >= 2) at every possible
// bit offset.
for len in 2..=64 {
let base = u64_lsb_ones(len);
// Accumulates every shifted run to confirm the shifts cover all 64 bits.
let mut aggr_bits: u64 = 0;
for shift in 0..=(u64::BITS - len) {
let seq = base << shift;
aggr_bits |= seq;
// Shifting back must restore `base` (i.e. no bits were shifted out).
assert_eq!(seq >> shift, base,
"failed on len={}, shift={}", len, shift);
for test_len in 1..=100 {
// Subpattern 1: the run alone, with all other bits zero.
let target = seq;
assert_eq!(
block_hash_position_array_element::has_sequences(target, test_len),
test_len <= len,
"failed on len={}, shift={}", len, shift
);
// Subpatterns 2 and 3: the run embedded in each stripe.  The stripe
// bits covered by the run shifted one position either way are cleared
// first, which includes the two bits directly adjacent to the run, so
// the run cannot merge with a neighboring stripe bit.
for stripe in [STRIPE_1, STRIPE_2] {
let mut target = stripe;
target &= !(seq << 1);
target &= !(seq >> 1);
target |= seq;
if test_len < 2 {
// Length 1 always succeeds (the run itself has at least 2 ones).
assert!(block_hash_position_array_element::has_sequences(target, test_len),
"failed on len={}, shift={}, stripe=0b{:064b}", len, shift, stripe);
} else {
assert_eq!(
block_hash_position_array_element::has_sequences(target, test_len),
test_len <= len,
"failed on len={}, shift={}, stripe=0b{:064b}", len, shift, stripe
);
}
}
}
}
assert_eq!(aggr_bits, u64::MAX, "failed on len={}", len);
// Pattern: all ones with a zero bit at `offset` and then at every
// (len + 1)-th position after it, so no run is longer than `len`.
for offset in 0..=len {
// Becomes true when at least one run of exactly `len` ones exists.
let mut has_seq = false;
let mut target = u64::MAX;
if offset == len {
// Bits 0..len below the first zero form a run of `len` ones.
has_seq = true;
}
// `1u64 << 64` would overflow, so offset 64 (only reachable when
// len == 64) clears nothing.
if offset != 64 {
target &= !(1u64 << offset);
}
for pos in ((offset + len + 1)..64).step_by(usize::try_from(len + 1).unwrap()) {
// Each further zero closes a run of `len` ones below it.
has_seq = true;
target &= !(1u64 << pos);
}
if offset + len + 1 == 64 {
// The run above the zero at `offset` ends exactly at the top bit.
has_seq = true;
}
assert_eq!(has_seq, block_hash_position_array_element::has_sequences(target, len),
"failed on len={}, offset={}", len, offset);
if has_seq {
// A run of `len` ones implies runs of every shorter length.
for test_len in 0..len {
assert!(block_hash_position_array_element::has_sequences(target, test_len),
"failed on len={}, offset={}, test_len={}", len, offset, test_len);
}
} else {
// Only the zero at `offset` was cleared; the longest run is the larger
// of the two parts on either side of that bit.
let max_seq_len = u32::max(u64::BITS - 1 - offset, offset);
for test_len in 0..len {
assert_eq!(test_len <= max_seq_len, block_hash_position_array_element::has_sequences(target, test_len),
"failed on len={}, offset={}, test_len={}", len, offset, test_len);
}
}
// No run longer than `len` exists in this pattern.
for test_len in (len + 1)..=100 {
assert!(!block_hash_position_array_element::has_sequences(target, test_len),
"failed on len={}, offset={}, test_len={}", len, offset, test_len);
}
}
}
}
/// Applies the `test_recommended_default!` check to `BlockHashPositionArray`.
// NOTE(review): presumably verifies that `Default` agrees with `new()`;
// confirm against the macro definition in `test_utils`.
#[test]
fn position_array_impls() {
test_recommended_default!(BlockHashPositionArray);
}
/// Checks one element of the position array built from a block hash that
/// matches a simple glob pattern.
///
/// The sample block hash contains `'F'` exactly twice, at positions 0 and 7,
/// so the element at index 5 is expected to be `0x81` (bits 0 and 7 set).
// NOTE(review): index 5 is presumably the Base64 index of 'F' -- confirm.
#[test]
fn position_array_glob() {
    const BLOCK_HASH_STR: &str = "F123456F89";

    // The sample must match the glob: 'F', six non-'F' characters, 'F', rest.
    let pattern = glob::Pattern::new("F[!F][!F][!F][!F][!F][!F]F*").unwrap();
    assert!(pattern.matches(BLOCK_HASH_STR));

    // ... and must contain exactly two 'F' characters.
    let count_of_f = BLOCK_HASH_STR.matches('F').count();
    assert_eq!(count_of_f, 2);

    // Parse a fuzzy hash whose first block hash is the sample string.
    let raw_hash_text = format!("3:{}:", BLOCK_HASH_STR);
    let parsed = crate::RawFuzzyHash::from_bytes(raw_hash_text.as_bytes()).unwrap();

    let mut position_array = BlockHashPositionArray::new();
    position_array.init_from(parsed.block_hash_1());
    assert_eq!(position_array.representation()[5], 0x81);
}
/// Walks through typical use of `BlockHashPositionArray`: initialization from
/// normalized and non-normalized block hashes, validity queries, common
/// substring queries and clearing.
#[test]
fn position_array_usage() {
    let mut array = BlockHashPositionArray::new();

    // A block hash with a run of three identical values: valid and normalized.
    array.init_from(&[0, 0, 0, 1, 2, 3, 4, 5, 6]);
    assert_eq!(array.len(), 9);
    assert!(array.is_valid());
    assert!(array.is_valid_and_normalized());
    let found: [&[u8]; 2] = [
        &[0, 0, 0, 1, 2, 3, 4],
        &[0, 1, 2, 3, 4, 5, 6],
    ];
    let missing: [&[u8]; 2] = [
        &[1, 2, 3, 4, 5, 6, 7],
        &[0, 0, 0, 0, 1, 2, 3],
    ];
    for needle in found {
        assert!(array.has_common_substring(needle));
    }
    for needle in missing {
        assert!(!array.has_common_substring(needle));
    }

    // Same property with the run of three at the end.
    array.init_from(&[7, 8, 9, 10, 11, 12, 12, 12]);
    assert_eq!(array.len(), 8);
    assert!(array.is_valid());
    assert!(array.is_valid_and_normalized());

    // A run of four identical values: still valid but no longer normalized.
    array.init_from(&[0, 0, 0, 0, 1, 2, 3, 4, 5, 6]);
    assert_eq!(array.len(), 10);
    assert!(array.is_valid());
    assert!(!array.is_valid_and_normalized());
    let found: [&[u8]; 3] = [
        &[0, 0, 0, 0, 1, 2, 3],
        &[0, 0, 0, 1, 2, 3, 4],
        &[0, 1, 2, 3, 4, 5, 6],
    ];
    for needle in found {
        assert!(array.has_common_substring(needle));
    }
    let absent: &[u8] = &[1, 2, 3, 4, 5, 6, 7];
    assert!(!array.has_common_substring(absent));

    // Same property with the run of four at the end.
    array.init_from(&[7, 8, 9, 10, 11, 12, 12, 12, 12]);
    assert_eq!(array.len(), 9);
    assert!(array.is_valid());
    assert!(!array.is_valid_and_normalized());

    // Clearing returns the array to the state of a freshly created one.
    array.clear();
    assert_eq!(array.len(), 0);
    assert_eq!(array, BlockHashPositionArray::new());
}
// `CompositeImpl` bundles every position-array trait that the generic checks
// in this file call through a single `&dyn CompositeImpl`.  The unchecked
// trait is part of the bundle only when the "unchecked" feature is enabled.
// A blanket impl makes every type satisfying the bounds a `CompositeImpl`.
cfg_if::cfg_if! {
    if #[cfg(feature = "unchecked")] {
        trait CompositeImpl:
            BlockHashPositionArrayImpl
            + BlockHashPositionArrayImplInternal
            + BlockHashPositionArrayImplUnchecked
        {
        }
        impl<T> CompositeImpl for T
        where
            T: BlockHashPositionArrayImpl
                + BlockHashPositionArrayImplInternal
                + BlockHashPositionArrayImplUnchecked,
        {
        }
    } else {
        trait CompositeImpl: BlockHashPositionArrayImpl + BlockHashPositionArrayImplInternal {}
        impl<T> CompositeImpl for T
        where
            T: BlockHashPositionArrayImpl + BlockHashPositionArrayImplInternal,
        {
        }
    }
}
/// Builds an owned `BlockHashPositionArray` from `bh` and hands it to
/// `test_func`.
fn check_one_bhpa(bh: &[u8], test_func: &mut dyn FnMut(&dyn CompositeImpl)) {
    let position_array = {
        let mut array = BlockHashPositionArray::new();
        array.init_from(bh);
        array
    };
    test_func(&position_array);
}
/// Builds a `BlockHashPositionArray` from `bh` and hands `test_func` a
/// `BlockHashPositionArrayRef` that borrows its fields.
fn check_one_bhpa_ref(bh: &[u8], test_func: &mut dyn FnMut(&dyn CompositeImpl)) {
    let backing = {
        let mut array = BlockHashPositionArray::new();
        array.init_from(bh);
        array
    };
    let borrowed_view = BlockHashPositionArrayRef(&backing.representation, &backing.len);
    test_func(&borrowed_view);
}
/// Builds a `BlockHashPositionArray` from `bh` and hands `test_func` a
/// `BlockHashPositionArrayMutRef` that mutably borrows its fields.
fn check_one_bhpa_mut_ref(bh: &[u8], test_func: &mut dyn FnMut(&dyn CompositeImpl)) {
    let mut backing = BlockHashPositionArray::new();
    backing.init_from(bh);
    let mutable_view =
        BlockHashPositionArrayMutRef(&mut backing.representation, &mut backing.len);
    test_func(&mutable_view);
}
/// Checks the basic data-model queries (emptiness, validity, normalization
/// and equivalence) on position arrays produced by `wrapper`.
///
/// For every `(bh, bh_norm)` pair supplied by `test_blockhash_content_all`,
/// `wrapper` is called once with `bh` and once with `bh_norm`.
fn check_data_model_basic(wrapper: &mut impl FnMut(&[u8], &mut dyn FnMut(&dyn CompositeImpl))) {
    test_blockhash_content_all(&mut |bh, bh_norm| {
        let input_is_normalized = bh == bh_norm;

        // Position array built from the block hash as given.
        wrapper(bh, &mut |pa| {
            assert_eq!(pa.is_empty(), bh.is_empty(), "failed on bh={:?}", bh);
            assert!(pa.is_valid(), "failed on bh={:?}", bh);
            assert!(pa.is_equiv(bh), "failed on bh={:?}", bh);
            assert!(pa.is_equiv_internal(bh), "failed on bh={:?}", bh);
            assert_eq!(
                pa.is_valid_and_normalized(),
                input_is_normalized,
                "failed on bh={:?}", bh
            );
            #[cfg(feature = "unchecked")]
            unsafe {
                assert!(pa.is_equiv_unchecked(bh), "failed on bh={:?}", bh);
            }
        });

        // Position array built from the normalized form: always normalized,
        // and equivalent to `bh` only if `bh` was already normalized.
        wrapper(bh_norm, &mut |pa| {
            assert_eq!(pa.is_empty(), bh_norm.is_empty(), "failed on bh={:?}", bh);
            assert!(pa.is_valid(), "failed on bh={:?}", bh);
            assert!(pa.is_equiv(bh_norm), "failed on bh={:?}", bh);
            assert!(pa.is_equiv_internal(bh_norm), "failed on bh={:?}", bh);
            assert!(pa.is_valid_and_normalized(), "failed on bh={:?}", bh);
            assert_eq!(pa.is_equiv(bh), input_is_normalized, "failed on bh={:?}", bh);
            assert_eq!(
                pa.is_equiv_internal(bh),
                input_is_normalized,
                "failed on bh={:?}", bh
            );
            #[cfg(feature = "unchecked")]
            unsafe {
                assert!(pa.is_equiv_unchecked(bh_norm), "failed on bh={:?}", bh);
                assert_eq!(
                    pa.is_equiv_unchecked(bh),
                    input_is_normalized,
                    "failed on bh={:?}", bh
                );
            }
        });
    });
}
/// Runs `check_data_model_basic` with `check_one_bhpa` as the wrapper
/// (owned `BlockHashPositionArray`).
#[test]
fn data_model_basic_bhpa() {
check_data_model_basic(&mut check_one_bhpa);
}
/// Runs `check_data_model_basic` with `check_one_bhpa_ref` as the wrapper
/// (`BlockHashPositionArrayRef`).
#[test]
fn data_model_basic_bhpa_ref() {
check_data_model_basic(&mut check_one_bhpa_ref);
}
/// Runs `check_data_model_basic` with `check_one_bhpa_mut_ref` as the wrapper
/// (`BlockHashPositionArrayMutRef`).
#[test]
fn data_model_basic_bhpa_mut_ref() {
check_data_model_basic(&mut check_one_bhpa_mut_ref);
}
/// Checks that changing any single element of a block hash makes it
/// non-equivalent to the position array built from the unchanged block hash.
///
/// Each element is changed by flipping its lowest bit and is restored
/// afterwards.  The leading assertion requires `block_hash::ALPHABET_SIZE` to
/// be even.
// NOTE(review): presumably an even alphabet size guarantees that the flipped
// value is still inside the alphabet -- confirm.
fn check_data_model_inequality(wrapper: &mut impl FnMut(&[u8], &mut dyn FnMut(&dyn CompositeImpl))) {
    assert_eq!(block_hash::ALPHABET_SIZE % 2, 0);
    test_blockhash_content_all(&mut |bh, bh_norm| {
        let mut check_single = |bh: &[u8]| {
            wrapper(bh, &mut |pa: &dyn CompositeImpl| {
                if bh.is_empty() {
                    return;
                }
                // Scratch copy of the block hash, modified one element at a time.
                let mut storage = [0u8; block_hash::FULL_SIZE];
                let altered = &mut storage[..bh.len()];
                altered.copy_from_slice(bh);
                for i in 0..bh.len() {
                    // Flip the lowest bit of element `i`.
                    altered[i] ^= 1;
                    assert!(!pa.is_equiv(altered), "failed on bh={:?}, i={}", bh, i);
                    assert!(!pa.is_equiv_internal(altered), "failed on bh={:?}, i={}", bh, i);
                    #[cfg(feature = "unchecked")]
                    unsafe {
                        assert!(!pa.is_equiv_unchecked(altered), "failed on bh={:?}, i={}", bh, i);
                    }
                    // Restore the element; the copy must be equivalent again.
                    altered[i] ^= 1;
                    assert!(pa.is_equiv_internal(altered), "failed on bh={:?}, i={}", bh, i);
                }
            });
        };
        check_single(bh);
        check_single(bh_norm);
    });
}
/// Runs `check_data_model_inequality` with `check_one_bhpa` as the wrapper
/// (owned `BlockHashPositionArray`).
#[test]
fn data_model_inequality_bhpa() {
check_data_model_inequality(&mut check_one_bhpa);
}
/// Runs `check_data_model_inequality` with `check_one_bhpa_ref` as the wrapper
/// (`BlockHashPositionArrayRef`).
#[test]
fn data_model_inequality_bhpa_ref() {
check_data_model_inequality(&mut check_one_bhpa_ref);
}
/// Runs `check_data_model_inequality` with `check_one_bhpa_mut_ref` as the
/// wrapper (`BlockHashPositionArrayMutRef`).
#[test]
fn data_model_inequality_bhpa_mut_ref() {
check_data_model_inequality(&mut check_one_bhpa_mut_ref);
}
/// Checks common-substring detection between a position array and windows of
/// the very block hash it was built from.
///
/// Windows shorter than `block_hash::MIN_LCS_FOR_COMPARISON` must never be
/// reported as a common substring; windows of at least that length must
/// always be reported.
fn check_substring_itself(wrapper: &mut impl FnMut(&[u8], &mut dyn FnMut(&dyn CompositeImpl))) {
    test_blockhash_content_all(&mut |bh, bh_norm| {
        let mut check_single = |bh: &[u8]| {
            wrapper(bh, &mut |pa: &dyn CompositeImpl| {
                // Too short to count as a common substring.
                for short_len in 1..block_hash::MIN_LCS_FOR_COMPARISON {
                    for window in bh.windows(short_len) {
                        assert!(
                            !pa.has_common_substring(window),
                            "failed on bh={:?}, window={:?}", bh, window
                        );
                        assert!(
                            !pa.has_common_substring_internal(window),
                            "failed on bh={:?}, window={:?}", bh, window
                        );
                        #[cfg(feature = "unchecked")]
                        unsafe {
                            assert!(
                                !pa.has_common_substring_unchecked(window),
                                "failed on bh={:?}, window={:?}", bh, window
                            );
                        }
                    }
                }
                // Long enough: every window is a substring of `bh` itself.
                for long_len in block_hash::MIN_LCS_FOR_COMPARISON..=bh.len() {
                    for window in bh.windows(long_len) {
                        assert!(
                            pa.has_common_substring(window),
                            "failed on bh={:?}, window={:?}", bh, window
                        );
                        assert!(
                            pa.has_common_substring_internal(window),
                            "failed on bh={:?}, window={:?}", bh, window
                        );
                        #[cfg(feature = "unchecked")]
                        unsafe {
                            assert!(
                                pa.has_common_substring_unchecked(window),
                                "failed on bh={:?}, window={:?}", bh, window
                            );
                        }
                    }
                }
            });
        };
        check_single(bh);
        check_single(bh_norm);
    });
}
/// Runs `check_substring_itself` with `check_one_bhpa` as the wrapper
/// (owned `BlockHashPositionArray`).
#[test]
fn substring_itself_bhpa() {
check_substring_itself(&mut check_one_bhpa);
}
/// Runs `check_substring_itself` with `check_one_bhpa_ref` as the wrapper
/// (`BlockHashPositionArrayRef`).
#[test]
fn substring_itself_bhpa_ref() {
check_substring_itself(&mut check_one_bhpa_ref);
}
/// Runs `check_substring_itself` with `check_one_bhpa_mut_ref` as the wrapper
/// (`BlockHashPositionArrayMutRef`).
#[test]
fn substring_itself_bhpa_mut_ref() {
check_substring_itself(&mut check_one_bhpa_mut_ref);
}
/// Checks that two strings made of the same values in a different order are
/// not reported as having a common substring.
///
/// Both strings are exactly `block_hash::MIN_LCS_FOR_COMPARISON` long, so the
/// only possible common substring would be the whole string.
fn check_substring_fail_example(wrapper: &mut impl FnMut(&[u8], &mut dyn FnMut(&dyn CompositeImpl))) {
    const STR1: &[u8] = &[0, 1, 2, 3, 4, 5, 6];
    const STR2: &[u8] = &[6, 5, 4, 3, 2, 1, 0];

    // Prerequisites of the example itself.
    assert!(STR1.len() == block_hash::MIN_LCS_FOR_COMPARISON);
    assert!(STR2.len() == block_hash::MIN_LCS_FOR_COMPARISON);
    assert_ne!(STR1, STR2);
    #[cfg(feature = "std")]
    {
        // Every value used by STR2 also appears in STR1.
        let alphabets: std::collections::HashSet<u8> = STR1.iter().copied().collect();
        assert!(STR2.iter().all(|x| alphabets.contains(x)));
    }

    wrapper(STR1, &mut |pa: &dyn CompositeImpl| {
        assert!(!pa.has_common_substring(STR2));
        assert!(!pa.has_common_substring_internal(STR2));
        #[cfg(feature = "unchecked")]
        unsafe {
            assert!(!pa.has_common_substring_unchecked(STR2));
        }
    });
}
/// Runs `check_substring_fail_example` with `check_one_bhpa` as the wrapper
/// (owned `BlockHashPositionArray`).
#[test]
fn substring_fail_example_bhpa() {
check_substring_fail_example(&mut check_one_bhpa);
}
/// Runs `check_substring_fail_example` with `check_one_bhpa_ref` as the
/// wrapper (`BlockHashPositionArrayRef`).
#[test]
fn substring_fail_example_bhpa_ref() {
check_substring_fail_example(&mut check_one_bhpa_ref);
}
/// Runs `check_substring_fail_example` with `check_one_bhpa_mut_ref` as the
/// wrapper (`BlockHashPositionArrayMutRef`).
#[test]
fn substring_fail_example_bhpa_mut_ref() {
check_substring_fail_example(&mut check_one_bhpa_mut_ref);
}
/// Checks that the edit distance between a position array and the block hash
/// it was built from is zero, for both the raw and the normalized block hash.
fn check_edit_distance_itself(wrapper: &mut impl FnMut(&[u8], &mut dyn FnMut(&dyn CompositeImpl))) {
    test_blockhash_content_all(&mut |bh, bh_norm| {
        let mut check_single = |bh: &[u8]| {
            wrapper(bh, &mut |pa: &dyn CompositeImpl| {
                let distance = pa.edit_distance(bh);
                assert_eq!(distance, 0, "failed on bh={:?}", bh);
                let distance_internal = pa.edit_distance_internal(bh);
                assert_eq!(distance_internal, 0, "failed on bh={:?}", bh);
                #[cfg(feature = "unchecked")]
                unsafe {
                    assert_eq!(pa.edit_distance_unchecked(bh), 0, "failed on bh={:?}", bh);
                }
            });
        };
        check_single(bh);
        check_single(bh_norm);
    });
}
/// Runs `check_edit_distance_itself` with `check_one_bhpa` as the wrapper
/// (owned `BlockHashPositionArray`).
#[test]
fn edit_distance_itself_bhpa() {
check_edit_distance_itself(&mut check_one_bhpa);
}
/// Runs `check_edit_distance_itself` with `check_one_bhpa_ref` as the wrapper
/// (`BlockHashPositionArrayRef`).
#[test]
fn edit_distance_itself_bhpa_ref() {
check_edit_distance_itself(&mut check_one_bhpa_ref);
}
/// Runs `check_edit_distance_itself` with `check_one_bhpa_mut_ref` as the
/// wrapper (`BlockHashPositionArrayMutRef`).
#[test]
fn edit_distance_itself_bhpa_mut_ref() {
check_edit_distance_itself(&mut check_one_bhpa_mut_ref);
}
/// Checks the scores obtained when a normalized block hash is compared with
/// the position array built from itself.
///
/// The expected raw score is 100 when the block hash is at least
/// `block_hash::MIN_LCS_FOR_COMPARISON` long and 0 otherwise.  For log block
/// sizes below `FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER`, the
/// expected score is additionally limited by the score cap reported by
/// `score_cap_on_block_hash_comparison_internal`.
fn check_scoring_with_itself(wrapper: &mut impl FnMut(&[u8], &mut dyn FnMut(&dyn CompositeImpl))) {
    test_blockhash_content_all(&mut |_bh, bh_norm| {
        wrapper(bh_norm, &mut |pa| {
            let len_norm = u8::try_from(bh_norm.len()).unwrap();
            let long_enough = bh_norm.len() >= block_hash::MIN_LCS_FOR_COMPARISON;
            let expected_score = if long_enough { 100 } else { 0 };

            // Raw (uncapped) score.
            assert_eq!(
                pa.score_strings_raw(bh_norm),
                expected_score,
                "failed on bh_norm={:?}", bh_norm
            );
            assert_eq!(
                pa.score_strings_raw_internal(bh_norm),
                expected_score,
                "failed on bh_norm={:?}", bh_norm
            );
            #[cfg(feature = "unchecked")]
            unsafe {
                assert_eq!(
                    pa.score_strings_raw_unchecked(bh_norm),
                    expected_score,
                    "failed on bh_norm={:?}", bh_norm
                );
            }

            // At the capping border itself, the raw score is expected as is.
            assert_eq!(
                pa.score_strings(bh_norm, FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER),
                expected_score,
                "failed on bh_norm={:?}", bh_norm
            );

            if !long_enough {
                return;
            }
            // Below the border, the score is limited by the score cap.
            for log_block_size in 0..FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER {
                let score_cap = FuzzyHashCompareTarget::score_cap_on_block_hash_comparison_internal(
                    log_block_size,
                    len_norm,
                    len_norm
                ).min(100);
                let capped_score = expected_score.min(score_cap);
                assert_eq!(
                    pa.score_strings(bh_norm, log_block_size),
                    capped_score,
                    "failed on bh_norm={:?}, log_block_size={}", bh_norm, log_block_size
                );
                assert_eq!(
                    pa.score_strings_internal(bh_norm, log_block_size),
                    capped_score,
                    "failed on bh_norm={:?}, log_block_size={}", bh_norm, log_block_size
                );
                #[cfg(feature = "unchecked")]
                unsafe {
                    assert_eq!(
                        pa.score_strings_unchecked(bh_norm, log_block_size),
                        capped_score,
                        "failed on bh_norm={:?}, log_block_size={}", bh_norm, log_block_size
                    );
                }
            }
        });
    });
}
/// Runs `check_scoring_with_itself` with `check_one_bhpa` as the wrapper
/// (owned `BlockHashPositionArray`).
#[test]
fn scoring_with_itself_bhpa() {
check_scoring_with_itself(&mut check_one_bhpa);
}
/// Runs `check_scoring_with_itself` with `check_one_bhpa_ref` as the wrapper
/// (`BlockHashPositionArrayRef`).
#[test]
fn scoring_with_itself_bhpa_ref() {
check_scoring_with_itself(&mut check_one_bhpa_ref);
}
/// Runs `check_scoring_with_itself` with `check_one_bhpa_mut_ref` as the
/// wrapper (`BlockHashPositionArrayMutRef`).
#[test]
fn scoring_with_itself_bhpa_mut_ref() {
check_scoring_with_itself(&mut check_one_bhpa_mut_ref);
}
/// Deliberately corrupts a mutable position array through `len_mut()` and
/// `representation_mut()` and checks that `is_valid()` and
/// `is_valid_and_normalized()` report each corruption.
///
/// The function assumes (and asserts first) that both
/// `block_hash::FULL_SIZE` and `block_hash::ALPHABET_SIZE` are 64.
/// `value` is cleared at the start of each scenario, so its initial contents
/// do not matter.
fn check_data_model_corruption<T>(value: &mut T)
where
T: BlockHashPositionArrayImplMut + BlockHashPositionArrayImplMutInternal
{
// Prerequisites for the hard-coded constants used below.
assert_eq!(block_hash::FULL_SIZE, 64);
assert_eq!(block_hash::ALPHABET_SIZE, 64);
// Scenario: a cleared array is valid and normalized.
{
value.clear();
assert!(value.is_valid());
assert!(value.is_valid_and_normalized());
}
// Scenario: the length disagrees with the representation.
{
value.clear();
assert!(value.is_valid());
assert!(value.is_valid_and_normalized());
// A non-zero length with an all-zero representation is invalid.
for len in 1..=u8::MAX {
*value.len_mut() = len;
assert!(!value.is_valid(),
"failed on len={}", len);
assert!(!value.is_valid_and_normalized(),
"failed on len={}", len);
}
// Exactly one element holds the lowest `len` bits (the same alphabet
// index at every position).  That is valid, and normalized only while
// the run does not exceed MAX_SEQUENCE_SIZE.  Zeroing the element again
// leaves a non-zero length with no bits set, which is invalid.
for len in 1u8..=64 {
let target_value = u64_lsb_ones(len as u32);
*value.len_mut() = len;
for i in 0..(*value.representation_mut()).len() {
(*value.representation_mut())[i] = target_value;
assert!(value.is_valid(),
"failed on len={}, i={}", len, i);
assert_eq!(value.is_valid_and_normalized(), (len as usize) <= block_hash::MAX_SEQUENCE_SIZE,
"failed on len={}, i={}", len, i);
(*value.representation_mut())[i] = 0;
assert!(!value.is_valid(),
"failed on len={}, i={}", len, i);
assert!(!value.is_valid_and_normalized(),
"failed on len={}, i={}", len, i);
}
}
// Maximum length with element 0 occupying all 64 positions:
// valid but not normalized.
*value.len_mut() = 64;
(*value.representation_mut())[0] = u64::MAX;
assert!(value.is_valid());
assert!(!value.is_valid_and_normalized());
// Any length above 64 is invalid regardless of the representation.
for len in (64 + 1)..=u8::MAX {
*value.len_mut() = len;
assert!(!value.is_valid(),
"failed on len={}", len);
assert!(!value.is_valid_and_normalized(),
"failed on len={}", len);
}
}
// Scenario: a bit is set at a position at or beyond the length.
{
for len in 0..=block_hash::FULL_SIZE {
value.clear();
assert!(value.is_valid(),
"failed on len={}", len);
assert!(value.is_valid_and_normalized(),
"failed on len={}", len);
// Baseline: position `i` holds alphabet index `i`
// (exactly one bit per position, no repeated values).
for i in 0..len {
(*value.representation_mut())[i] = 1 << i;
}
*value.len_mut() = len as u8;
assert!(value.is_valid(),
"failed on len={}", len);
assert!(value.is_valid_and_normalized(),
"failed on len={}", len);
for invalid_pos in (len as u32)..u64::BITS {
let bitpos = 1u64 << invalid_pos;
for ch in 0..(*value.representation_mut()).len() {
// Set the out-of-range bit, expect invalid, then clear it again.
(*value.representation_mut())[ch] |= bitpos;
assert!(!value.is_valid(),
"failed on len={}, invalid_pos={}, ch={}", len, invalid_pos, ch);
assert!(!value.is_valid_and_normalized(),
"failed on len={}, invalid_pos={}, ch={}", len, invalid_pos, ch);
(*value.representation_mut())[ch] &= !bitpos;
assert!(value.is_valid(),
"failed on len={}, invalid_pos={}, ch={}", len, invalid_pos, ch);
assert!(value.is_valid_and_normalized(),
"failed on len={}, invalid_pos={}, ch={}", len, invalid_pos, ch);
}
}
}
}
// Scenario: a bit inside the length is flipped, so that position is
// claimed either by no element or by two elements.
{
for len in 0..=block_hash::FULL_SIZE {
value.clear();
assert!(value.is_valid(),
"failed on len={}", len);
assert!(value.is_valid_and_normalized(),
"failed on len={}", len);
// Same baseline as the previous scenario.
for i in 0..len {
(*value.representation_mut())[i] = 1 << i;
}
*value.len_mut() = len as u8;
assert!(value.is_valid(),
"failed on len={}", len);
assert!(value.is_valid_and_normalized(),
"failed on len={}", len);
for invalid_pos in 0..len {
let bitpos = 1u64 << (invalid_pos as u32);
for ch in 0..(*value.representation_mut()).len() {
// Flip the bit, expect invalid, then flip it back.
(*value.representation_mut())[ch] ^= bitpos;
assert!(!value.is_valid(),
"failed on len={}, invalid_pos={}, ch={}", len, invalid_pos, ch);
assert!(!value.is_valid_and_normalized(),
"failed on len={}, invalid_pos={}, ch={}", len, invalid_pos, ch);
(*value.representation_mut())[ch] ^= bitpos;
assert!(value.is_valid(),
"failed on len={}, invalid_pos={}, ch={}", len, invalid_pos, ch);
assert!(value.is_valid_and_normalized(),
"failed on len={}, invalid_pos={}, ch={}", len, invalid_pos, ch);
}
}
}
}
}
/// Runs `check_data_model_corruption` on an owned `BlockHashPositionArray`.
#[test]
fn data_model_corruption_bhpa() {
let mut pa = BlockHashPositionArray::new();
// A freshly created array must be valid before it is corrupted.
assert!(pa.is_valid());
check_data_model_corruption(&mut pa);
}
/// Runs `check_data_model_corruption` on a `BlockHashPositionArrayMutRef`
/// backed by a local representation array and length.
#[test]
fn data_model_corruption_bhpa_mut_ref() {
// All-zero representation with zero length.
let mut representation = [0; block_hash::ALPHABET_SIZE];
let mut len = 0;
let mut pa = BlockHashPositionArrayMutRef(&mut representation, &mut len);
assert!(pa.is_valid());
check_data_model_corruption(&mut pa);
}
/// Naive reference implementation of the common-substring check.
///
/// Returns `true` if `str1` and `str2` share at least one identical window
/// of exactly `block_hash::MIN_LCS_FOR_COMPARISON` elements.  Inputs shorter
/// than that have no windows and therefore never match.
fn has_common_substring_naive(
    str1: &[u8],
    str2: &[u8]
) -> bool
{
    use std::collections::HashSet;
    const TARGET_LEN: usize = block_hash::MIN_LCS_FOR_COMPARISON;
    // Index every window of the first string, then probe with the windows
    // of the second one.
    let windows_of_str1: HashSet<&[u8]> = str1.windows(TARGET_LEN).collect();
    str2.windows(TARGET_LEN)
        .any(|candidate| windows_of_str1.contains(candidate))
}
/// Sanity-checks the naive reference implementation
/// `has_common_substring_naive` with hand-written examples.
#[test]
fn test_has_common_substring_naive() {
// The examples below are written for a minimum common length of 7.
assert_eq!(block_hash::MIN_LCS_FOR_COMPARISON, 7);
// Inputs shorter than 7 never match, even when identical.
assert!(!has_common_substring_naive(b"", b""));
assert!(!has_common_substring_naive(b"ABCDEF", b"ABCDEF"));
// Exactly 7 common characters are enough, wherever they are located.
assert!(has_common_substring_naive(b"ABCDEFG", b"ABCDEFG"));
assert!(has_common_substring_naive(b"00000ABCDEFG", b"ABCDEFG11111"));
// Common part "cOpEYXB+0Z" (10 characters).
assert!(has_common_substring_naive(b"+r/kcOpEYXB+0ZJ", b"7ocOpEYXB+0ZF29"));
// One differing character splits the common part into "cOpEYX" (6) and
// "+0Z" (3), both shorter than 7.
assert!(!has_common_substring_naive(b"+r/kcOpEYXX+0ZJ", b"7ocOpEYXB+0ZF29"));
}
/// Cross-checks `BlockHashPositionArray::has_common_substring` against
/// `has_common_substring_naive` for every ordered pair of block hashes found
/// in the test-suite data files.
///
/// Only compiled with the "tests-slow" feature: the number of comparisons is
/// quadratic in the number of distinct block hashes.
#[cfg(feature = "tests-slow")]
#[test]
fn verify_has_common_substring_by_real_blockhash_vectors() {
    use core::str::FromStr;
    use std::collections::HashSet;
    use std::fs::File;
    use std::io::{BufRead, BufReader};
    use std::vec::Vec;
    use crate::internals::hash::LongFuzzyHash;

    // Collect the distinct block hashes (both halves of every fuzzy hash).
    let mut block_hashes = HashSet::new();
    for filename in [
        "data/testsuite/compare/big_cluster.uniform_blocksize.4x128.txt",
        "data/testsuite/compare/malware_all.uniform_blocksize.16x32.txt",
        "data/testsuite/compare/various.txt"
    ] {
        let index = BufReader::new(File::open(filename).unwrap());
        for hash in index.lines() {
            let hash = LongFuzzyHash::from_str(&hash.unwrap()).unwrap();
            block_hashes.insert(Vec::from(hash.block_hash_1()));
            block_hashes.insert(Vec::from(hash.block_hash_2()));
        }
    }

    let mut pa = BlockHashPositionArray::new();
    for bh1 in &block_hashes {
        // The position array depends only on `bh1`, so build it once per
        // outer iteration instead of once per pair.
        pa.init_from(bh1.as_slice());
        for bh2 in &block_hashes {
            // The naive and the fast implementation must agree.
            let expected_value = has_common_substring_naive(bh1.as_slice(), bh2.as_slice());
            assert_eq!(
                expected_value,
                pa.has_common_substring(bh2.as_slice()),
                "failed on bh1={:?}, bh2={:?}", bh1, bh2
            );
        }
    }
}
/// Cross-checks `BlockHashPositionArray::edit_distance` against the reference
/// implementation `test_utils::edit_distn` for every ordered pair of block
/// hashes found in the test-suite data files.
///
/// Only compiled with the "tests-slow" feature: the number of comparisons is
/// quadratic in the number of distinct block hashes.
#[cfg(feature = "tests-slow")]
#[test]
fn verify_edit_distance_by_real_blockhash_vectors() {
    use core::str::FromStr;
    use std::collections::HashSet;
    use std::fs::File;
    use std::io::{BufRead, BufReader};
    use std::vec::Vec;
    use crate::internals::compare::position_array::{BlockHashPositionArray, BlockHashPositionArrayImpl};
    use crate::internals::hash::LongFuzzyHash;

    // Collect the distinct block hashes (both halves of every fuzzy hash).
    let mut block_hashes = HashSet::new();
    for filename in [
        "data/testsuite/compare/big_cluster.uniform_blocksize.4x128.txt",
        "data/testsuite/compare/malware_all.uniform_blocksize.16x32.txt",
        "data/testsuite/compare/various.txt"
    ] {
        let index = BufReader::new(File::open(filename).unwrap());
        for hash in index.lines() {
            let hash = LongFuzzyHash::from_str(&hash.unwrap()).unwrap();
            block_hashes.insert(Vec::from(hash.block_hash_1()));
            block_hashes.insert(Vec::from(hash.block_hash_2()));
        }
    }

    let mut pa = BlockHashPositionArray::new();
    for bh1 in &block_hashes {
        // The position array depends only on `bh1`, so build it once per
        // outer iteration instead of once per pair.
        pa.init_from(bh1.as_slice());
        for bh2 in &block_hashes {
            // The reference and the fast implementation must agree.
            let dist_from_dp_impl =
                crate::internals::compare::test_utils::edit_distn(bh1.as_slice(), bh2.as_slice()) as u32;
            let dist_from_fast_impl = pa.edit_distance(bh2.as_slice());
            assert_eq!(dist_from_dp_impl, dist_from_fast_impl,
                "failed on bh1={:?}, bh2={:?}", bh1, bh2);
        }
    }
}