use crate::internals::compare::FuzzyHashCompareTarget;
use crate::internals::hash::block::{block_hash, block_size};
use crate::internals::macros::invariant;
use crate::internals::utils::u64_lsb_ones;
pub mod block_hash_position_array_element {
#[inline(always)]
pub const fn has_sequences(pa_elem: u64, len: u32) -> bool {
match len {
0 => true,
1 => pa_elem != 0,
2..=u64::BITS if len < u64::BITS => {
let cont_01 = pa_elem;
let cont_02 = cont_01 & (cont_01 >> 1);
let cont_04 = cont_02 & (cont_02 >> 2);
let cont_08 = cont_04 & (cont_04 >> 4);
let cont_16 = cont_08 & (cont_08 >> 8);
let cont_32 = cont_16 & (cont_16 >> 16);
let mut len = len;
let mut shift;
let mut mask = if len < 4 {
len &= !2;
shift = 2;
cont_02
} else if len < 8 {
len &= !4;
shift = 4;
cont_04
} else if len < 16 {
len &= !8;
shift = 8;
cont_08
} else if len < 32 {
len &= !16;
shift = 16;
cont_16
} else {
len &= !32;
shift = 32;
cont_32
};
if (len & 16) != 0 {
mask &= cont_16 >> shift;
shift += 16;
}
if (len & 8) != 0 {
mask &= cont_08 >> shift;
shift += 8;
}
if (len & 4) != 0 {
mask &= cont_04 >> shift;
shift += 4;
}
if (len & 2) != 0 {
mask &= cont_02 >> shift;
shift += 2;
}
if (len & 1) != 0 {
mask &= cont_01 >> shift;
}
mask != 0
}
u64::BITS => pa_elem == u64::MAX,
_ => false,
}
}
#[inline(always)]
pub const fn has_sequences_const<const LEN: u32>(pa_elem: u64) -> bool {
has_sequences(pa_elem, LEN)
}
}
pub trait BlockHashPositionArrayData {
fn representation(&self) -> &[u64; block_hash::ALPHABET_SIZE];
fn len(&self) -> u8;
#[inline(always)]
fn is_empty(&self) -> bool {
self.len() == 0
}
fn is_valid(&self) -> bool {
let len = self.len();
if len > 64 {
return false;
}
let expected_total: u64 = u64_lsb_ones(len as u32);
let mut total: u64 = 0;
let has_no_multi_occupation = self.representation().iter().all(
#[inline(always)]
|&pos| {
let is_valid = (total & pos) == 0;
total |= pos;
is_valid
},
);
has_no_multi_occupation && total == expected_total
}
fn is_valid_and_normalized(&self) -> bool {
self.is_valid()
&& self.representation().iter().all(
#[inline(always)]
|&pos| {
!block_hash_position_array_element::has_sequences_const::<
{ block_hash::MAX_SEQUENCE_SIZE as u32 + 1 },
>(pos)
},
)
}
}
pub(crate) trait BlockHashPositionArrayDataMut: BlockHashPositionArrayData {
fn representation_mut(&mut self) -> &mut [u64; block_hash::ALPHABET_SIZE];
fn len_mut(&mut self) -> &mut u8;
}
pub trait BlockHashPositionArrayImplInternal: BlockHashPositionArrayData {
#[inline]
fn is_equiv_internal(&self, other: &[u8]) -> bool {
debug_assert!(self.is_valid());
debug_assert!(other.len() <= 64);
let len = self.len();
let representation = self.representation();
(len as usize) == other.len()
&& other.iter().enumerate().all(
#[inline(always)]
|(i, &ch)| {
invariant!((ch as usize) < block_hash::ALPHABET_SIZE);
let value = representation[ch as usize]; value & (1u64 << i) != 0
},
)
}
#[inline(always)]
fn has_common_substring_internal(&self, other: &[u8]) -> bool {
debug_assert!(self.is_valid());
let len = self.len();
let representation = self.representation();
if (len as usize) < block_hash::MIN_LCS_FOR_COMPARISON
|| other.len() < block_hash::MIN_LCS_FOR_COMPARISON
{
return false;
}
let mut l: usize = other.len() - block_hash::MIN_LCS_FOR_COMPARISON;
loop {
invariant!(l < other.len());
invariant!((other[l] as usize) < block_hash::ALPHABET_SIZE);
let mut d: u64 = representation[other[l] as usize]; let r: usize = l + (block_hash::MIN_LCS_FOR_COMPARISON - 1);
while d != 0 {
l += 1;
invariant!(l < other.len());
invariant!((other[l] as usize) < block_hash::ALPHABET_SIZE);
d = (d << 1) & representation[other[l] as usize]; if l == r && d != 0 {
return true;
}
}
if l < block_hash::MIN_LCS_FOR_COMPARISON {
break;
}
l -= block_hash::MIN_LCS_FOR_COMPARISON;
}
false
}
#[inline(always)]
fn edit_distance_internal(&self, other: &[u8]) -> u32 {
let len = self.len();
let representation = self.representation();
debug_assert!(self.is_valid());
debug_assert!((len as usize) <= block_hash::FULL_SIZE);
debug_assert!(other.len() <= block_hash::FULL_SIZE);
let mut v: u64 = !0;
for &ch in other.iter() {
invariant!((ch as usize) < block_hash::ALPHABET_SIZE);
let e: u64 = representation[ch as usize]; let p: u64 = e & v;
v = (v.wrapping_add(p)) | (v.wrapping_sub(p));
}
let llcs = v.count_zeros();
(len as u32) + (other.len() as u32) - 2 * llcs
}
#[inline(always)]
fn score_strings_raw_internal(&self, other: &[u8]) -> u32 {
let len = self.len();
debug_assert!(self.is_valid_and_normalized());
debug_assert!((len as usize) <= block_hash::FULL_SIZE);
debug_assert!(other.len() <= block_hash::FULL_SIZE);
if !self.has_common_substring_internal(other) {
return 0;
}
FuzzyHashCompareTarget::raw_score_by_edit_distance_internal(
len,
other.len() as u8,
self.edit_distance_internal(other),
)
}
#[inline(never)]
fn score_strings_internal(&self, other: &[u8], log_block_size: u8) -> u32 {
let len = self.len();
debug_assert!(self.is_valid_and_normalized());
debug_assert!((len as usize) <= block_hash::FULL_SIZE);
debug_assert!(other.len() <= block_hash::FULL_SIZE);
debug_assert!((log_block_size as usize) <= block_size::NUM_VALID);
let score = self.score_strings_raw_internal(other);
if log_block_size >= FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER {
return score;
}
let score_cap = FuzzyHashCompareTarget::score_cap_on_block_hash_comparison_internal(
log_block_size,
len,
other.len() as u8,
);
u32::min(score, score_cap)
}
}
impl<T> BlockHashPositionArrayImplInternal for T where T: BlockHashPositionArrayData {}
#[cfg(feature = "unchecked")]
#[allow(unsafe_code)]
pub unsafe trait BlockHashPositionArrayImplUnchecked: BlockHashPositionArrayData {
unsafe fn is_equiv_unchecked(&self, other: &[u8]) -> bool;
unsafe fn has_common_substring_unchecked(&self, other: &[u8]) -> bool;
unsafe fn edit_distance_unchecked(&self, other: &[u8]) -> u32;
unsafe fn score_strings_raw_unchecked(&self, other: &[u8]) -> u32;
unsafe fn score_strings_unchecked(&self, other: &[u8], log_block_size: u8) -> u32;
}
#[cfg(feature = "unchecked")]
#[allow(unsafe_code)]
unsafe impl<T> BlockHashPositionArrayImplUnchecked for T
where
T: BlockHashPositionArrayImplInternal,
{
#[inline(always)]
unsafe fn is_equiv_unchecked(&self, other: &[u8]) -> bool {
self.is_equiv_internal(other)
}
#[inline(always)]
unsafe fn has_common_substring_unchecked(&self, other: &[u8]) -> bool {
self.has_common_substring_internal(other)
}
#[inline(always)]
unsafe fn edit_distance_unchecked(&self, other: &[u8]) -> u32 {
self.edit_distance_internal(other)
}
#[inline(always)]
unsafe fn score_strings_raw_unchecked(&self, other: &[u8]) -> u32 {
self.score_strings_raw_internal(other)
}
#[inline(always)]
unsafe fn score_strings_unchecked(&self, other: &[u8], log_block_size: u8) -> u32 {
self.score_strings_internal(other, log_block_size)
}
}
pub trait BlockHashPositionArrayImpl: BlockHashPositionArrayData {
fn is_equiv(&self, other: &[u8]) -> bool;
fn has_common_substring(&self, other: &[u8]) -> bool;
fn edit_distance(&self, other: &[u8]) -> u32;
fn score_strings_raw(&self, other: &[u8]) -> u32;
fn score_strings(&self, other: &[u8], log_block_size: u8) -> u32;
}
impl<T> BlockHashPositionArrayImpl for T
where
T: BlockHashPositionArrayImplInternal,
{
fn is_equiv(&self, other: &[u8]) -> bool {
assert!(self.is_valid());
assert!(other.len() <= 64);
assert!(other
.iter()
.all(|&x| (x as usize) < block_hash::ALPHABET_SIZE));
self.is_equiv_internal(other)
}
fn has_common_substring(&self, other: &[u8]) -> bool {
assert!(self.is_valid());
assert!(other
.iter()
.all(|&x| (x as usize) < block_hash::ALPHABET_SIZE));
self.has_common_substring_internal(other)
}
fn edit_distance(&self, other: &[u8]) -> u32 {
assert!(self.is_valid());
assert!((self.len() as usize) <= block_hash::FULL_SIZE);
assert!(other.len() <= block_hash::FULL_SIZE);
assert!(other
.iter()
.all(|&x| (x as usize) < block_hash::ALPHABET_SIZE));
self.edit_distance_internal(other)
}
fn score_strings_raw(&self, other: &[u8]) -> u32 {
assert!(self.is_valid_and_normalized());
assert!((self.len() as usize) <= block_hash::FULL_SIZE);
assert!(other.len() <= block_hash::FULL_SIZE);
assert!(other
.iter()
.all(|&x| (x as usize) < block_hash::ALPHABET_SIZE));
self.score_strings_raw_internal(other)
}
fn score_strings(&self, other: &[u8], log_block_size: u8) -> u32 {
assert!(self.is_valid_and_normalized());
assert!((self.len() as usize) <= block_hash::FULL_SIZE);
assert!(other.len() <= block_hash::FULL_SIZE);
assert!(other
.iter()
.all(|&x| (x as usize) < block_hash::ALPHABET_SIZE));
assert!((log_block_size as usize) <= block_size::NUM_VALID);
self.score_strings_internal(other, log_block_size)
}
}
pub(crate) trait BlockHashPositionArrayImplMutInternal:
BlockHashPositionArrayDataMut
{
#[inline(always)]
fn clear_representation_only(&mut self) {
self.representation_mut().fill(0);
}
#[inline(always)]
fn clear(&mut self) {
self.clear_representation_only();
*self.len_mut() = 0;
}
fn set_len_internal(&mut self, len: u8);
fn init_from_partial(&mut self, blockhash: &[u8]) {
debug_assert!(blockhash.len() <= 64);
let representation = self.representation_mut();
for (i, &ch) in blockhash.iter().enumerate() {
invariant!((ch as usize) < block_hash::ALPHABET_SIZE);
representation[ch as usize] |= 1u64 << i; }
self.set_len_internal(blockhash.len() as u8);
}
}
pub(crate) trait BlockHashPositionArrayImplMut: BlockHashPositionArrayDataMut {
fn init_from(&mut self, blockhash: &[u8]);
}
impl<T> BlockHashPositionArrayImplMut for T
where
T: BlockHashPositionArrayImplMutInternal,
{
fn init_from(&mut self, blockhash: &[u8]) {
assert!(blockhash.len() <= 64);
assert!(blockhash
.iter()
.all(|&x| (x as usize) < block_hash::ALPHABET_SIZE));
self.clear_representation_only();
self.init_from_partial(blockhash);
}
}
pub struct BlockHashPositionArrayRef<'a>(pub &'a [u64; block_hash::ALPHABET_SIZE], pub &'a u8);
pub(crate) struct BlockHashPositionArrayMutRef<'a>(
pub(crate) &'a mut [u64; block_hash::ALPHABET_SIZE],
pub(crate) &'a mut u8,
);
impl BlockHashPositionArrayData for BlockHashPositionArrayRef<'_> {
#[inline(always)]
fn representation(&self) -> &[u64; block_hash::ALPHABET_SIZE] {
self.0
}
#[inline(always)]
fn len(&self) -> u8 {
*self.1
}
}
impl BlockHashPositionArrayData for BlockHashPositionArrayMutRef<'_> {
#[inline(always)]
fn representation(&self) -> &[u64; block_hash::ALPHABET_SIZE] {
self.0
}
#[inline(always)]
fn len(&self) -> u8 {
*self.1
}
}
impl BlockHashPositionArrayDataMut for BlockHashPositionArrayMutRef<'_> {
#[inline(always)]
fn representation_mut(&mut self) -> &mut [u64; block_hash::ALPHABET_SIZE] {
self.0
}
#[inline(always)]
fn len_mut(&mut self) -> &mut u8 {
self.1
}
}
impl BlockHashPositionArrayImplMutInternal for BlockHashPositionArrayMutRef<'_> {
#[inline(always)]
fn set_len_internal(&mut self, len: u8) {
debug_assert!(len <= 64);
*self.1 = len;
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct BlockHashPositionArray {
representation: [u64; block_hash::ALPHABET_SIZE],
len: u8,
}
impl BlockHashPositionArray {
pub fn new() -> Self {
BlockHashPositionArray {
representation: [0u64; block_hash::ALPHABET_SIZE],
len: 0,
}
}
pub fn clear(&mut self) {
BlockHashPositionArrayImplMutInternal::clear(self);
}
pub fn init_from(&mut self, blockhash: &[u8]) {
BlockHashPositionArrayImplMut::init_from(self, blockhash);
}
}
impl BlockHashPositionArrayData for BlockHashPositionArray {
fn representation(&self) -> &[u64; block_hash::ALPHABET_SIZE] {
&self.representation
}
fn len(&self) -> u8 {
self.len
}
}
impl BlockHashPositionArrayDataMut for BlockHashPositionArray {
fn representation_mut(&mut self) -> &mut [u64; block_hash::ALPHABET_SIZE] {
&mut self.representation
}
fn len_mut(&mut self) -> &mut u8 {
&mut self.len
}
}
impl BlockHashPositionArrayImplMutInternal for BlockHashPositionArray {
fn set_len_internal(&mut self, len: u8) {
debug_assert!(len <= 64);
self.len = len;
}
}
impl Default for BlockHashPositionArray {
fn default() -> Self {
Self::new()
}
}
#[doc(hidden)]
mod const_asserts {
static_assertions::const_assert!(super::block_hash::FULL_SIZE <= 64);
}
pub(crate) mod tests;