#[cfg(all(feature = "alloc", not(any(test, doc, feature = "std"))))]
use alloc::string::String;
use crate::base64::BASE64_TABLE_U8;
use crate::hash::block::{
block_hash, block_size, BlockHashSize, BlockHashSizes, ConstrainedBlockHashSize,
ConstrainedBlockHashSizes,
};
use crate::hash::parser_state::{
BlockHashParseState, ParseError, ParseErrorKind, ParseErrorOrigin,
};
use crate::hash::{fuzzy_norm_type, fuzzy_raw_type, FuzzyHashData};
use crate::intrinsics::unlikely;
use crate::macros::{invariant, optionally_unsafe};
/// Run-length encoding (RLE) primitives used to record which character
/// runs of a raw block hash were shortened by normalization, so that the
/// raw form can be reconstructed from the normalized one.
///
/// Each non-terminator RLE byte packs a position (low bits) and a run
/// length (high bits) into a single `u8`.
mod rle_encoding {
    /// Number of low bits holding the position field of an RLE entry.
    pub const BITS_POSITION: u32 = 6;
    /// Bit mask extracting the position field (the low [`BITS_POSITION`] bits).
    pub const MASK_POSITION: u8 = (1u8 << BITS_POSITION) - 1;
    /// Number of high bits holding the run-length field of an RLE entry.
    pub const BITS_RUN_LENGTH: u32 = 2;
    /// Maximum run length representable by a single RLE entry
    /// (the field stores `length - 1`, so the maximum is `1 << BITS_RUN_LENGTH`).
    pub const MAX_RUN_LENGTH: usize = 1usize << BITS_RUN_LENGTH;
    /// Terminator byte marking the end of the used portion of an RLE block.
    /// `decode` maps it to position 0, which `encode` never produces
    /// (positions are asserted non-zero).
    pub const TERMINATOR: u8 = 0;
    #[doc(hidden)]
    #[allow(clippy::int_plus_one)]
    mod const_asserts {
        // Compile-time checks on the bit layout:
        // * both fields are non-empty and together fill exactly one `u8`,
        // * any valid block hash position fits in the position field,
        // * extending a maximum-length normalized sequence by one character
        //   still fits in a single entry's run-length field.
        use super::*;
        use crate::hash::block::block_hash;
        use static_assertions::{const_assert, const_assert_eq, const_assert_ne};
        const_assert_ne!(BITS_POSITION, 0);
        const_assert_ne!(BITS_RUN_LENGTH, 0);
        const_assert_eq!(BITS_POSITION + BITS_RUN_LENGTH, u8::BITS);
        const_assert!(block_hash::MAX_SEQUENCE_SIZE >= 2);
        const_assert!(block_hash::FULL_SIZE <= (1usize << BITS_POSITION));
        const_assert!(block_hash::MAX_SEQUENCE_SIZE + 1 <= MAX_RUN_LENGTH);
    }
    /// Packs a `(position, run length)` pair into one RLE byte.
    ///
    /// `pos` occupies the low bits and `len - 1` the high bits (a stored
    /// length field of 0 therefore means a run of 1).  `pos` must be
    /// non-zero so the result is never confused with [`TERMINATOR`].
    #[inline(always)]
    pub(crate) fn encode(pos: u8, len: u8) -> u8 {
        debug_assert!(len != 0);
        debug_assert!(len <= MAX_RUN_LENGTH as u8);
        debug_assert!(pos != 0);
        debug_assert!(pos <= MASK_POSITION);
        pos | ((len - 1) << BITS_POSITION)
    }
    /// Unpacks an RLE byte into its `(position, run length)` pair
    /// (inverse of [`encode`]; the length field is stored as `len - 1`).
    #[inline(always)]
    pub(crate) fn decode(value: u8) -> (u8, u8) {
        (value & MASK_POSITION, (value >> BITS_POSITION) + 1)
    }
    #[cfg(test)]
    #[test]
    fn decode_terminator() {
        // The terminator must decode to position 0 so that decoders can
        // detect end-of-data by checking `pos == 0`.
        let (pos, _) = decode(TERMINATOR);
        assert_eq!(pos, 0);
    }
}
/// Marker type pairing a block hash size (`SZ_BH`) with the size of the
/// RLE block (`SZ_R`) used to reconstruct its raw form.
///
/// Only the size pairs sealed inside the [`private`] module implement
/// [`ConstrainedReconstructionBlockSize`], restricting which combinations
/// can be used as [`FuzzyHashDualData`] parameters.
pub struct ReconstructionBlockSize<const SZ_BH: usize, const SZ_R: usize> {}
mod private {
    //! Sealed-trait machinery restricting which
    //! (block hash size, RLE block size) pairs are considered valid.
    use super::*;
    use crate::hash::block::block_hash;
    /// Sealed marker trait; implemented only for the size pairs declared
    /// through `rle_size_for_block_hash_template!` below, so external
    /// code cannot add new implementations.
    pub trait SealedReconstructionBlockSize {}
    // For each `sizes_def(block_hash_size, rle_size)` pair this macro:
    // * implements the sealed trait for that `ReconstructionBlockSize`, and
    // * emits compile-time checks that `rle_size` entries are enough to
    //   encode the worst-case amount of RLE data for that block hash size
    //   (bounded both by the number of normalizable runs and by the
    //   per-entry maximum run length).
    macro_rules! rle_size_for_block_hash_template {
        { $(sizes_def($block_hash_size: expr, $rle_size: expr);)* } => {
            $(impl SealedReconstructionBlockSize for ReconstructionBlockSize<{$block_hash_size}, {$rle_size}> {})*
            #[doc(hidden)]
            mod const_asserts {
                use super::*;
                use static_assertions::const_assert;
                // `usize::div_ceil` with a fallback for toolchains selected
                // by the `ffuzzy_div_ceil = "fallback"` cfg.
                #[cfg_attr(feature = "unstable", coverage(off))]
                #[allow(dead_code)]
                const fn div_ceil(a: usize, b: usize) -> usize {
                    cfg_if::cfg_if! {
                        if #[cfg(ffuzzy_div_ceil = "fallback")] {
                            a / b + (if a % b == 0 { 0 } else { 1 })
                        }
                        else {
                            usize::div_ceil(a, b)
                        }
                    }
                }
                #[cfg(test)]
                #[test]
                fn div_ceil_examples() {
                    // Exhaustive small-value spot checks of the fallback
                    // implementation against the expected ceiling division.
                    assert_eq!(div_ceil(0, 1), 0);
                    assert_eq!(div_ceil(1, 1), 1);
                    assert_eq!(div_ceil(2, 1), 2);
                    assert_eq!(div_ceil(3, 1), 3);
                    assert_eq!(div_ceil(4, 1), 4);
                    assert_eq!(div_ceil(5, 1), 5);
                    assert_eq!(div_ceil(6, 1), 6);
                    assert_eq!(div_ceil(7, 1), 7);
                    assert_eq!(div_ceil(8, 1), 8);
                    assert_eq!(div_ceil(0, 2), 0);
                    assert_eq!(div_ceil(1, 2), 1);
                    assert_eq!(div_ceil(2, 2), 1);
                    assert_eq!(div_ceil(3, 2), 2);
                    assert_eq!(div_ceil(4, 2), 2);
                    assert_eq!(div_ceil(5, 2), 3);
                    assert_eq!(div_ceil(6, 2), 3);
                    assert_eq!(div_ceil(7, 2), 4);
                    assert_eq!(div_ceil(8, 2), 4);
                    assert_eq!(div_ceil(0, 3), 0);
                    assert_eq!(div_ceil(1, 3), 1);
                    assert_eq!(div_ceil(2, 3), 1);
                    assert_eq!(div_ceil(3, 3), 1);
                    assert_eq!(div_ceil(4, 3), 2);
                    assert_eq!(div_ceil(5, 3), 2);
                    assert_eq!(div_ceil(6, 3), 2);
                    assert_eq!(div_ceil(7, 3), 3);
                    assert_eq!(div_ceil(8, 3), 3);
                    assert_eq!(div_ceil(0, 4), 0);
                    assert_eq!(div_ceil(1, 4), 1);
                    assert_eq!(div_ceil(2, 4), 1);
                    assert_eq!(div_ceil(3, 4), 1);
                    assert_eq!(div_ceil(4, 4), 1);
                    assert_eq!(div_ceil(5, 4), 2);
                    assert_eq!(div_ceil(6, 4), 2);
                    assert_eq!(div_ceil(7, 4), 2);
                    assert_eq!(div_ceil(8, 4), 2);
                }
                $(
                    const_assert!(
                        div_ceil($block_hash_size, block_hash::MAX_SEQUENCE_SIZE + 1) <= $rle_size
                    );
                    const_assert!(
                        div_ceil($block_hash_size, rle_encoding::MAX_RUN_LENGTH) <= $rle_size
                    );
                )*
            }
        };
    }
    // The only two valid pairs: each RLE block is a quarter of the
    // corresponding block hash size.
    rle_size_for_block_hash_template! {
        sizes_def(block_hash::FULL_SIZE, block_hash::FULL_SIZE / 4);
        sizes_def(block_hash::HALF_SIZE, block_hash::HALF_SIZE / 4);
    }
}
/// Public constraint trait for [`ReconstructionBlockSize`] parameter pairs.
///
/// Sealed: the blanket impl below forwards to the private
/// `SealedReconstructionBlockSize` trait, so only the size pairs declared
/// in the [`private`] module satisfy this bound.
pub trait ConstrainedReconstructionBlockSize: private::SealedReconstructionBlockSize {}
impl<T> ConstrainedReconstructionBlockSize for T where T: private::SealedReconstructionBlockSize {}
mod algorithms {
    //! Core routines converting between raw and normalized block hashes
    //! plus their RLE reconstruction data.  Index arithmetic here is
    //! guarded by `invariant!` hints inside `optionally_unsafe!` blocks;
    //! statement order is significant for those safety hints.
    use super::*;
    /// Appends RLE entries describing one normalized-out run extension.
    ///
    /// `pos` is the position (in the normalized block hash) of the last
    /// kept character of the run and `len` is the total raw run length
    /// (which must exceed `MAX_SEQUENCE_SIZE`).  The extension of
    /// `len - MAX_SEQUENCE_SIZE` characters is split into as many
    /// maximum-length entries as needed plus one remainder entry.
    ///
    /// Returns the new write offset into `rle_block`.
    #[inline(always)]
    pub(crate) fn update_rle_block<const SZ_RLE: usize>(
        rle_block: &mut [u8; SZ_RLE],
        rle_offset: usize,
        pos: usize,
        len: usize,
    ) -> usize {
        debug_assert!(len > block_hash::MAX_SEQUENCE_SIZE);
        // Number of *extra* characters beyond the kept sequence, minus one
        // (the encoding stores run lengths biased by one).
        let extend_len_minus_one = len - block_hash::MAX_SEQUENCE_SIZE - 1;
        // Count of full (MAX_RUN_LENGTH) entries preceding the remainder.
        let seq_fill_size = extend_len_minus_one / rle_encoding::MAX_RUN_LENGTH;
        let start = rle_offset;
        optionally_unsafe! {
            invariant!(start <= rle_block.len());
            invariant!(start + seq_fill_size <= rle_block.len());
            invariant!(start <= start + seq_fill_size);
        }
        rle_block[start..start + seq_fill_size].fill(rle_encoding::encode(
            pos as u8,
            rle_encoding::MAX_RUN_LENGTH as u8,
        ));
        optionally_unsafe! {
            invariant!(start + seq_fill_size < rle_block.len());
        }
        // Final entry carries the remaining 1..=MAX_RUN_LENGTH characters.
        rle_block[start + seq_fill_size] = rle_encoding::encode(
            pos as u8,
            (extend_len_minus_one % rle_encoding::MAX_RUN_LENGTH) as u8 + 1,
        );
        start + seq_fill_size + 1
    }
    /// Normalizes `blockhash_in` into `blockhash_out` while recording, in
    /// `rle_block_out`, every run that was truncated (so the raw form can
    /// later be reconstructed).
    ///
    /// Runs longer than `MAX_SEQUENCE_SIZE` keep only their first
    /// `MAX_SEQUENCE_SIZE` characters; the cut amount is RLE-encoded at
    /// the position of the last kept character.  `blockhash_len_out`
    /// receives the normalized length; unused tails of both output
    /// buffers are zero/terminator filled.
    #[inline]
    pub(crate) fn compress_block_hash_with_rle<const SZ_BH: usize, const SZ_RLE: usize>(
        blockhash_out: &mut [u8; SZ_BH],
        rle_block_out: &mut [u8; SZ_RLE],
        blockhash_len_out: &mut u8,
        blockhash_in: &[u8],
    ) where
        BlockHashSize<SZ_BH>: ConstrainedBlockHashSize,
        ReconstructionBlockSize<SZ_BH, SZ_RLE>: ConstrainedReconstructionBlockSize,
    {
        debug_assert!(blockhash_in.len() <= SZ_BH);
        optionally_unsafe! {
            let mut rle_offset = 0;
            // `seq` counts repeats of the current character (0 for the first
            // occurrence); `len` is the number of characters written so far.
            let mut seq = 0usize;
            let mut len = 0usize;
            // Sentinel that can never equal a valid Base64 index, so the
            // first input character always takes the "changed" branch.
            let mut prev = crate::base64::BASE64_INVALID;
            for &curr in blockhash_in {
                if curr == prev {
                    seq += 1;
                    if seq >= block_hash::MAX_SEQUENCE_SIZE {
                        // Run already at the normalized cap: drop this
                        // character (it will be accounted for in the RLE
                        // block when the run ends).
                        continue;
                    }
                }
                else {
                    if seq >= block_hash::MAX_SEQUENCE_SIZE {
                        // The just-finished run was truncated; record the
                        // cut (last kept char is at `len - 1`, total raw
                        // run length is `seq + 1`).
                        rle_offset = update_rle_block(rle_block_out, rle_offset, len - 1, seq + 1);
                    }
                    seq = 0;
                    prev = curr;
                }
                invariant!(len < blockhash_out.len());
                blockhash_out[len] = curr;
                len += 1;
            }
            // Flush a truncated run that extends to the end of the input.
            if seq >= block_hash::MAX_SEQUENCE_SIZE {
                rle_offset = update_rle_block(rle_block_out, rle_offset, len - 1, seq + 1);
            }
            *blockhash_len_out = len as u8;
            invariant!(len <= blockhash_out.len());
            blockhash_out[len..].fill(0);
            invariant!(rle_offset <= rle_block_out.len());
            rle_block_out[rle_offset..].fill(rle_encoding::TERMINATOR);
        }
    }
    /// Reconstructs the raw block hash from a normalized one plus its RLE
    /// block (inverse of [`compress_block_hash_with_rle`]).
    ///
    /// Copies the normalized data as-is between recorded positions and, at
    /// each RLE entry, re-inserts `len` copies of the character found at
    /// that entry's position.  Assumes the inputs are valid/consistent
    /// (see `is_valid_rle_block_for_block_hash`).
    #[inline]
    pub(crate) fn expand_block_hash_using_rle<const SZ_BH: usize, const SZ_RLE: usize>(
        blockhash_out: &mut [u8; SZ_BH],
        blockhash_len_out: &mut u8,
        blockhash_in: &[u8; SZ_BH],
        blockhash_len_in: u8,
        rle_block_in: &[u8; SZ_RLE],
    ) where
        BlockHashSize<SZ_BH>: ConstrainedBlockHashSize,
        ReconstructionBlockSize<SZ_BH, SZ_RLE>: ConstrainedReconstructionBlockSize,
    {
        optionally_unsafe! {
            let mut offset_src = 0usize;
            let mut offset_dst = 0usize;
            let mut len_out = blockhash_len_in;
            // Bounds-hinted copy of `len` bytes from the normalized input
            // (at `src`) to the raw output (at `dst`).
            let copy_as_is = |blockhash_out: &mut [u8; SZ_BH], dst, src, len| {
                invariant!(src <= blockhash_in.len());
                invariant!(src + len <= blockhash_in.len());
                invariant!(src <= src + len);
                invariant!(dst <= blockhash_out.len());
                invariant!(dst + len <= blockhash_out.len());
                invariant!(dst <= dst + len);
                blockhash_out[dst..dst+len].clone_from_slice(&blockhash_in[src..src+len]);
            };
            for &rle in rle_block_in {
                let (pos, len) = rle_encoding::decode(rle);
                if pos == 0 {
                    // Position 0 is only produced by the terminator;
                    // the rest of the RLE block is unused.
                    debug_assert!(rle == rle_encoding::TERMINATOR);
                    break;
                }
                let pos = pos as usize;
                len_out += len;
                let len = len as usize;
                // Copy everything up to (but excluding) the run position,
                // then insert `len` copies of the run's character.
                let copy_len = pos - offset_src;
                copy_as_is(blockhash_out, offset_dst, offset_src, copy_len);
                invariant!(pos < blockhash_in.len());
                let lastch = blockhash_in[pos];
                invariant!(offset_dst + copy_len <= blockhash_out.len());
                invariant!(offset_dst + copy_len + len <= blockhash_out.len());
                invariant!(offset_dst + copy_len <= offset_dst + copy_len + len);
                blockhash_out[offset_dst+copy_len..offset_dst+copy_len+len].fill(lastch);
                offset_src += copy_len;
                offset_dst += copy_len + len;
            }
            // Copy the remainder after the last recorded run and zero-fill
            // the unused tail of the output buffer.
            let copy_len = len_out as usize - offset_dst;
            copy_as_is(blockhash_out, offset_dst, offset_src, copy_len);
            invariant!(offset_dst + copy_len <= blockhash_out.len());
            blockhash_out[offset_dst+copy_len..].fill(0);
            *blockhash_len_out = len_out;
        }
    }
    /// Validates an RLE block against its normalized block hash.
    ///
    /// Checks that: all non-terminator entries precede the (first)
    /// terminator; positions are in range, non-decreasing, and late enough
    /// to end a maximum-length run; repeated entries at the same position
    /// only follow a maximum-run-length entry; each referenced position
    /// really ends a run of `MAX_SEQUENCE_SIZE` identical characters; and
    /// the reconstructed length does not exceed the raw capacity `SZ_BH`.
    pub(crate) fn is_valid_rle_block_for_block_hash<const SZ_BH: usize, const SZ_RLE: usize>(
        blockhash: &[u8; SZ_BH],
        rle_block: &[u8; SZ_RLE],
        blockhash_len: u8,
    ) -> bool
    where
        BlockHashSize<SZ_BH>: ConstrainedBlockHashSize,
        ReconstructionBlockSize<SZ_BH, SZ_RLE>: ConstrainedReconstructionBlockSize,
    {
        let mut expanded_len = blockhash_len as u32;
        let mut terminator_expected = false;
        let mut prev_pos = 0u8;
        let mut prev_len = 0u8;
        for &rle in rle_block {
            // Once a terminator is seen, only terminators may follow.
            if unlikely(rle != rle_encoding::TERMINATOR && terminator_expected) {
                return false;
            }
            if rle == rle_encoding::TERMINATOR {
                terminator_expected = true;
                continue;
            }
            let (pos, len) = rle_encoding::decode(rle);
            // A run of MAX_SEQUENCE_SIZE chars cannot end before index
            // MAX_SEQUENCE_SIZE - 1; positions must stay in range and
            // non-decreasing.
            if unlikely(
                pos < block_hash::MAX_SEQUENCE_SIZE as u8 - 1
                    || pos >= blockhash_len
                    || pos < prev_pos,
            ) {
                return false;
            }
            if prev_pos == pos {
                // A continuation entry at the same position is only valid
                // if the previous one was saturated.
                if unlikely(prev_len != rle_encoding::MAX_RUN_LENGTH as u8) {
                    return false;
                }
            } else {
                // First entry for this position: the normalized hash must
                // actually contain a full-length run ending exactly here.
                let end = pos as usize;
                let start = end - (block_hash::MAX_SEQUENCE_SIZE - 1);
                optionally_unsafe! {
                    invariant!(start < blockhash.len());
                    invariant!(end < blockhash.len());
                    #[allow(clippy::int_plus_one)]
                    {
                        invariant!(start + 1 <= end);
                    }
                }
                let ch = blockhash[start];
                if unlikely(
                    blockhash[start + 1..=end] .iter()
                        .any(|x| *x != ch),
                ) {
                    return false;
                }
            }
            prev_pos = pos;
            prev_len = len;
            expanded_len += len as u32;
        }
        // The reconstructed raw block hash must fit its buffer.
        if unlikely(expanded_len as usize > SZ_BH) {
            return false;
        }
        true
    }
}
/// A "dual" fuzzy hash: a normalized fuzzy hash plus two RLE blocks that
/// record which character runs normalization removed, so both the
/// normalized and the raw form can be produced from one object.
///
/// Type parameters: `S1`/`S2` are the two block hash capacities and
/// `C1`/`C2` the matching RLE block sizes (constrained to the sealed
/// valid pairs).  8-byte alignment is requested explicitly; presumably
/// for cheaper copies/comparisons -- not derivable from this file alone.
#[repr(align(8))]
#[derive(Copy, Clone)]
pub struct FuzzyHashDualData<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    // RLE reconstruction data for block hash 1 (terminator-filled when
    // nothing was removed by normalization).
    rle_block1: [u8; C1],
    // RLE reconstruction data for block hash 2.
    rle_block2: [u8; C2],
    // The normalized fuzzy hash this object wraps.
    norm_hash: fuzzy_norm_type!(S1, S2),
}
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
    FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    /// Maximum size of block hash 1 (mirrors the normalized hash type).
    pub const MAX_BLOCK_HASH_SIZE_1: usize = <fuzzy_norm_type!(S1, S2)>::MAX_BLOCK_HASH_SIZE_1;
    /// Maximum size of block hash 2 (mirrors the normalized hash type).
    pub const MAX_BLOCK_HASH_SIZE_2: usize = <fuzzy_norm_type!(S1, S2)>::MAX_BLOCK_HASH_SIZE_2;
    /// RLE block capacity for block hash 1 (the `C1` parameter).
    #[allow(dead_code)]
    const RLE_BLOCK_SIZE_1: usize = C1;
    /// RLE block capacity for block hash 2 (the `C2` parameter).
    #[allow(dead_code)]
    const RLE_BLOCK_SIZE_2: usize = C2;
    /// A dual hash is not itself a normalized form (it also carries
    /// reconstruction data for the raw form).
    pub const IS_NORMALIZED_FORM: bool = false;
    /// Whether this is the long variant (mirrors the normalized hash type).
    pub const IS_LONG_FORM: bool = <fuzzy_norm_type!(S1, S2)>::IS_LONG_FORM;
    /// Maximum string-representation length (mirrors the normalized type).
    pub const MAX_LEN_IN_STR: usize = <fuzzy_norm_type!(S1, S2)>::MAX_LEN_IN_STR;
    /// Creates an empty dual hash (empty normalized hash, terminator-only
    /// RLE blocks).
    pub fn new() -> Self {
        Self {
            rle_block1: [rle_encoding::TERMINATOR; C1],
            rle_block2: [rle_encoding::TERMINATOR; C2],
            norm_hash: FuzzyHashData::new(),
        }
    }
    /// Re-initializes `self` from a raw-form fuzzy hash: normalizes both
    /// block hashes while recording the removed runs into the RLE blocks.
    pub fn init_from_raw_form(&mut self, hash: &fuzzy_raw_type!(S1, S2)) {
        self.norm_hash.log_blocksize = hash.log_blocksize;
        algorithms::compress_block_hash_with_rle(
            &mut self.norm_hash.blockhash1,
            &mut self.rle_block1,
            &mut self.norm_hash.len_blockhash1,
            hash.block_hash_1(),
        );
        algorithms::compress_block_hash_with_rle(
            &mut self.norm_hash.blockhash2,
            &mut self.rle_block2,
            &mut self.norm_hash.len_blockhash2,
            hash.block_hash_2(),
        );
    }
    /// Builds a dual hash from near-raw internal parts (a valid
    /// log-block-size plus two block hashes given as Base64 *indices*).
    ///
    /// Preconditions are only `debug_assert`ed / invariant-hinted here;
    /// callers must validate first (see the checked/unchecked wrappers).
    fn new_from_internals_near_raw_internal(
        log_block_size: u8,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        debug_assert!(block_size::is_log_valid(log_block_size));
        debug_assert!(block_hash_1
            .iter()
            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
        debug_assert!(block_hash_2
            .iter()
            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
        optionally_unsafe! {
            invariant!(block_hash_1.len() <= S1);
            invariant!(block_hash_2.len() <= S2);
        }
        let mut hash = Self::new();
        hash.norm_hash.log_blocksize = log_block_size;
        algorithms::compress_block_hash_with_rle(
            &mut hash.norm_hash.blockhash1,
            &mut hash.rle_block1,
            &mut hash.norm_hash.len_blockhash1,
            block_hash_1,
        );
        algorithms::compress_block_hash_with_rle(
            &mut hash.norm_hash.blockhash2,
            &mut hash.rle_block2,
            &mut hash.norm_hash.len_blockhash2,
            block_hash_2,
        );
        hash
    }
    /// Unchecked variant of [`Self::new_from_internals_near_raw`].
    ///
    /// # Safety
    /// The caller must uphold that method's preconditions (valid
    /// log-block-size, in-range lengths and Base64 indices); they are not
    /// verified in release builds.
    #[cfg(feature = "unchecked")]
    #[allow(unsafe_code)]
    #[inline(always)]
    pub unsafe fn new_from_internals_near_raw_unchecked(
        log_block_size: u8,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        Self::new_from_internals_near_raw_internal(log_block_size, block_hash_1, block_hash_2)
    }
    /// Builds a dual hash from near-raw internal parts, validating all
    /// preconditions with `assert!` (panics on invalid input).
    #[inline]
    pub fn new_from_internals_near_raw(
        log_block_size: u8,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        assert!(block_size::is_log_valid(log_block_size));
        assert!(block_hash_1.len() <= S1);
        assert!(block_hash_2.len() <= S2);
        assert!(block_hash_1
            .iter()
            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
        assert!(block_hash_2
            .iter()
            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
        Self::new_from_internals_near_raw_internal(log_block_size, block_hash_1, block_hash_2)
    }
    /// As [`Self::new_from_internals_near_raw_internal`] but taking the
    /// actual block size (must already be a valid power-of-two size).
    #[allow(dead_code)]
    #[inline(always)]
    fn new_from_internals_internal(
        block_size: u32,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        debug_assert!(block_size::is_valid(block_size));
        Self::new_from_internals_near_raw_internal(
            block_size::log_from_valid_internal(block_size),
            block_hash_1,
            block_hash_2,
        )
    }
    /// Unchecked variant of [`Self::new_from_internals`].
    ///
    /// # Safety
    /// The caller must uphold that method's preconditions (valid block
    /// size, in-range lengths and Base64 indices).
    #[cfg(feature = "unchecked")]
    #[allow(unsafe_code)]
    #[inline(always)]
    pub unsafe fn new_from_internals_unchecked(
        block_size: u32,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        Self::new_from_internals_internal(block_size, block_hash_1, block_hash_2)
    }
    /// Builds a dual hash from a block size plus two block hashes,
    /// validating all preconditions with `assert!`.
    #[inline]
    pub fn new_from_internals(block_size: u32, block_hash_1: &[u8], block_hash_2: &[u8]) -> Self {
        assert!(block_size::is_valid(block_size));
        Self::new_from_internals_near_raw(
            block_size::log_from_valid_internal(block_size),
            block_hash_1,
            block_hash_2,
        )
    }
    /// Returns the *base-2 logarithm* form of the block size.
    #[inline(always)]
    pub fn log_block_size(&self) -> u8 {
        self.norm_hash.log_blocksize
    }
    /// Returns the block size of this fuzzy hash.
    #[inline]
    pub fn block_size(&self) -> u32 {
        block_size::from_log_internal(self.norm_hash.log_blocksize)
    }
    /// Borrows the normalized form stored inside this dual hash.
    #[inline(always)]
    pub fn as_normalized(&self) -> &fuzzy_norm_type!(S1, S2) {
        &self.norm_hash
    }
    /// Creates a dual hash from a raw-form fuzzy hash
    /// (see [`Self::init_from_raw_form`]).
    pub fn from_raw_form(hash: &fuzzy_raw_type!(S1, S2)) -> Self {
        let mut dual_hash = FuzzyHashDualData::new();
        dual_hash.init_from_raw_form(hash);
        dual_hash
    }
    /// Creates a dual hash from an already-normalized fuzzy hash; the RLE
    /// blocks stay empty, so raw and normalized forms coincide.
    pub fn from_normalized(hash: &fuzzy_norm_type!(S1, S2)) -> Self {
        Self {
            rle_block1: [rle_encoding::TERMINATOR; C1],
            rle_block2: [rle_encoding::TERMINATOR; C2],
            norm_hash: *hash,
        }
    }
    /// Writes the reconstructed raw form of this dual hash into `hash`.
    pub fn into_mut_raw_form(&self, hash: &mut fuzzy_raw_type!(S1, S2)) {
        hash.log_blocksize = self.norm_hash.log_blocksize;
        algorithms::expand_block_hash_using_rle(
            &mut hash.blockhash1,
            &mut hash.len_blockhash1,
            &self.norm_hash.blockhash1,
            self.norm_hash.len_blockhash1,
            &self.rle_block1,
        );
        algorithms::expand_block_hash_using_rle(
            &mut hash.blockhash2,
            &mut hash.len_blockhash2,
            &self.norm_hash.blockhash2,
            self.norm_hash.len_blockhash2,
            &self.rle_block2,
        );
    }
    /// Returns the reconstructed raw form as a new fuzzy hash object.
    pub fn to_raw_form(&self) -> fuzzy_raw_type!(S1, S2) {
        let mut hash = FuzzyHashData::new();
        self.into_mut_raw_form(&mut hash);
        hash
    }
    /// Returns a copy of the normalized form.
    #[inline(always)]
    pub fn to_normalized(&self) -> fuzzy_norm_type!(S1, S2) {
        self.norm_hash
    }
    /// Returns the string representation of the normalized form.
    #[cfg(feature = "alloc")]
    pub fn to_normalized_string(&self) -> String {
        self.norm_hash.to_string()
    }
    /// Returns the string representation of the reconstructed raw form.
    #[cfg(feature = "alloc")]
    pub fn to_raw_form_string(&self) -> String {
        self.to_raw_form().to_string()
    }
    /// Parses a fuzzy hash from bytes, filling both the normalized hash
    /// and the RLE blocks in one pass; `index` receives the position where
    /// parsing stopped.
    #[inline(always)]
    fn from_bytes_with_last_index_internal(
        str: &[u8],
        index: &mut usize,
    ) -> Result<Self, ParseError> {
        use crate::hash::{algorithms, hash_from_bytes_with_last_index_internal_template};
        use crate::hash_dual::algorithms::update_rle_block;
        let mut fuzzy = Self::new();
        // The shared parser template invokes the closures whenever it
        // normalizes away part of a run, letting us record that run in the
        // matching RLE block.  The `pos + MAX_SEQUENCE_SIZE - 1` adjustment
        // converts the reported position to the last kept character of the
        // run, as `update_rle_block` expects -- NOTE(review): inferred from
        // `update_rle_block`'s contract; confirm against the template.
        hash_from_bytes_with_last_index_internal_template! {
            str, index, true,
            fuzzy.norm_hash.log_blocksize,
            { let mut rle_offset = 0; },
            #[inline(always)] |pos, len| rle_offset = update_rle_block(
                &mut fuzzy.rle_block1, rle_offset, pos + block_hash::MAX_SEQUENCE_SIZE - 1, len),
            fuzzy.norm_hash.blockhash1, fuzzy.norm_hash.len_blockhash1,
            { let mut rle_offset = 0; },
            #[inline(always)] |pos, len| rle_offset = update_rle_block(
                &mut fuzzy.rle_block2, rle_offset, pos + block_hash::MAX_SEQUENCE_SIZE - 1, len),
            fuzzy.norm_hash.blockhash2, fuzzy.norm_hash.len_blockhash2
        }
        Ok(fuzzy)
    }
    /// Parses a fuzzy hash from bytes; on error, `index` points at the
    /// offending position.
    pub fn from_bytes_with_last_index(str: &[u8], index: &mut usize) -> Result<Self, ParseError> {
        Self::from_bytes_with_last_index_internal(str, index)
    }
    /// Parses a fuzzy hash from bytes.
    pub fn from_bytes(str: &[u8]) -> Result<Self, ParseError> {
        Self::from_bytes_with_last_index_internal(str, &mut 0usize)
    }
    /// Discards the reconstruction data, making the raw form equal to the
    /// normalized form.
    pub fn normalize_in_place(&mut self) {
        self.rle_block1 = [rle_encoding::TERMINATOR; C1],
        self.rle_block2 = [rle_encoding::TERMINATOR; C2];
    }
    /// Returns whether the raw form equals the normalized form (i.e. both
    /// RLE blocks are empty; entries are contiguous, so checking the first
    /// byte of each suffices).
    pub fn is_normalized(&self) -> bool {
        self.rle_block1[0] == rle_encoding::TERMINATOR
            && self.rle_block2[0] == rle_encoding::TERMINATOR
    }
    /// Returns whether the object is internally consistent: the normalized
    /// hash is valid and each RLE block is valid against its block hash.
    pub fn is_valid(&self) -> bool {
        self.norm_hash.is_valid()
            && algorithms::is_valid_rle_block_for_block_hash(
                &self.norm_hash.blockhash1,
                &self.rle_block1,
                self.norm_hash.len_blockhash1,
            )
            && algorithms::is_valid_rle_block_for_block_hash(
                &self.norm_hash.blockhash2,
                &self.rle_block2,
                self.norm_hash.len_blockhash2,
            )
    }
}
/// Cheap read-only access to the embedded normalized hash
/// (same as [`FuzzyHashDualData::as_normalized`]).
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
    AsRef<fuzzy_norm_type!(S1, S2)> for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline(always)]
    fn as_ref(&self) -> &fuzzy_norm_type!(S1, S2) {
        &self.norm_hash
    }
}
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> Default
for FuzzyHashDualData<S1, S2, C1, C2>
where
BlockHashSize<S1>: ConstrainedBlockHashSize,
BlockHashSize<S2>: ConstrainedBlockHashSize,
BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
fn default() -> Self {
Self::new()
}
}
/// Field-wise equality: two dual hashes are equal only when both the
/// normalized hash and *both* RLE reconstruction blocks match (so two
/// objects with the same normalized form but different raw forms compare
/// unequal).
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> PartialEq
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    fn eq(&self, other: &Self) -> bool {
        // Compare the cheap fixed-size byte arrays first; the conjunction
        // is commutative, so the result is the same in any order.
        self.rle_block1 == other.rle_block1
            && self.rle_block2 == other.rle_block2
            && self.norm_hash == other.norm_hash
    }
}
// Equality is total (no NaN-like partial cases among the byte-array and
// normalized-hash fields), so `Eq` holds with no extra methods.
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> Eq
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
}
/// Hashes every field that participates in equality (the normalized hash
/// and both RLE blocks), keeping the `Hash`/`Eq` contract.
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::hash::Hash
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        self.norm_hash.hash(state);
        // The RLE blocks are fixed-size arrays, so feeding the raw bytes
        // (without a length prefix) is unambiguous.
        state.write(&self.rle_block1);
        state.write(&self.rle_block2);
    }
}
/// Total order: lexicographic over (normalized hash, RLE block 1,
/// RLE block 2) — consistent with `PartialEq`/`Eq`, which compare the
/// same three fields.
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> Ord
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        // Chain the field comparisons; each later field only decides the
        // result when all earlier fields compared equal (identical to a
        // tuple comparison over the three fields).
        self.norm_hash
            .cmp(&other.norm_hash)
            .then_with(|| self.rle_block1.cmp(&other.rle_block1))
            .then_with(|| self.rle_block2.cmp(&other.rle_block2))
    }
}
/// Partial order delegating to [`Ord`], so the two orderings can never
/// disagree (the canonical implementation for totally ordered types).
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> PartialOrd
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline(always)]
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
/// Debug formatting with two shapes: a decoded, human-readable view when
/// the object is internally consistent, and a raw field dump (flagged
/// `ILL_FORMED`) when it is not.
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::fmt::Debug
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Formats one RLE byte as `RLE(pos, len)` or `RLENull`.
        struct DebugBuilderForRLEBlockEntry(u8);
        // Formats an RLE block with terminator entries filtered out
        // (used when the object is valid, so trailing terminators are noise).
        struct DebugBuilderForValidRLEBlock<'a, const N: usize> {
            block: &'a [u8; N],
        }
        // Formats an RLE block verbatim, terminators included
        // (used for ill-formed objects, where every byte may matter).
        struct DebugBuilderForInvalidRLEBlock<'a, const N: usize> {
            block: &'a [u8; N],
        }
        impl<'a, const N: usize> DebugBuilderForValidRLEBlock<'a, N> {
            pub fn new(rle_block: &'a [u8; N]) -> Self {
                Self { block: rle_block }
            }
        }
        impl<'a, const N: usize> DebugBuilderForInvalidRLEBlock<'a, N> {
            pub fn new(rle_block: &'a [u8; N]) -> Self {
                Self { block: rle_block }
            }
        }
        impl core::fmt::Debug for DebugBuilderForRLEBlockEntry {
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                if self.0 != rle_encoding::TERMINATOR {
                    let (pos, len) = rle_encoding::decode(self.0);
                    f.debug_tuple("RLE").field(&pos).field(&len).finish()
                } else {
                    f.debug_tuple("RLENull").finish()
                }
            }
        }
        impl<'a, const N: usize> core::fmt::Debug for DebugBuilderForValidRLEBlock<'a, N> {
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                f.debug_list()
                    .entries(
                        self.block
                            .iter()
                            .cloned()
                            .filter(|x| *x != rle_encoding::TERMINATOR)
                            .map(DebugBuilderForRLEBlockEntry),
                    )
                    .finish()
            }
        }
        impl<'a, const N: usize> core::fmt::Debug for DebugBuilderForInvalidRLEBlock<'a, N> {
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                f.debug_list()
                    .entries(self.block.iter().cloned().map(DebugBuilderForRLEBlockEntry))
                    .finish()
            }
        }
        if self.is_valid() {
            // Valid object: render the block hashes as Base64 text
            // (indices mapped through the Base64 alphabet, truncated to the
            // stored lengths) plus the decoded RLE entries.
            let buffer1 = self
                .norm_hash
                .blockhash1
                .map(|x| BASE64_TABLE_U8[x as usize]);
            let buffer2 = self
                .norm_hash
                .blockhash2
                .map(|x| BASE64_TABLE_U8[x as usize]);
            f.debug_struct("FuzzyHashDualData")
                .field("LONG", &(S2 == block_hash::FULL_SIZE))
                .field(
                    "block_size",
                    &block_size::from_log_internal(self.norm_hash.log_blocksize),
                )
                .field(
                    "blockhash1",
                    &core::str::from_utf8(&buffer1[..self.norm_hash.len_blockhash1 as usize])
                        .unwrap(),
                )
                .field(
                    "blockhash2",
                    &core::str::from_utf8(&buffer2[..self.norm_hash.len_blockhash2 as usize])
                        .unwrap(),
                )
                .field(
                    "rle_block1",
                    &(DebugBuilderForValidRLEBlock::new(&self.rle_block1)),
                )
                .field(
                    "rle_block2",
                    &(DebugBuilderForValidRLEBlock::new(&self.rle_block2)),
                )
                .finish()
        } else {
            // Ill-formed object: dump raw fields without any decoding that
            // could itself misbehave on inconsistent data.
            f.debug_struct("FuzzyHashDualData")
                .field("ILL_FORMED", &true)
                .field("LONG", &(S2 == block_hash::FULL_SIZE))
                .field("log_blocksize", &self.norm_hash.log_blocksize)
                .field("len_blockhash1", &self.norm_hash.len_blockhash1)
                .field("len_blockhash2", &self.norm_hash.len_blockhash2)
                .field("blockhash1", &self.norm_hash.blockhash1)
                .field("blockhash2", &self.norm_hash.blockhash2)
                .field(
                    "rle_block1",
                    &(DebugBuilderForInvalidRLEBlock::new(&self.rle_block1)),
                )
                .field(
                    "rle_block2",
                    &(DebugBuilderForInvalidRLEBlock::new(&self.rle_block2)),
                )
                .finish()
        }
    }
}
/// Displays both forms as `{normalized|raw}` (the literal braces come
/// from the doubled `{{`/`}}` in the format string).
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::fmt::Display
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "{{{}|{}}}", self.norm_hash, self.to_raw_form())
    }
}
/// String parsing; delegates to [`FuzzyHashDualData::from_bytes`] over
/// the UTF-8 bytes of the input.
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::str::FromStr
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    type Err = ParseError;
    #[inline(always)]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::from_bytes(s.as_bytes())
    }
}
/// Conversion from a normalized fuzzy hash
/// (see [`FuzzyHashDualData::from_normalized`]; RLE blocks stay empty).
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
    core::convert::From<fuzzy_norm_type!(S1, S2)> for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline]
    fn from(value: fuzzy_norm_type!(S1, S2)) -> Self {
        Self::from_normalized(&value)
    }
}
/// Conversion from a raw-form fuzzy hash
/// (see [`FuzzyHashDualData::from_raw_form`]; normalizes and records the
/// removed runs so the raw form remains recoverable).
impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
    core::convert::From<fuzzy_raw_type!(S1, S2)> for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline]
    fn from(value: fuzzy_raw_type!(S1, S2)) -> Self {
        Self::from_raw_form(&value)
    }
}
/// Regular (short) dual fuzzy hash: full-size block hash 1, half-size
/// block hash 2, with quarter-size RLE blocks for each.
pub type DualFuzzyHash = FuzzyHashDualData<
    { block_hash::FULL_SIZE },
    { block_hash::HALF_SIZE },
    { block_hash::FULL_SIZE / 4 },
    { block_hash::HALF_SIZE / 4 },
>;
/// Long dual fuzzy hash: both block hashes at full size, each with a
/// quarter-size RLE block.
pub type LongDualFuzzyHash = FuzzyHashDualData<
    { block_hash::FULL_SIZE },
    { block_hash::FULL_SIZE },
    { block_hash::FULL_SIZE / 4 },
    { block_hash::FULL_SIZE / 4 },
>;
mod tests;