#[cfg(feature = "alloc")]
use alloc::string::String;

use crate::internals::base64::BASE64_TABLE_U8;
use crate::internals::hash::block::{
    block_hash, block_size, BlockHashSize, BlockHashSizes, ConstrainedBlockHashSize,
    ConstrainedBlockHashSizes,
};
use crate::internals::hash::parser_state::{
    BlockHashParseState, ParseError, ParseErrorKind, ParseErrorOrigin,
};
use crate::internals::hash::{fuzzy_norm_type, fuzzy_raw_type, FuzzyHashData};
use crate::internals::intrinsics::unlikely;
use crate::internals::macros::invariant;

/// Run-length encoding (RLE) helpers used to record the characters removed by
/// normalization so that the raw form of a block hash can be reconstructed.
///
/// Each RLE entry is one byte: the low `BITS_POSITION` bits hold a position in
/// the normalized block hash and the high `BITS_RUN_LENGTH` bits hold the run
/// length minus one.
mod rle_encoding {
    /// Number of bits used to represent the position.
    pub const BITS_POSITION: u32 = 6;

    /// Bit mask to extract the position part of an RLE entry.
    pub const MASK_POSITION: u8 = (1u8 << BITS_POSITION) - 1;

    /// Number of bits used to represent the run length.
    pub const BITS_RUN_LENGTH: u32 = 2;

    /// Maximum run length a single RLE entry can represent.
    pub const MAX_RUN_LENGTH: usize = 1usize << BITS_RUN_LENGTH;

    /// The terminator entry (a position of zero never occurs in valid entries).
    pub const TERMINATOR: u8 = 0;

    #[doc(hidden)]
    #[allow(clippy::int_plus_one)]
    mod const_asserts {
        use static_assertions::{const_assert, const_assert_eq, const_assert_ne};

        use crate::internals::hash::block::block_hash;

        use super::*;

        const_assert_ne!(BITS_POSITION, 0);
        const_assert_ne!(BITS_RUN_LENGTH, 0);
        const_assert_eq!(BITS_POSITION + BITS_RUN_LENGTH, u8::BITS);

        const_assert!(block_hash::MAX_SEQUENCE_SIZE >= 2);

        const_assert!(block_hash::FULL_SIZE <= (1usize << BITS_POSITION));
        const_assert!(block_hash::MAX_SEQUENCE_SIZE + 1 <= MAX_RUN_LENGTH);
    }

    /// Encodes a position and a run length (`1..=MAX_RUN_LENGTH`) into a single RLE byte.
    #[inline(always)]
    pub(crate) fn encode(pos: u8, len: u8) -> u8 {
        debug_assert!(len != 0);
        debug_assert!(len <= MAX_RUN_LENGTH as u8);
        debug_assert!(pos != 0);
        debug_assert!(pos <= MASK_POSITION);
        pos | ((len - 1) << BITS_POSITION)
    }

    /// Decodes an RLE byte into its `(position, run length)` pair.
    #[inline(always)]
    pub(crate) fn decode(value: u8) -> (u8, u8) {
        (value & MASK_POSITION, (value >> BITS_POSITION) + 1)
    }

    #[cfg(test)]
    #[test]
    fn decode_terminator() {
        let (pos, _) = decode(TERMINATOR);
        assert_eq!(pos, 0);
    }
}
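
// A minimal sketch (not part of the original test suite) of the RLE byte layout above:
// the low `BITS_POSITION` bits carry the position and the high `BITS_RUN_LENGTH` bits
// carry the run length minus one, so every valid pair round-trips through
// `encode()` and `decode()`.
#[cfg(test)]
mod rle_encoding_roundtrip_sketch {
    use super::rle_encoding;

    #[test]
    fn encode_decode_roundtrip() {
        for pos in 1..=rle_encoding::MASK_POSITION {
            for len in 1..=(rle_encoding::MAX_RUN_LENGTH as u8) {
                assert_eq!(rle_encoding::decode(rle_encoding::encode(pos, len)), (pos, len));
            }
        }
    }
}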

/// A marker type pairing a block hash size `SZ_BH` with the size `SZ_R` of the
/// RLE block used to reconstruct its raw form.
pub struct ReconstructionBlockSize<const SZ_BH: usize, const SZ_R: usize> {}

mod private {
    use crate::internals::hash::block::block_hash;

    use super::*;

    pub trait SealedReconstructionBlockSize {}

    macro_rules! rle_size_for_block_hash_template {
        { $(sizes_def($block_hash_size: expr, $rle_size: expr);)* } => {
            $(impl SealedReconstructionBlockSize for ReconstructionBlockSize<{$block_hash_size}, {$rle_size}> {})*

            #[doc(hidden)]
            mod const_asserts {
                use static_assertions::const_assert;

                use super::*;

                #[cfg_attr(feature = "unstable", coverage(off))]
                #[allow(dead_code)]
                const fn div_ceil(a: usize, b: usize) -> usize {
                    cfg_if::cfg_if! {
                        if #[cfg(ffuzzy_div_ceil = "fallback")] {
                            a / b + (if a % b == 0 { 0 } else { 1 })
                        } else {
                            usize::div_ceil(a, b)
                        }
                    }
                }

                #[cfg(test)]
                #[test]
                fn div_ceil_examples() {
                    assert_eq!(div_ceil(0, 1), 0);
                    assert_eq!(div_ceil(1, 1), 1);
                    assert_eq!(div_ceil(2, 1), 2);
                    assert_eq!(div_ceil(3, 1), 3);
                    assert_eq!(div_ceil(4, 1), 4);
                    assert_eq!(div_ceil(5, 1), 5);
                    assert_eq!(div_ceil(6, 1), 6);
                    assert_eq!(div_ceil(7, 1), 7);
                    assert_eq!(div_ceil(8, 1), 8);
                    assert_eq!(div_ceil(0, 2), 0);
                    assert_eq!(div_ceil(1, 2), 1);
                    assert_eq!(div_ceil(2, 2), 1);
                    assert_eq!(div_ceil(3, 2), 2);
                    assert_eq!(div_ceil(4, 2), 2);
                    assert_eq!(div_ceil(5, 2), 3);
                    assert_eq!(div_ceil(6, 2), 3);
                    assert_eq!(div_ceil(7, 2), 4);
                    assert_eq!(div_ceil(8, 2), 4);
                    assert_eq!(div_ceil(0, 3), 0);
                    assert_eq!(div_ceil(1, 3), 1);
                    assert_eq!(div_ceil(2, 3), 1);
                    assert_eq!(div_ceil(3, 3), 1);
                    assert_eq!(div_ceil(4, 3), 2);
                    assert_eq!(div_ceil(5, 3), 2);
                    assert_eq!(div_ceil(6, 3), 2);
                    assert_eq!(div_ceil(7, 3), 3);
                    assert_eq!(div_ceil(8, 3), 3);
                    assert_eq!(div_ceil(0, 4), 0);
                    assert_eq!(div_ceil(1, 4), 1);
                    assert_eq!(div_ceil(2, 4), 1);
                    assert_eq!(div_ceil(3, 4), 1);
                    assert_eq!(div_ceil(4, 4), 1);
                    assert_eq!(div_ceil(5, 4), 2);
                    assert_eq!(div_ceil(6, 4), 2);
                    assert_eq!(div_ceil(7, 4), 2);
                    assert_eq!(div_ceil(8, 4), 2);
                }
                $(
                    const_assert!(
                        div_ceil($block_hash_size, block_hash::MAX_SEQUENCE_SIZE + 1) <= $rle_size
                    );
                    const_assert!(
                        div_ceil($block_hash_size, rle_encoding::MAX_RUN_LENGTH) <= $rle_size
                    );
                )*
            }
        };
    }

    rle_size_for_block_hash_template! {
        sizes_def(block_hash::FULL_SIZE, block_hash::FULL_SIZE / 4);
        sizes_def(block_hash::HALF_SIZE, block_hash::HALF_SIZE / 4);
    }
}

/// A trait constraining valid combinations of a block hash size and its RLE
/// block size (sealed; only the pairs defined in this module implement it).
pub trait ConstrainedReconstructionBlockSize: private::SealedReconstructionBlockSize {}
impl<T> ConstrainedReconstructionBlockSize for T where T: private::SealedReconstructionBlockSize {}

/// Algorithms converting between raw and normalized block hashes using RLE blocks.
mod algorithms {
    use super::*;

    /// Appends RLE entries describing a run of `len` identical characters whose
    /// last kept character sits at `pos` in the normalized block hash, recording
    /// the `len - block_hash::MAX_SEQUENCE_SIZE` characters removed by
    /// normalization, and returns the updated offset into the RLE block.
    #[inline(always)]
    pub(crate) fn update_rle_block<const SZ_RLE: usize>(
        rle_block: &mut [u8; SZ_RLE],
        rle_offset: usize,
        pos: usize,
        len: usize,
    ) -> usize {
        debug_assert!(len > block_hash::MAX_SEQUENCE_SIZE);
        let extend_len_minus_one = len - block_hash::MAX_SEQUENCE_SIZE - 1;
        let seq_fill_size = extend_len_minus_one / rle_encoding::MAX_RUN_LENGTH;
        let start = rle_offset;
        invariant!(start <= rle_block.len());
        invariant!(start + seq_fill_size <= rle_block.len());
        invariant!(start <= start + seq_fill_size);
        rle_block[start..start + seq_fill_size].fill(rle_encoding::encode(
            pos as u8,
            rle_encoding::MAX_RUN_LENGTH as u8,
        ));
        invariant!(start + seq_fill_size < rle_block.len());
        rle_block[start + seq_fill_size] = rle_encoding::encode(
            pos as u8,
            (extend_len_minus_one % rle_encoding::MAX_RUN_LENGTH) as u8 + 1,
        );
        start + seq_fill_size + 1
    }

    /// Normalizes `blockhash_in` into `blockhash_out` (truncating runs of more than
    /// `block_hash::MAX_SEQUENCE_SIZE` identical characters) and records the characters
    /// removed by the truncation as RLE entries in `rle_block_out`.
    #[inline]
    pub(crate) fn compress_block_hash_with_rle<const SZ_BH: usize, const SZ_RLE: usize>(
        blockhash_out: &mut [u8; SZ_BH],
        rle_block_out: &mut [u8; SZ_RLE],
        blockhash_len_out: &mut u8,
        blockhash_in: &[u8],
    ) where
        BlockHashSize<SZ_BH>: ConstrainedBlockHashSize,
        ReconstructionBlockSize<SZ_BH, SZ_RLE>: ConstrainedReconstructionBlockSize,
    {
        debug_assert!(blockhash_in.len() <= SZ_BH);
        let mut rle_offset = 0;
        let mut seq = 0usize;
        let mut len = 0usize;
        let mut prev = crate::internals::base64::BASE64_INVALID;
        for &curr in blockhash_in {
            if curr == prev {
                seq += 1;
                if seq >= block_hash::MAX_SEQUENCE_SIZE {
                    continue;
                }
            } else {
                if seq >= block_hash::MAX_SEQUENCE_SIZE {
                    rle_offset = update_rle_block(rle_block_out, rle_offset, len - 1, seq + 1);
                }
                seq = 0;
                prev = curr;
            }
            invariant!(len < blockhash_out.len());
            blockhash_out[len] = curr;
            len += 1;
        }
        if seq >= block_hash::MAX_SEQUENCE_SIZE {
            rle_offset = update_rle_block(rle_block_out, rle_offset, len - 1, seq + 1);
        }
        *blockhash_len_out = len as u8;
        invariant!(len <= blockhash_out.len());
        blockhash_out[len..].fill(0);
        invariant!(rle_offset <= rle_block_out.len());
        rle_block_out[rle_offset..].fill(rle_encoding::TERMINATOR);
    }

    /// Reconstructs a raw block hash from its normalized form and the associated RLE
    /// block, reversing `compress_block_hash_with_rle()`.
    #[inline]
    pub(crate) fn expand_block_hash_using_rle<const SZ_BH: usize, const SZ_RLE: usize>(
        blockhash_out: &mut [u8; SZ_BH],
        blockhash_len_out: &mut u8,
        blockhash_in: &[u8; SZ_BH],
        blockhash_len_in: u8,
        rle_block_in: &[u8; SZ_RLE],
    ) where
        BlockHashSize<SZ_BH>: ConstrainedBlockHashSize,
        ReconstructionBlockSize<SZ_BH, SZ_RLE>: ConstrainedReconstructionBlockSize,
    {
        let mut offset_src = 0usize;
        let mut offset_dst = 0usize;
        let mut len_out = blockhash_len_in;
        let copy_as_is = |blockhash_out: &mut [u8; SZ_BH], dst, src, len| {
            invariant!(src <= blockhash_in.len());
            invariant!(src + len <= blockhash_in.len());
            invariant!(src <= src + len);
            invariant!(dst <= blockhash_out.len());
            invariant!(dst + len <= blockhash_out.len());
            invariant!(dst <= dst + len);
            blockhash_out[dst..dst + len].clone_from_slice(&blockhash_in[src..src + len]);
        };
        for &rle in rle_block_in {
            let (pos, len) = rle_encoding::decode(rle);
            if pos == 0 {
                debug_assert!(rle == rle_encoding::TERMINATOR);
                break;
            }
            let pos = pos as usize;
            len_out += len;
            let len = len as usize;
            let copy_len = pos - offset_src;
            copy_as_is(blockhash_out, offset_dst, offset_src, copy_len);
            invariant!(pos < blockhash_in.len());
            let lastch = blockhash_in[pos];
            invariant!(offset_dst + copy_len <= blockhash_out.len());
            invariant!(offset_dst + copy_len + len <= blockhash_out.len());
            invariant!(offset_dst + copy_len <= offset_dst + copy_len + len);
            blockhash_out[offset_dst + copy_len..offset_dst + copy_len + len].fill(lastch);
            offset_src += copy_len;
            offset_dst += copy_len + len;
        }
        let copy_len = len_out as usize - offset_dst;
        copy_as_is(blockhash_out, offset_dst, offset_src, copy_len);
        invariant!(offset_dst + copy_len <= blockhash_out.len());
        blockhash_out[offset_dst + copy_len..].fill(0);
        *blockhash_len_out = len_out;
    }

    /// Checks whether an RLE block is consistent with the given normalized block hash:
    /// entries must appear before the terminator in non-decreasing position order, each
    /// position must point at the last character of a run of at least
    /// `block_hash::MAX_SEQUENCE_SIZE` identical characters, and the expanded length
    /// must not exceed the block hash buffer.
    pub(crate) fn is_valid_rle_block_for_block_hash<const SZ_BH: usize, const SZ_RLE: usize>(
        blockhash: &[u8; SZ_BH],
        rle_block: &[u8; SZ_RLE],
        blockhash_len: u8,
    ) -> bool
    where
        BlockHashSize<SZ_BH>: ConstrainedBlockHashSize,
        ReconstructionBlockSize<SZ_BH, SZ_RLE>: ConstrainedReconstructionBlockSize,
    {
        let mut expanded_len = blockhash_len as u32;
        let mut terminator_expected = false;
        let mut prev_pos = 0u8;
        let mut prev_len = 0u8;
        for &rle in rle_block {
            if unlikely(rle != rle_encoding::TERMINATOR && terminator_expected) {
                return false;
            }
            if rle == rle_encoding::TERMINATOR {
                terminator_expected = true;
                continue;
            }
            let (pos, len) = rle_encoding::decode(rle);
            if unlikely(
                pos < block_hash::MAX_SEQUENCE_SIZE as u8 - 1
                    || pos >= blockhash_len
                    || pos < prev_pos,
            ) {
                return false;
            }
            if prev_pos == pos {
                if unlikely(prev_len != rle_encoding::MAX_RUN_LENGTH as u8) {
                    return false;
                }
            } else {
                let end = pos as usize;
                let start = end - (block_hash::MAX_SEQUENCE_SIZE - 1);
                invariant!(start < blockhash.len());
                invariant!(end < blockhash.len());
                #[allow(clippy::int_plus_one)]
                {
                    invariant!(start + 1 <= end);
                }
                let ch = blockhash[start];
                if unlikely(blockhash[start + 1..=end].iter().any(|x| *x != ch)) {
                    return false;
                }
            }
            prev_pos = pos;
            prev_len = len;
            expanded_len += len as u32;
        }
        if unlikely(expanded_len as usize > SZ_BH) {
            return false;
        }
        true
    }
}
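
// A minimal round-trip sketch (not part of the original test suite): compress a raw
// block hash containing a run longer than the normalization limit, then expand it
// again using the recorded RLE data. The concrete values below assume the usual
// ssdeep limit of `block_hash::MAX_SEQUENCE_SIZE == 3`.
#[cfg(test)]
mod rle_compress_expand_sketch {
    use super::*;

    #[test]
    fn compress_then_expand_restores_raw_block_hash() {
        assert_eq!(block_hash::MAX_SEQUENCE_SIZE, 3);
        // Raw (unnormalized) block hash symbols: a run of five identical symbols.
        let raw_in = [1u8, 1, 1, 1, 1, 2, 3];
        let mut norm = [0u8; block_hash::FULL_SIZE];
        let mut rle = [0u8; block_hash::FULL_SIZE / 4];
        let mut norm_len = 0u8;
        algorithms::compress_block_hash_with_rle(&mut norm, &mut rle, &mut norm_len, &raw_in);
        // Normalization truncates the run to three symbols...
        assert_eq!(&norm[..norm_len as usize], &[1u8, 1, 1, 2, 3][..]);
        // ...and one RLE entry records the two symbols removed at position 2.
        assert_eq!(rle_encoding::decode(rle[0]), (2, 2));
        // Expanding the normalized data with the RLE block restores the raw input.
        let mut expanded = [0u8; block_hash::FULL_SIZE];
        let mut expanded_len = 0u8;
        algorithms::expand_block_hash_using_rle(
            &mut expanded, &mut expanded_len, &norm, norm_len, &rle,
        );
        assert_eq!(&expanded[..expanded_len as usize], &raw_in[..]);
    }
}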

/// A fuzzy hash object that stores the normalized fuzzy hash along with run-length
/// encoded (RLE) data sufficient to reconstruct its original raw form.
#[repr(align(8))]
#[derive(Copy, Clone)]
pub struct FuzzyHashDualData<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    /// RLE block used to reconstruct the raw form of block hash 1.
    rle_block1: [u8; C1],

    /// RLE block used to reconstruct the raw form of block hash 2.
    rle_block2: [u8; C2],

    /// The normalized fuzzy hash.
    norm_hash: fuzzy_norm_type!(S1, S2),
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
    FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    pub const MAX_BLOCK_HASH_SIZE_1: usize = <fuzzy_norm_type!(S1, S2)>::MAX_BLOCK_HASH_SIZE_1;

    pub const MAX_BLOCK_HASH_SIZE_2: usize = <fuzzy_norm_type!(S1, S2)>::MAX_BLOCK_HASH_SIZE_2;

    #[allow(dead_code)]
    const RLE_BLOCK_SIZE_1: usize = C1;

    #[allow(dead_code)]
    const RLE_BLOCK_SIZE_2: usize = C2;

    pub const IS_NORMALIZED_FORM: bool = false;

    pub const IS_LONG_FORM: bool = <fuzzy_norm_type!(S1, S2)>::IS_LONG_FORM;

    pub const MAX_LEN_IN_STR: usize = <fuzzy_norm_type!(S1, S2)>::MAX_LEN_IN_STR;

    pub fn new() -> Self {
        Self {
            rle_block1: [rle_encoding::TERMINATOR; C1],
            rle_block2: [rle_encoding::TERMINATOR; C2],
            norm_hash: FuzzyHashData::new(),
        }
    }

    pub fn init_from_raw_form(&mut self, hash: &fuzzy_raw_type!(S1, S2)) {
        self.norm_hash.log_blocksize = hash.log_blocksize;
        algorithms::compress_block_hash_with_rle(
            &mut self.norm_hash.blockhash1,
            &mut self.rle_block1,
            &mut self.norm_hash.len_blockhash1,
            hash.block_hash_1(),
        );
        algorithms::compress_block_hash_with_rle(
            &mut self.norm_hash.blockhash2,
            &mut self.rle_block2,
            &mut self.norm_hash.len_blockhash2,
            hash.block_hash_2(),
        );
    }

    fn new_from_internals_near_raw_internal(
        log_block_size: u8,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        debug_assert!(block_size::is_log_valid(log_block_size));
        debug_assert!(block_hash_1
            .iter()
            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
        debug_assert!(block_hash_2
            .iter()
            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
        invariant!(block_hash_1.len() <= S1);
        invariant!(block_hash_2.len() <= S2);
        let mut hash = Self::new();
        hash.norm_hash.log_blocksize = log_block_size;
        algorithms::compress_block_hash_with_rle(
            &mut hash.norm_hash.blockhash1,
            &mut hash.rle_block1,
            &mut hash.norm_hash.len_blockhash1,
            block_hash_1,
        );
        algorithms::compress_block_hash_with_rle(
            &mut hash.norm_hash.blockhash2,
            &mut hash.rle_block2,
            &mut hash.norm_hash.len_blockhash2,
            block_hash_2,
        );
        hash
    }

    #[cfg(feature = "unchecked")]
    #[allow(unsafe_code)]
    #[inline(always)]
    pub unsafe fn new_from_internals_near_raw_unchecked(
        log_block_size: u8,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        Self::new_from_internals_near_raw_internal(log_block_size, block_hash_1, block_hash_2)
    }

    #[inline]
    pub fn new_from_internals_near_raw(
        log_block_size: u8,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        assert!(block_size::is_log_valid(log_block_size));
        assert!(block_hash_1.len() <= S1);
        assert!(block_hash_2.len() <= S2);
        assert!(block_hash_1
            .iter()
            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
        assert!(block_hash_2
            .iter()
            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
        Self::new_from_internals_near_raw_internal(log_block_size, block_hash_1, block_hash_2)
    }

    #[allow(dead_code)]
    #[inline(always)]
    fn new_from_internals_internal(
        block_size: u32,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        debug_assert!(block_size::is_valid(block_size));
        Self::new_from_internals_near_raw_internal(
            block_size::log_from_valid_internal(block_size),
            block_hash_1,
            block_hash_2,
        )
    }

    #[cfg(feature = "unchecked")]
    #[allow(unsafe_code)]
    #[inline(always)]
    pub unsafe fn new_from_internals_unchecked(
        block_size: u32,
        block_hash_1: &[u8],
        block_hash_2: &[u8],
    ) -> Self {
        Self::new_from_internals_internal(block_size, block_hash_1, block_hash_2)
    }

    #[inline]
    pub fn new_from_internals(block_size: u32, block_hash_1: &[u8], block_hash_2: &[u8]) -> Self {
        assert!(block_size::is_valid(block_size));
        Self::new_from_internals_near_raw(
            block_size::log_from_valid_internal(block_size),
            block_hash_1,
            block_hash_2,
        )
    }

    #[inline(always)]
    pub fn log_block_size(&self) -> u8 {
        self.norm_hash.log_blocksize
    }

    #[inline]
    pub fn block_size(&self) -> u32 {
        block_size::from_log_internal(self.norm_hash.log_blocksize)
    }

    #[inline(always)]
    pub fn as_normalized(&self) -> &fuzzy_norm_type!(S1, S2) {
        &self.norm_hash
    }

    pub fn from_raw_form(hash: &fuzzy_raw_type!(S1, S2)) -> Self {
        let mut dual_hash = FuzzyHashDualData::new();
        dual_hash.init_from_raw_form(hash);
        dual_hash
    }

    pub fn from_normalized(hash: &fuzzy_norm_type!(S1, S2)) -> Self {
        Self {
            rle_block1: [rle_encoding::TERMINATOR; C1],
            rle_block2: [rle_encoding::TERMINATOR; C2],
            norm_hash: *hash,
        }
    }

    pub fn into_mut_raw_form(&self, hash: &mut fuzzy_raw_type!(S1, S2)) {
        hash.log_blocksize = self.norm_hash.log_blocksize;
        algorithms::expand_block_hash_using_rle(
            &mut hash.blockhash1,
            &mut hash.len_blockhash1,
            &self.norm_hash.blockhash1,
            self.norm_hash.len_blockhash1,
            &self.rle_block1,
        );
        algorithms::expand_block_hash_using_rle(
            &mut hash.blockhash2,
            &mut hash.len_blockhash2,
            &self.norm_hash.blockhash2,
            self.norm_hash.len_blockhash2,
            &self.rle_block2,
        );
    }

    pub fn to_raw_form(&self) -> fuzzy_raw_type!(S1, S2) {
        let mut hash = FuzzyHashData::new();
        self.into_mut_raw_form(&mut hash);
        hash
    }

    #[inline(always)]
    pub fn to_normalized(&self) -> fuzzy_norm_type!(S1, S2) {
        self.norm_hash
    }

    #[cfg(feature = "alloc")]
    pub fn to_normalized_string(&self) -> String {
        self.norm_hash.to_string()
    }

    #[cfg(feature = "alloc")]
    pub fn to_raw_form_string(&self) -> String {
        self.to_raw_form().to_string()
    }

    #[inline(always)]
    fn from_bytes_with_last_index_internal(
        str: &[u8],
        index: &mut usize,
    ) -> Result<Self, ParseError> {
        use crate::internals::hash::{
            algorithms, hash_from_bytes_with_last_index_internal_template,
        };
        use crate::internals::hash_dual::algorithms::update_rle_block;
        let mut fuzzy = Self::new();
        hash_from_bytes_with_last_index_internal_template! {
            str, index, true,
            fuzzy.norm_hash.log_blocksize,
            { let mut rle_offset = 0; },
            #[inline(always)] |pos, len| rle_offset = update_rle_block(
                &mut fuzzy.rle_block1, rle_offset, pos + block_hash::MAX_SEQUENCE_SIZE - 1, len),
            fuzzy.norm_hash.blockhash1, fuzzy.norm_hash.len_blockhash1,
            { let mut rle_offset = 0; },
            #[inline(always)] |pos, len| rle_offset = update_rle_block(
                &mut fuzzy.rle_block2, rle_offset, pos + block_hash::MAX_SEQUENCE_SIZE - 1, len),
            fuzzy.norm_hash.blockhash2, fuzzy.norm_hash.len_blockhash2
        }
        Ok(fuzzy)
    }

    pub fn from_bytes_with_last_index(str: &[u8], index: &mut usize) -> Result<Self, ParseError> {
        Self::from_bytes_with_last_index_internal(str, index)
    }

    pub fn from_bytes(str: &[u8]) -> Result<Self, ParseError> {
        Self::from_bytes_with_last_index_internal(str, &mut 0usize)
    }

    /// Converts this object to the normalized form in place by discarding the
    /// stored RLE data.
    pub fn normalize_in_place(&mut self) {
        self.rle_block1 = [rle_encoding::TERMINATOR; C1];
        self.rle_block2 = [rle_encoding::TERMINATOR; C2];
    }

    /// Returns whether the raw form equals the normalized form
    /// (i.e. no RLE data is stored).
    pub fn is_normalized(&self) -> bool {
        self.rle_block1[0] == rle_encoding::TERMINATOR
            && self.rle_block2[0] == rle_encoding::TERMINATOR
    }

    /// Returns whether the object is valid: the normalized part must be valid and
    /// each RLE block must be consistent with its normalized block hash.
    pub fn is_valid(&self) -> bool {
        self.norm_hash.is_valid()
            && algorithms::is_valid_rle_block_for_block_hash(
                &self.norm_hash.blockhash1,
                &self.rle_block1,
                self.norm_hash.len_blockhash1,
            )
            && algorithms::is_valid_rle_block_for_block_hash(
                &self.norm_hash.blockhash2,
                &self.rle_block2,
                self.norm_hash.len_blockhash2,
            )
    }
}
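
// A minimal sketch (not part of the original test suite) of how the dual object keeps
// both views: the raw form is reconstructed from the RLE data, and normalize_in_place()
// simply discards that data. The concrete lengths below assume the usual ssdeep
// normalization limit of `block_hash::MAX_SEQUENCE_SIZE == 3`.
#[cfg(test)]
mod normalization_state_sketch {
    use super::*;

    #[test]
    fn normalize_in_place_discards_rle_data() {
        assert_eq!(block_hash::MAX_SEQUENCE_SIZE, 3);
        // Block hash 1 contains a run of five identical symbols, so RLE data is stored.
        let mut hash = DualFuzzyHash::new_from_internals(3, &[1, 1, 1, 1, 1, 2, 3], &[4, 5, 6]);
        assert!(!hash.is_normalized());
        // The raw form still reproduces all seven symbols of block hash 1.
        assert_eq!(hash.to_raw_form().block_hash_1().len(), 7);
        hash.normalize_in_place();
        // After discarding the RLE data, only the truncated run remains.
        assert!(hash.is_normalized());
        assert_eq!(hash.to_raw_form().block_hash_1().len(), 5);
    }
}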

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
    AsRef<fuzzy_norm_type!(S1, S2)> for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline(always)]
    fn as_ref(&self) -> &fuzzy_norm_type!(S1, S2) {
        &self.norm_hash
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> Default
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    fn default() -> Self {
        Self::new()
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> PartialEq
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    fn eq(&self, other: &Self) -> bool {
        self.norm_hash == other.norm_hash
            && self.rle_block1 == other.rle_block1
            && self.rle_block2 == other.rle_block2
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> Eq
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::hash::Hash
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        self.norm_hash.hash(state);
        state.write(&self.rle_block1);
        state.write(&self.rle_block2);
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> Ord
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        (self.norm_hash, self.rle_block1, self.rle_block2).cmp(&(
            other.norm_hash,
            other.rle_block1,
            other.rle_block2,
        ))
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> PartialOrd
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline(always)]
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::fmt::Debug
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        struct DebugBuilderForRLEBlockEntry(u8);
        struct DebugBuilderForValidRLEBlock<'a, const N: usize> {
            block: &'a [u8; N],
        }
        struct DebugBuilderForInvalidRLEBlock<'a, const N: usize> {
            block: &'a [u8; N],
        }
        impl<'a, const N: usize> DebugBuilderForValidRLEBlock<'a, N> {
            pub fn new(rle_block: &'a [u8; N]) -> Self {
                Self { block: rle_block }
            }
        }
        impl<'a, const N: usize> DebugBuilderForInvalidRLEBlock<'a, N> {
            pub fn new(rle_block: &'a [u8; N]) -> Self {
                Self { block: rle_block }
            }
        }
        impl core::fmt::Debug for DebugBuilderForRLEBlockEntry {
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                if self.0 != rle_encoding::TERMINATOR {
                    let (pos, len) = rle_encoding::decode(self.0);
                    f.debug_tuple("RLE").field(&pos).field(&len).finish()
                } else {
                    f.debug_tuple("RLENull").finish()
                }
            }
        }
        impl<const N: usize> core::fmt::Debug for DebugBuilderForValidRLEBlock<'_, N> {
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                f.debug_list()
                    .entries(
                        self.block
                            .iter()
                            .copied()
                            .filter(|x| *x != rle_encoding::TERMINATOR)
                            .map(DebugBuilderForRLEBlockEntry),
                    )
                    .finish()
            }
        }
        impl<const N: usize> core::fmt::Debug for DebugBuilderForInvalidRLEBlock<'_, N> {
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                f.debug_list()
                    .entries(self.block.iter().copied().map(DebugBuilderForRLEBlockEntry))
                    .finish()
            }
        }

        if self.is_valid() {
            let buffer1 = self
                .norm_hash
                .blockhash1
                .map(|x| BASE64_TABLE_U8[x as usize]);
            let buffer2 = self
                .norm_hash
                .blockhash2
                .map(|x| BASE64_TABLE_U8[x as usize]);
            f.debug_struct("FuzzyHashDualData")
                .field("LONG", &(S2 == block_hash::FULL_SIZE))
                .field(
                    "block_size",
                    &block_size::from_log_internal(self.norm_hash.log_blocksize),
                )
                .field(
                    "blockhash1",
                    &core::str::from_utf8(&buffer1[..self.norm_hash.len_blockhash1 as usize])
                        .unwrap(),
                )
                .field(
                    "blockhash2",
                    &core::str::from_utf8(&buffer2[..self.norm_hash.len_blockhash2 as usize])
                        .unwrap(),
                )
                .field(
                    "rle_block1",
                    &(DebugBuilderForValidRLEBlock::new(&self.rle_block1)),
                )
                .field(
                    "rle_block2",
                    &(DebugBuilderForValidRLEBlock::new(&self.rle_block2)),
                )
                .finish()
        } else {
            f.debug_struct("FuzzyHashDualData")
                .field("ILL_FORMED", &true)
                .field("LONG", &(S2 == block_hash::FULL_SIZE))
                .field("log_blocksize", &self.norm_hash.log_blocksize)
                .field("len_blockhash1", &self.norm_hash.len_blockhash1)
                .field("len_blockhash2", &self.norm_hash.len_blockhash2)
                .field("blockhash1", &self.norm_hash.blockhash1)
                .field("blockhash2", &self.norm_hash.blockhash2)
                .field(
                    "rle_block1",
                    &(DebugBuilderForInvalidRLEBlock::new(&self.rle_block1)),
                )
                .field(
                    "rle_block2",
                    &(DebugBuilderForInvalidRLEBlock::new(&self.rle_block2)),
                )
                .finish()
        }
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::fmt::Display
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "{{{}|{}}}", self.norm_hash, self.to_raw_form())
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::str::FromStr
    for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    type Err = ParseError;
    #[inline(always)]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::from_bytes(s.as_bytes())
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
    core::convert::From<fuzzy_norm_type!(S1, S2)> for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline]
    fn from(value: fuzzy_norm_type!(S1, S2)) -> Self {
        Self::from_normalized(&value)
    }
}

impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
    core::convert::From<fuzzy_raw_type!(S1, S2)> for FuzzyHashDualData<S1, S2, C1, C2>
where
    BlockHashSize<S1>: ConstrainedBlockHashSize,
    BlockHashSize<S2>: ConstrainedBlockHashSize,
    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
{
    #[inline]
    fn from(value: fuzzy_raw_type!(S1, S2)) -> Self {
        Self::from_raw_form(&value)
    }
}

/// Dual fuzzy hash type corresponding to the regular (short) fuzzy hash,
/// with block hash 2 limited to half of the full block hash size.
pub type DualFuzzyHash = FuzzyHashDualData<
    { block_hash::FULL_SIZE },
    { block_hash::HALF_SIZE },
    { block_hash::FULL_SIZE / 4 },
    { block_hash::HALF_SIZE / 4 },
>;

/// Dual fuzzy hash type corresponding to the long fuzzy hash,
/// where both block hashes may use the full block hash size.
pub type LongDualFuzzyHash = FuzzyHashDualData<
    { block_hash::FULL_SIZE },
    { block_hash::FULL_SIZE },
    { block_hash::FULL_SIZE / 4 },
    { block_hash::FULL_SIZE / 4 },
>;
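
// A minimal usage sketch (not part of the original test suite): parsing a fuzzy hash
// string keeps both views available, the raw one for faithful reconstruction and the
// normalized one for comparison. The expected strings below assume the usual ssdeep
// normalization limit of three identical characters; the string conversions require
// the `alloc` feature.
#[cfg(all(test, feature = "alloc"))]
mod dual_fuzzy_hash_usage_sketch {
    use super::*;

    #[test]
    fn raw_and_normalized_string_views() {
        let hash: DualFuzzyHash = "3:aaaaaaaa:bbbb".parse().unwrap();
        // The raw form string round-trips the original input...
        assert_eq!(hash.to_raw_form_string(), "3:aaaaaaaa:bbbb");
        // ...while the normalized form truncates runs longer than three characters.
        assert_eq!(hash.to_normalized_string(), "3:aaa:bbb");
    }
}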

mod tests;