ssdeep/internals/
compare.rs

1// SPDX-License-Identifier: GPL-2.0-or-later
2// SPDX-FileCopyrightText: Copyright Andrew Tridgell <tridge@samba.org> 2002
3// SPDX-FileCopyrightText: Copyright (C) 2017, 2023–2025 Tsukasa OI <floss_ssdeep@irq.a4lg.com>
4
5//! Fuzzy hash comparison.
6
7use crate::internals::hash::block::{
8    block_hash, block_size, BlockHashSize, BlockHashSizes, BlockSizeRelation,
9    ConstrainedBlockHashSize, ConstrainedBlockHashSizes,
10};
11use crate::internals::hash::{fuzzy_norm_type, FuzzyHashData};
12use crate::internals::hash_dual::{
13    ConstrainedReconstructionBlockSize, FuzzyHashDualData, ReconstructionBlockSize,
14};
15use crate::internals::macros::invariant;
16
17pub mod position_array;
18
19use position_array::{
20    BlockHashPositionArray, BlockHashPositionArrayData, BlockHashPositionArrayImpl,
21    BlockHashPositionArrayImplInternal, BlockHashPositionArrayImplMutInternal,
22    BlockHashPositionArrayMutRef, BlockHashPositionArrayRef,
23};
24
25#[cfg(feature = "unchecked")]
26use position_array::BlockHashPositionArrayImplUnchecked;
27
28/// An efficient position array-based fuzzy hash comparison target.
29///
30/// It can be built from a normalized [`FuzzyHashData`] object and represents
31/// the normalized contents of two block hashes as two position arrays.
32///
33/// Although that this structure is large, it is particularly useful if
34/// you compare many of fuzzy hashes and you can fix one of the operands
35/// (this is usually over 10 times faster than batched `fuzzy_compare` calls
36/// in ssdeep 2.13).  Even if we generate this object each time we compare
37/// two fuzzy hashes, it's usually faster than `fuzzy_compare` in ssdeep 2.13.
38///
39/// In fact, if you just compare two fuzzy hashes in this crate, a temporary
40/// [`FuzzyHashCompareTarget`] object containing two block hashes or
41/// corresponding [half-baked object](BlockHashPositionArray) containing
42/// one block hash, will be created from either side of the comparison.
43///
44/// See also:
45/// *   ["Fuzzy Hash Comparison" section of `FuzzyHashData`](FuzzyHashData#fuzzy-hash-comparison)
46/// *   ["Position Array Representation" section of `BlockHashPositionArrayData`](BlockHashPositionArrayData#position-array-representation)
47///     for the details of internal representation
48///
49/// # Examples
50///
51/// ```rust
52/// // Requires the global allocator to use `Vec` (default on std).
53/// use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
54///
55/// // Brute force comparison
56/// let hashes: Vec<FuzzyHash> = Vec::new();
57/// /* ... add fuzzy hashes to `hashes` ... */
58///
59/// let mut target: FuzzyHashCompareTarget = FuzzyHashCompareTarget::new();
60/// for hash1 in &hashes {
61///     target.init_from(hash1);
62///     for hash2 in &hashes {
63///         let score = target.compare(hash2);
64///         /* ... */
65///     }
66/// }
67/// ```
68#[derive(Clone, Debug)]
69pub struct FuzzyHashCompareTarget {
70    /// The position array representation of block hash 1.
71    ///
72    /// See also:
73    /// *   [`BlockHashPositionArrayData`]
74    /// *   [`BlockHashPositionArrayImpl`]
75    /// *   [`block_hash_1()`](Self::block_hash_1())
76    blockhash1: [u64; block_hash::ALPHABET_SIZE],
77
78    /// The position array representation of block hash 2.
79    ///
80    /// See also:
81    /// *   [`BlockHashPositionArrayData`]
82    /// *   [`BlockHashPositionArrayImpl`]
83    /// *   [`block_hash_2()`](Self::block_hash_2())
84    blockhash2: [u64; block_hash::ALPHABET_SIZE],
85
86    /// Length of the block hash 1 (up to [`block_hash::FULL_SIZE`]).
87    ///
88    /// See also: [`block_hash_1()`](Self::block_hash_1())
89    len_blockhash1: u8,
90
91    /// Length of the block hash 2 (up to [`block_hash::FULL_SIZE`]).
92    ///
93    /// See also: [`block_hash_2()`](Self::block_hash_2())
94    len_blockhash2: u8,
95
96    /// *Base-2 logarithm* form of the actual block size.
97    ///
98    /// See also: ["Block Size" section of `FuzzyHashData`](FuzzyHashData#block-size)
99    log_blocksize: u8,
100}
101
102cfg_if::cfg_if! {
103    if #[cfg(not(feature = "unchecked"))] {
104        /// The return type of [`FuzzyHashCompareTarget::block_hash_1()`] and
105        /// [`FuzzyHashCompareTarget::block_hash_2()`].
106        macro_rules! compare_target_block_hash_pub_impl {
107            ($a: lifetime) => {
108                impl $a + BlockHashPositionArrayImpl
109            };
110        }
111        /// The return type of [`FuzzyHashCompareTarget::block_hash_1_internal()`]
112        /// and [`FuzzyHashCompareTarget::block_hash_2_internal()`].
113        macro_rules! compare_target_block_hash_priv_impl {
114            ($a: lifetime) => {
115                impl $a + BlockHashPositionArrayImpl + BlockHashPositionArrayImplInternal
116            };
117        }
118    } else {
119        /// The return type of [`FuzzyHashCompareTarget::block_hash_1()`] and
120        /// [`FuzzyHashCompareTarget::block_hash_2()`].
121        macro_rules! compare_target_block_hash_pub_impl {
122            ($a: lifetime) => {
123                impl $a + BlockHashPositionArrayImpl + BlockHashPositionArrayImplUnchecked
124            };
125        }
126        /// The return type of [`FuzzyHashCompareTarget::block_hash_1_internal()`]
127        /// and [`FuzzyHashCompareTarget::block_hash_2_internal()`].
128        macro_rules! compare_target_block_hash_priv_impl {
129            ($a: lifetime) => {
130                impl $a + BlockHashPositionArrayImpl + BlockHashPositionArrayImplUnchecked + BlockHashPositionArrayImplInternal
131            };
132        }
133    }
134}
135
136impl FuzzyHashCompareTarget {
137    /// The lower bound (inclusive) of the *base-2 logarithm* form of
138    /// the block size in which the score capping is no longer required.
139    ///
140    /// If `log_block_size` is equal to or larger than this value and `len1` and
141    /// `len2` are at least [`block_hash::MIN_LCS_FOR_COMPARISON`] in size,
142    /// [`Self::score_cap_on_block_hash_comparison`]`(log_block_size, len1, len2)`
143    /// is guaranteed to be `100` or greater.
144    ///
145    /// The score "cap" is computed as
146    /// `(1 << log_block_size) * min(len1, len2)`.
147    /// If this always guaranteed to be `100` or greater,
148    /// capping the score is not longer required.
149    ///
150    /// See also: ["Fuzzy Hash Comparison" section of `FuzzyHashData`](FuzzyHashData#fuzzy-hash-comparison)
151    ///
152    /// # Backgrounds
153    ///
154    /// ## Theorem
155    ///
156    /// For all positive integers `a`, `b` and `c`, `a <= b * c` iff
157    /// `(a + b - 1) / b <= c` (where `ceil(a/b) == (a + b - 1) / b`).
158    ///
159    /// This is proven by Z3 and Coq in the source code:
160    /// *   Z3 + Python:  
161    ///     `dev/prover/compare/blocksize_capping_theorem.py`
162    /// *   Coq:  
163    ///     `dev/prover/compare/blocksize_capping_theorem.v`
164    ///
165    /// ## The Minimum Score Cap
166    ///
167    /// This is expressed as `(1 << log_block_size) * MIN_LCS_FOR_COMPARISON`
168    /// because both block hashes must at least as long as
169    /// [`block_hash::MIN_LCS_FOR_COMPARISON`] to perform edit distance-based
170    /// scoring.
171    ///
172    /// ## Computing the Constant
173    ///
174    /// Applying the theorem above,
175    /// `100 <= (1 << log_block_size) * MIN_LCS_FOR_COMPARISON`
176    /// is equivalent to
177    /// `(100 + MIN_LCS_FOR_COMPARISON - 1) / MIN_LCS_FOR_COMPARISON <= (1 << log_block_size)`.
178    ///
179    /// This leads to the expression to define this constant.
180    pub const LOG_BLOCK_SIZE_CAPPING_BORDER: u8 =
181        ((100 + block_hash::MIN_LCS_FOR_COMPARISON as u64 - 1)
182            / block_hash::MIN_LCS_FOR_COMPARISON as u64)
183            .next_power_of_two()
184            .trailing_zeros() as u8;
185
186    /// Creates a new [`FuzzyHashCompareTarget`] object with empty contents.
187    ///
188    /// This is equivalent to the fuzzy hash string `3::`.
189    #[inline]
190    pub fn new() -> Self {
191        FuzzyHashCompareTarget {
192            blockhash1: [0u64; block_hash::ALPHABET_SIZE],
193            blockhash2: [0u64; block_hash::ALPHABET_SIZE],
194            len_blockhash1: 0,
195            len_blockhash2: 0,
196            log_blocksize: 0,
197        }
198    }
199
200    /// The *base-2 logarithm* form of the comparison target's block size.
201    ///
202    /// See also: ["Block Size" section of `FuzzyHashData`](FuzzyHashData#block-size)
203    #[inline(always)]
204    pub fn log_block_size(&self) -> u8 {
205        self.log_blocksize
206    }
207
208    /// The block size of the comparison target.
209    #[inline]
210    pub fn block_size(&self) -> u32 {
211        block_size::from_log_internal(self.log_blocksize)
212    }
213
214    /// Position array-based representation of the block hash 1.
215    ///
216    /// This is the same as [`block_hash_1()`](Self::block_hash_1()) except that
217    /// it exposes some internals.
218    ///
219    /// See also: [`block_hash_1()`](Self::block_hash_1())
220    ///
221    /// # Examples (Public part)
222    ///
223    /// Because this documentation test is not suitable for a part of the public
224    /// documentation, it is listed here.
225    ///
226    /// ```
227    /// use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
228    /// use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
229    ///
230    /// let target = FuzzyHashCompareTarget::from(str::parse::<FuzzyHash>("3:ABCDEFGHIJKLMNOP:").unwrap());
231    /// let base_bh1:     &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
232    /// let base_bh1_mod: &[u8] = &[1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; // [0] is replaced
233    /// let bh1 = target.block_hash_1();
234    ///
235    /// assert!(bh1.is_valid());                // Should be always true
236    /// assert!(bh1.is_valid_and_normalized()); // Should be always true
237    /// assert_eq!(bh1.len(), base_bh1.len() as u8);
238    /// assert!( bh1.is_equiv(base_bh1));
239    /// assert!(!bh1.is_equiv(base_bh1_mod));
240    /// assert!(!bh1.is_equiv(&[0, 1, 2, 3, 4, 5, 6, 7])); // "ABCDEFGH" (subset)
241    /// assert!( bh1.has_common_substring(&[ 0,  1,  2,  3,  4,  5,  6,  7])); // 0..=6 or 1..=7 matches (enough length)
242    /// assert!(!bh1.has_common_substring(&[10, 11, 12, 13, 14, 15, 16, 17])); // 10..=15 matches but doesn't have enough length
243    /// assert_eq!(bh1.edit_distance(base_bh1), 0);     // edit distance with itself
244    /// assert_eq!(bh1.edit_distance(base_bh1_mod), 2); // replace a character: cost 2
245    /// assert_eq!(bh1.score_strings_raw(base_bh1), 100); // compare with itself
246    /// assert_eq!(bh1.score_strings(base_bh1, 0),   16); // compare with itself, capped (block size 3)
247    ///
248    /// #[cfg(feature = "unchecked")]
249    /// unsafe {
250    ///     use ssdeep::internal_comparison::BlockHashPositionArrayImplUnchecked;
251    ///     // Test unchecked counterparts
252    ///     assert!( bh1.is_equiv_unchecked(base_bh1));
253    ///     assert!(!bh1.is_equiv_unchecked(base_bh1_mod));
254    ///     assert!(!bh1.is_equiv_unchecked(&[0, 1, 2, 3, 4, 5, 6, 7]));
255    ///     assert!( bh1.has_common_substring_unchecked(&[ 0,  1,  2,  3,  4,  5,  6,  7]));
256    ///     assert!(!bh1.has_common_substring_unchecked(&[10, 11, 12, 13, 14, 15, 16, 17]));
257    ///     assert_eq!(bh1.edit_distance_unchecked(base_bh1), 0);
258    ///     assert_eq!(bh1.edit_distance_unchecked(base_bh1_mod), 2);
259    ///     assert_eq!(bh1.score_strings_raw_unchecked(base_bh1), 100);
260    ///     assert_eq!(bh1.score_strings_unchecked(base_bh1, 0),   16);
261    /// }
262    /// ```
263    ///
264    /// # Examples (Private part which should fail)
265    ///
266    /// It allows access to internal functions of [`BlockHashPositionArrayImplInternal`].
267    ///
268    /// In the examples below, it makes sure that they are not
269    /// accessible from outside.
270    ///
271    /// ```compile_fail
272    /// # use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
273    /// # use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
274    /// # let target = FuzzyHashCompareTarget::from(str::parse::<FuzzyHash>("3:ABCDEFGHIJKLMNOP:").unwrap());
275    /// # let base_bh1: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
276    /// let bh1 = target.block_hash_1();
277    /// assert!(bh1.is_equiv_internal(base_bh1));
278    /// ```
279    /// ```compile_fail
280    /// # use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
281    /// # use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
282    /// # let target = FuzzyHashCompareTarget::from(str::parse::<FuzzyHash>("3:ABCDEFGHIJKLMNOP:").unwrap());
283    /// # let base_bh1: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
284    /// let bh1 = target.block_hash_1();
285    /// assert!(bh1.has_common_substring_internal(&[0, 1, 2, 3, 4, 5, 6, 7]));
286    /// ```
287    /// ```compile_fail
288    /// # use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
289    /// # use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
290    /// # let target = FuzzyHashCompareTarget::from(str::parse::<FuzzyHash>("3:ABCDEFGHIJKLMNOP:").unwrap());
291    /// # let base_bh1: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
292    /// let bh1 = target.block_hash_1();
293    /// assert_eq!(bh1.edit_distance_internal(base_bh1), 0);
294    /// ```
295    /// ```compile_fail
296    /// # use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
297    /// # use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
298    /// # let target = FuzzyHashCompareTarget::from(str::parse::<FuzzyHash>("3:ABCDEFGHIJKLMNOP:").unwrap());
299    /// # let base_bh1: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
300    /// let bh1 = target.block_hash_1();
301    /// assert_eq!(bh1.score_strings_raw_internal(base_bh1), 100);
302    /// ```
303    /// ```compile_fail
304    /// # use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
305    /// # use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
306    /// # let target = FuzzyHashCompareTarget::from(str::parse::<FuzzyHash>("3:ABCDEFGHIJKLMNOP:").unwrap());
307    /// # let base_bh1: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
308    /// let bh1 = target.block_hash_1();
309    /// assert_eq!(bh1.score_strings_internal(base_bh1, 0), 16);
310    /// ```
311    #[inline(always)]
312    fn block_hash_1_internal(&self) -> compare_target_block_hash_priv_impl!('_) {
313        BlockHashPositionArrayRef(&self.blockhash1, &self.len_blockhash1)
314    }
315
316    /// Position array-based representation of the block hash 1.
317    ///
318    /// This method provides raw access to the internal efficient block hash
319    /// representation and fast bit-parallel string functions.
320    ///
321    /// You are not recommended to use this unless
322    /// you know the internal details deeply.
323    ///
324    /// The result has the same lifetime as this object and implements
325    /// following traits:
326    ///
327    /// *   [`BlockHashPositionArrayData`]
328    /// *   [`BlockHashPositionArrayImpl`]
329    /// *   [`BlockHashPositionArrayImplUnchecked`]
330    ///     (only if the `unchecked` feature is enabled)
331    #[inline(always)]
332    pub fn block_hash_1(&self) -> compare_target_block_hash_pub_impl!('_) {
333        // Expose a subset of block_hash_1_internal()
334        self.block_hash_1_internal()
335    }
336
337    /// Position array-based representation of the block hash 1.
338    ///
339    /// This is internal only *and* mutable.
340    ///
341    /// See also: [`block_hash_1()`](Self::block_hash_1())
342    ///
343    /// # Examples (That should fail)
344    ///
345    /// In the examples below, it makes sure that they are not
346    /// accessible from outside.
347    ///
348    /// It allows access to internal functions of
349    /// [`BlockHashPositionArrayImplMut`](crate::internals::compare::position_array::BlockHashPositionArrayImplMut)
350    /// and [`BlockHashPositionArrayImplMutInternal`].
351    ///
352    /// ```compile_fail
353    /// # use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
354    /// # use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
355    /// # let mut target = FuzzyHashCompareTarget::new();
356    /// # let base_bh1: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
357    /// let bh1 = target.block_hash_1();
358    /// assert!(bh1.init_from(base_bh1));
359    /// ```
360    /// ```compile_fail
361    /// # use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
362    /// # use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
363    /// # let mut target = FuzzyHashCompareTarget::new();
364    /// # let base_bh1: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
365    /// let bh1 = target.block_hash_1();
366    /// assert!(bh1.set_len_internal(16));
367    /// ```
368    /// ```compile_fail
369    /// # use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
370    /// # use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
371    /// # let mut target = FuzzyHashCompareTarget::new();
372    /// # let base_bh1: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
373    /// let bh1 = target.block_hash_1();
374    /// assert!(bh1.clear_representation_only());
375    /// ```
376    /// ```compile_fail
377    /// # use ssdeep::{FuzzyHash, FuzzyHashCompareTarget};
378    /// # use ssdeep::internal_comparison::{BlockHashPositionArrayData, BlockHashPositionArrayImpl};
379    /// # let mut target = FuzzyHashCompareTarget::new();
380    /// # let base_bh1: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
381    /// let bh1 = target.block_hash_1();
382    /// assert!(bh1.init_from_partial(base_bh1));
383    /// ```
384    #[inline(always)]
385    fn block_hash_1_mut(
386        &mut self,
387    ) -> impl '_ + BlockHashPositionArrayImpl + BlockHashPositionArrayImplMutInternal {
388        BlockHashPositionArrayMutRef(&mut self.blockhash1, &mut self.len_blockhash1)
389    }
390
391    /// Position array-based representation of the block hash 2.
392    ///
393    /// This is the same as [`block_hash_2()`](Self::block_hash_2()) except that
394    /// it exposes some internals.
395    ///
396    /// See also: [`block_hash_1_internal()`](Self::block_hash_1_internal())
397    #[inline(always)]
398    fn block_hash_2_internal(&self) -> compare_target_block_hash_priv_impl!('_) {
399        BlockHashPositionArrayRef(&self.blockhash2, &self.len_blockhash2)
400    }
401
402    /// Position array-based representation of the block hash 2.
403    ///
404    /// See also: [`block_hash_1()`](Self::block_hash_1())
405    #[inline(always)]
406    pub fn block_hash_2(&self) -> compare_target_block_hash_pub_impl!('_) {
407        // Expose a subset of block_hash_2_internal()
408        self.block_hash_2_internal()
409    }
410
411    /// Position array-based representation of the block hash 2.
412    ///
413    /// This is internal only *and* mutable.
414    ///
415    /// See also: [`block_hash_1_mut()`](Self::block_hash_1_mut())
416    #[inline(always)]
417    fn block_hash_2_mut(
418        &mut self,
419    ) -> impl '_ + BlockHashPositionArrayImpl + BlockHashPositionArrayImplMutInternal {
420        BlockHashPositionArrayMutRef(&mut self.blockhash2, &mut self.len_blockhash2)
421    }
422
423    /// Performs full equality checking of the internal structure.
424    ///
425    /// This type intentionally lacks the implementation of [`PartialEq`]
426    /// because of its large size.  However, there's a case where we need to
427    /// compare two comparison targets.
428    ///
429    /// The primary purpose of this is debugging and it compares all internal
430    /// members inside the structure (just like auto-generated
431    /// [`PartialEq::eq()`]).
432    ///
433    /// Note that, despite that it is only relevant to users when the
434    /// `unchecked` feature is enabled but made public without any features
435    /// because this method is not *unsafe* or *unchecked* in any way.
436    pub fn full_eq(&self, other: &Self) -> bool {
437        // The contents of this method is auto-generated by rust-analyzer.
438        self.blockhash1 == other.blockhash1
439            && self.blockhash2 == other.blockhash2
440            && self.len_blockhash1 == other.len_blockhash1
441            && self.len_blockhash2 == other.len_blockhash2
442            && self.log_blocksize == other.log_blocksize
443    }
444
445    /// Initialize the object from a given fuzzy hash
446    /// (without clearing the position arrays).
447    ///
448    /// This method is intended to be used just after clearing the position
449    /// arrays (i.e. just after the initialization).
450    #[inline]
451    fn init_from_partial<const S1: usize, const S2: usize>(
452        &mut self,
453        hash: impl AsRef<fuzzy_norm_type!(S1, S2)>,
454    ) where
455        BlockHashSize<S1>: ConstrainedBlockHashSize,
456        BlockHashSize<S2>: ConstrainedBlockHashSize,
457        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
458    {
459        let hash = hash.as_ref();
460        debug_assert!((hash.len_blockhash1 as usize) <= S1);
461        debug_assert!((hash.len_blockhash2 as usize) <= S2);
462        debug_assert!(block_size::is_log_valid(hash.log_blocksize));
463        self.len_blockhash1 = hash.len_blockhash1;
464        self.len_blockhash2 = hash.len_blockhash2;
465        self.log_blocksize = hash.log_blocksize;
466        // Initialize position arrays based on the original block hashes
467        self.block_hash_1_mut()
468            .init_from_partial(hash.block_hash_1());
469        self.block_hash_2_mut()
470            .init_from_partial(hash.block_hash_2());
471    }
472
473    /// Initialize the object from a given fuzzy hash.
474    #[inline]
475    pub fn init_from<const S1: usize, const S2: usize>(
476        &mut self,
477        hash: impl AsRef<fuzzy_norm_type!(S1, S2)>,
478    ) where
479        BlockHashSize<S1>: ConstrainedBlockHashSize,
480        BlockHashSize<S2>: ConstrainedBlockHashSize,
481        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
482    {
483        self.block_hash_1_mut().clear_representation_only();
484        self.block_hash_2_mut().clear_representation_only();
485        self.init_from_partial(hash);
486    }
487
488    /// Compare whether two fuzzy hashes are equivalent
489    /// (except for their block size).
490    #[inline]
491    fn is_equiv_except_block_size<const S1: usize, const S2: usize>(
492        &self,
493        hash: impl AsRef<fuzzy_norm_type!(S1, S2)>,
494    ) -> bool
495    where
496        BlockHashSize<S1>: ConstrainedBlockHashSize,
497        BlockHashSize<S2>: ConstrainedBlockHashSize,
498        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
499    {
500        let hash = hash.as_ref();
501        self.block_hash_1_internal()
502            .is_equiv_internal(hash.block_hash_1())
503            && self
504                .block_hash_2_internal()
505                .is_equiv_internal(hash.block_hash_2())
506    }
507
508    /// Compare whether two fuzzy hashes are equivalent.
509    #[inline(always)]
510    pub fn is_equiv<const S1: usize, const S2: usize>(
511        &self,
512        hash: impl AsRef<fuzzy_norm_type!(S1, S2)>,
513    ) -> bool
514    where
515        BlockHashSize<S1>: ConstrainedBlockHashSize,
516        BlockHashSize<S2>: ConstrainedBlockHashSize,
517        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
518    {
519        let hash = hash.as_ref();
520        if self.log_blocksize != hash.log_blocksize {
521            return false;
522        }
523        self.is_equiv_except_block_size(hash)
524    }
525
526    /// The internal implementation of [`Self::raw_score_by_edit_distance_unchecked()`].
527    #[inline(always)]
528    fn raw_score_by_edit_distance_internal(
529        len_block_hash_lhs: u8,
530        len_block_hash_rhs: u8,
531        edit_distance: u32,
532    ) -> u32 {
533        // Scale the raw edit distance to a 0 to 100 score (familiar to humans).
534        debug_assert!(len_block_hash_lhs >= block_hash::MIN_LCS_FOR_COMPARISON as u8);
535        debug_assert!(len_block_hash_rhs >= block_hash::MIN_LCS_FOR_COMPARISON as u8);
536        debug_assert!(len_block_hash_lhs <= block_hash::FULL_SIZE as u8);
537        debug_assert!(len_block_hash_rhs <= block_hash::FULL_SIZE as u8);
538        debug_assert!(
539            edit_distance
540                <= (len_block_hash_lhs as u32 + len_block_hash_rhs as u32
541                    - 2 * block_hash::MIN_LCS_FOR_COMPARISON as u32)
542        );
543        // rustc/LLVM cannot prove that
544        // (len_block_hash_lhs + len_block_hash_rhs)
545        //     >= block_hash::MIN_LCS_FOR_COMPARISON * 2.
546        // Place this invariant to avoid division-by-zero checking.
547        invariant!((len_block_hash_lhs as u32 + len_block_hash_rhs as u32) > 0);
548        /*
549            Possible arithmetic operations to check overflow:
550            1.  (block_hash::FULL_SIZE * 2) * block_hash::FULL_SIZE
551            2.  100 * block_hash::FULL_SIZE
552        */
553        let subscore = (edit_distance * block_hash::FULL_SIZE as u32)
554            / (len_block_hash_lhs as u32 + len_block_hash_rhs as u32); // grcov-excl-br-line:DIVZERO
555        100 - (100 * subscore) / block_hash::FULL_SIZE as u32
556    }
557
558    /// Returns the raw score (without capping) based on the lengths of block
559    /// hashes and the edit distance between them.
560    ///
561    /// This method assumes that following constraints are satisfied.
562    ///
563    /// # Compatibility Note
564    ///
565    /// The types of `len_block_hash_lhs` and `len_block_hash_rhs` were
566    /// changed from [`u32`] to [`u8`] on the version 0.3.
567    ///
568    /// # Safety
569    ///
570    /// *   Both `len_block_hash_lhs` and `len_block_hash_rhs` must satisfy:
571    ///     *   Equal to or greater than [`MIN_LCS_FOR_COMPARISON`](crate::block_hash::MIN_LCS_FOR_COMPARISON),
572    ///     *   Equal to or less than [`FULL_SIZE`](crate::block_hash::FULL_SIZE).
573    ///
574    /// *   `edit_distance` must be equal to or less than
575    ///     `len_block_hash_lhs + len_block_hash_rhs - 2 * MIN_LCS_FOR_COMPARISON`.
576    ///
577    ///     This constraint comes from the constraints of the lengths and the
578    ///     fact that we shall have a common substring of the length
579    ///     [`MIN_LCS_FOR_COMPARISON`](crate::block_hash::MIN_LCS_FOR_COMPARISON)
580    ///     to perform an edit distance-based comparison (reducing maximum
581    ///     possible edit distance by `2 * MIN_LCS_FOR_COMPARISON`).
582    ///
583    /// If they are not satisfied, it will at least return a meaningless score
584    /// and will cause a division by zero on the worst case.
585    #[cfg(feature = "unchecked")]
586    #[allow(unsafe_code)]
587    #[inline(always)]
588    pub unsafe fn raw_score_by_edit_distance_unchecked(
589        len_block_hash_lhs: u8,
590        len_block_hash_rhs: u8,
591        edit_distance: u32,
592    ) -> u32 {
593        Self::raw_score_by_edit_distance_internal(
594            len_block_hash_lhs,
595            len_block_hash_rhs,
596            edit_distance,
597        )
598    }
599
600    /// Returns the raw score (without capping) based on the lengths of block
601    /// hashes and the edit distance between them.
602    ///
603    /// This method scales the edit distance to the `0..=100` score familiar to
604    /// humans (`100` means a perfect match, smaller the score, lower
605    /// the similarity).
606    ///
607    /// Note that it doesn't perform any [score capping](Self::score_cap_on_block_hash_comparison())
608    /// (that should be performed on [smaller block sizes](Self::LOG_BLOCK_SIZE_CAPPING_BORDER)).
609    ///
610    /// See also: ["Fuzzy Hash Comparison" section of `FuzzyHashData`](FuzzyHashData#fuzzy-hash-comparison)
611    ///
612    /// # Compatibility Note
613    ///
614    /// The types of `len_block_hash_lhs` and `len_block_hash_rhs` were
615    /// changed from [`u32`] to [`u8`] on the version 0.3.
616    ///
617    /// # Usage Constraints
618    ///
619    /// *   Both `len_block_hash_lhs` and `len_block_hash_rhs` must satisfy:
620    ///     *   Equal to or greater than [`MIN_LCS_FOR_COMPARISON`](crate::block_hash::MIN_LCS_FOR_COMPARISON),
621    ///     *   Equal to or less than [`FULL_SIZE`](crate::block_hash::FULL_SIZE).
622    ///
623    /// *   `edit_distance` must be equal to or less than
624    ///     `len_block_hash_lhs + len_block_hash_rhs - 2 * MIN_LCS_FOR_COMPARISON`.
625    ///
626    ///     This constraint comes from the constraints of the lengths and the
627    ///     fact that we shall have a common substring of the length
628    ///     [`MIN_LCS_FOR_COMPARISON`](crate::block_hash::MIN_LCS_FOR_COMPARISON)
629    ///     to perform an edit distance-based comparison (reducing maximum
630    ///     possible edit distance by `2 * MIN_LCS_FOR_COMPARISON`).
631    ///
632    /// # Useful Property
633    ///
634    /// If all arguments are valid, the return value (the raw score) is
635    /// guaranteed to be greater than zero.  Along with the property of the
636    /// [score capping](Self::score_cap_on_block_hash_comparison()), it means
637    /// that we should have a non-zero score if we can perform an edit
638    /// distance-based comparison.
639    #[inline(always)]
640    pub fn raw_score_by_edit_distance(
641        len_block_hash_lhs: u8,
642        len_block_hash_rhs: u8,
643        edit_distance: u32,
644    ) -> u32 {
645        // Scale the raw edit distance to a 0 to 100 score (familiar to humans).
646        assert!(len_block_hash_lhs >= block_hash::MIN_LCS_FOR_COMPARISON as u8);
647        assert!(len_block_hash_rhs >= block_hash::MIN_LCS_FOR_COMPARISON as u8);
648        assert!(len_block_hash_lhs <= block_hash::FULL_SIZE as u8);
649        assert!(len_block_hash_rhs <= block_hash::FULL_SIZE as u8);
650        assert!(
651            edit_distance
652                <= (len_block_hash_lhs as u32 + len_block_hash_rhs as u32
653                    - 2 * block_hash::MIN_LCS_FOR_COMPARISON as u32)
654        );
655        Self::raw_score_by_edit_distance_internal(
656            len_block_hash_lhs,
657            len_block_hash_rhs,
658            edit_distance,
659        )
660    }
661
662    /// The internal implementation of [`Self::score_cap_on_block_hash_comparison_unchecked()`].
663    #[inline(always)]
664    fn score_cap_on_block_hash_comparison_internal(
665        log_block_size: u8,
666        len_block_hash_lhs: u8,
667        len_block_hash_rhs: u8,
668    ) -> u32 {
669        invariant!(log_block_size < FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER);
670        (1u32 << log_block_size) * u32::min(len_block_hash_lhs as u32, len_block_hash_rhs as u32)
671    }
672
673    /// Returns the "score cap" for a given block size and two block hash
674    /// lengths, assuming that block size and block hash lengths are small
675    /// enough so that no arithmetic overflow will occur.
676    ///
677    /// # Safety
678    ///
679    /// *   `log_block_size` must be less than
680    ///     [`FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER`](Self::LOG_BLOCK_SIZE_CAPPING_BORDER).
681    /// *   Both `len_block_hash_lhs` and `len_block_hash_rhs` must not exceed
682    ///     [`block_hash::FULL_SIZE`].
683    ///
684    /// Otherwise, it may cause an arithmetic overflow and return an
685    /// useless value.
686    #[cfg(feature = "unchecked")]
687    #[allow(unsafe_code)]
688    #[inline(always)]
689    pub unsafe fn score_cap_on_block_hash_comparison_unchecked(
690        log_block_size: u8,
691        len_block_hash_lhs: u8,
692        len_block_hash_rhs: u8,
693    ) -> u32 {
694        Self::score_cap_on_block_hash_comparison_internal(
695            log_block_size,
696            len_block_hash_lhs,
697            len_block_hash_rhs,
698        )
699    }
700
701    /// Returns the "score cap" for a given block size and
702    /// two block hash lengths.
703    ///
704    /// The internal block hash comparison method "caps" the score to prevent
705    /// exaggerating the matches that are not meaningful enough.  This behavior
706    /// depends on the block size (the "cap" gets higher as the block size gets
707    /// higher) and the minimum of block hash lengths.
708    ///
709    /// The result is not always guaranteed to be in `0..=100` but `100` or
710    /// higher means that we don't need any score capping.
711    ///
712    /// If at least one of the arguments `len_block_hash_lhs` and
713    /// `len_block_hash_rhs` are less than
714    /// [`block_hash::MIN_LCS_FOR_COMPARISON`], the result is
715    /// implementation-defined.
716    ///
717    /// If all arguments are valid and `log_block_size` is
718    /// [large enough](Self::LOG_BLOCK_SIZE_CAPPING_BORDER),
719    /// `100` or greater will be returned, meaning that the score capping is
720    /// no longer required.
721    ///
722    /// See also: ["Fuzzy Hash Comparison" section of `FuzzyHashData`](FuzzyHashData#fuzzy-hash-comparison)
723    ///
724    /// # Compatibility Note
725    ///
726    /// While this method is completely safe even if semantically-invalid
727    /// parameters are specified (due to arithmetic properties of internal
728    /// computation and a safety measure in this method), following semantic
729    /// constraints may be added on the future versions:
730    ///
731    /// *   `log_block_size` [must be valid](block_size::is_log_valid())
732    ///     or must be equal to [`block_size::NUM_VALID`] (this value itself is
733    ///     not valid as a block size for a fuzzy hash object but will be valid
734    ///     on this method).
735    /// *   Both `len_block_hash_lhs` and `len_block_hash_rhs` must not exceed
736    ///     [`block_hash::FULL_SIZE`].
737    ///
738    /// If at least one of the arguments `len_block_hash_lhs` and
739    /// `len_block_hash_rhs` are less than
740    /// [`block_hash::MIN_LCS_FOR_COMPARISON`], this is semantically-invalid.
741    /// We haven't determined whether we need to reject those cases but at
742    /// least implementation-defined (may cause a panic in the future).
743    ///
744    /// # Useful Property
745    ///
746    /// If all arguments are valid and both `len_block_hash_lhs` and
747    /// `len_block_hash_rhs` are non-zero, the return value (the score cap) is
748    /// guaranteed to be greater than zero.  Along with the property of the
749    /// [raw scoring](Self::raw_score_by_edit_distance()), it means that we
750    /// should have a non-zero score if we can perform an edit distance-based
751    /// comparison.
752    #[inline(always)]
753    pub fn score_cap_on_block_hash_comparison(
754        log_block_size: u8,
755        len_block_hash_lhs: u8,
756        len_block_hash_rhs: u8,
757    ) -> u32 {
758        // assert!((log_block_size as usize) <= block_size::NUM_VALID);
759        // assert!((len_block_hash_lhs as usize) >= block_hash::MIN_LCS_FOR_COMPARISON);
760        // assert!((len_block_hash_rhs as usize) >= block_hash::MIN_LCS_FOR_COMPARISON);
761        // assert!((len_block_hash_lhs as usize) <= block_hash::FULL_SIZE);
762        // assert!((len_block_hash_rhs as usize) <= block_hash::FULL_SIZE);
763        if log_block_size >= FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER {
764            100
765        } else {
766            Self::score_cap_on_block_hash_comparison_internal(
767                log_block_size,
768                len_block_hash_lhs,
769                len_block_hash_rhs,
770            )
771        }
772    }
773}
774
775impl Default for FuzzyHashCompareTarget {
776    fn default() -> Self {
777        Self::new()
778    }
779}
780
781impl FuzzyHashCompareTarget {
782    /// Performs full validity checking of the internal structure.
783    ///
784    /// The primary purpose of this is debugging and it should always
785    /// return [`true`] unless...
786    ///
787    /// *   There is a bug in this crate, corrupting this structure,
788    /// *   A memory corruption is occurred somewhere else or
789    /// *   An `unsafe` function to construct this object is misused.
790    ///
791    /// Because of its purpose, this method is not designed to be fast.
792    ///
793    /// Note that, despite that it is only relevant to users when the
794    /// `unchecked` feature is enabled but made public without any features
795    /// because this method is not *unsafe* or *unchecked* in any way.
796    pub fn is_valid(&self) -> bool {
797        block_size::is_log_valid(self.log_blocksize)
798            && self.block_hash_1_internal().is_valid_and_normalized()
799            && self.block_hash_2_internal().is_valid_and_normalized()
800    }
801
802    /// The internal implementation of [`Self::compare_unequal_near_eq_unchecked()`].
803    #[inline]
804    fn compare_unequal_near_eq_internal<const S1: usize, const S2: usize>(
805        &self,
806        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
807    ) -> u32
808    where
809        BlockHashSize<S1>: ConstrainedBlockHashSize,
810        BlockHashSize<S2>: ConstrainedBlockHashSize,
811        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
812    {
813        let other = other.as_ref();
814        let (bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
815        debug_assert!(!self.is_equiv(other));
816        debug_assert!(block_size::is_near_eq(bs1, _bs2));
817        u32::max(
818            self.block_hash_1_internal()
819                .score_strings_internal(other.block_hash_1(), bs1),
820            self.block_hash_2_internal()
821                .score_strings_internal(other.block_hash_2(), bs1 + 1),
822        )
823    }
824
825    /// Compare two fuzzy hashes assuming both are different and their
826    /// block sizes have a relation of [`BlockSizeRelation::NearEq`].
827    ///
828    /// # Safety
829    ///
830    /// *   Both fuzzy hashes must be different.
831    /// *   Both fuzzy hashes (`self` and `other`) must have
832    ///     block size relation of [`BlockSizeRelation::NearEq`].
833    ///
834    /// If they are not satisfied, it will return a meaningless score.
835    #[cfg(feature = "unchecked")]
836    #[allow(unsafe_code)]
837    #[inline(always)]
838    pub unsafe fn compare_unequal_near_eq_unchecked<const S1: usize, const S2: usize>(
839        &self,
840        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
841    ) -> u32
842    where
843        BlockHashSize<S1>: ConstrainedBlockHashSize,
844        BlockHashSize<S2>: ConstrainedBlockHashSize,
845        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
846    {
847        self.compare_unequal_near_eq_internal(other)
848    }
849
850    /// *Slow*: Compare two fuzzy hashes assuming both are different and
851    /// their block sizes have a relation of [`BlockSizeRelation::NearEq`].
852    ///
853    /// # Usage Constraints
854    ///
855    /// *   Both fuzzy hashes must be different.
856    /// *   Both fuzzy hashes (`self` and `other`) must have
857    ///     block size relation of [`BlockSizeRelation::NearEq`].
858    ///
859    /// # Performance Consideration
860    ///
861    /// This method's performance is not good enough (because of constraint
862    /// checking).
863    ///
864    /// Use those instead:
865    /// *   [`compare_near_eq()`](Self::compare_near_eq()) (checked)
866    /// *   [`compare_unequal_near_eq_unchecked()`](Self::compare_unequal_near_eq_unchecked())
867    ///     (unchecked)
868    #[inline(always)]
869    pub fn compare_unequal_near_eq<const S1: usize, const S2: usize>(
870        &self,
871        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
872    ) -> u32
873    where
874        BlockHashSize<S1>: ConstrainedBlockHashSize,
875        BlockHashSize<S2>: ConstrainedBlockHashSize,
876        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
877    {
878        let other = other.as_ref();
879        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
880        assert!(!self.is_equiv(other));
881        assert!(block_size::is_near_eq(_bs1, _bs2));
882        self.compare_unequal_near_eq_internal(other)
883    }
884
885    /// The internal implementation of [`Self::compare_near_eq_unchecked()`].
886    #[inline]
887    fn compare_near_eq_internal<const S1: usize, const S2: usize>(
888        &self,
889        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
890    ) -> u32
891    where
892        BlockHashSize<S1>: ConstrainedBlockHashSize,
893        BlockHashSize<S2>: ConstrainedBlockHashSize,
894        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
895    {
896        let other = other.as_ref();
897        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
898        assert!(block_size::is_near_eq(_bs1, _bs2));
899        if self.is_equiv_except_block_size(other) {
900            return 100;
901        }
902        self.compare_unequal_near_eq_internal(other)
903    }
904
905    /// Compare two fuzzy hashes assuming their block sizes have
906    /// a relation of [`BlockSizeRelation::NearEq`].
907    ///
908    /// # Safety
909    ///
910    /// *   Both fuzzy hashes (`self` and `other`) must have
911    ///     block size relation of [`BlockSizeRelation::NearEq`].
912    ///
913    /// If the condition above is not satisfied, it will return
914    /// a meaningless score.
915    #[cfg(feature = "unchecked")]
916    #[allow(unsafe_code)]
917    #[inline(always)]
918    pub unsafe fn compare_near_eq_unchecked<const S1: usize, const S2: usize>(
919        &self,
920        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
921    ) -> u32
922    where
923        BlockHashSize<S1>: ConstrainedBlockHashSize,
924        BlockHashSize<S2>: ConstrainedBlockHashSize,
925        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
926    {
927        self.compare_near_eq_internal(other)
928    }
929
930    /// Compare two fuzzy hashes assuming their block sizes have
931    /// a relation of [`BlockSizeRelation::NearEq`].
932    ///
933    /// # Usage Constraints
934    ///
935    /// *   Both fuzzy hashes (`self` and `other`) must have
936    ///     block size relation of [`BlockSizeRelation::NearEq`].
937    #[inline(always)]
938    pub fn compare_near_eq<const S1: usize, const S2: usize>(
939        &self,
940        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
941    ) -> u32
942    where
943        BlockHashSize<S1>: ConstrainedBlockHashSize,
944        BlockHashSize<S2>: ConstrainedBlockHashSize,
945        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
946    {
947        let other = other.as_ref();
948        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
949        assert!(block_size::is_near_eq(_bs1, _bs2));
950        self.compare_near_eq_internal(other)
951    }
952
953    /// The internal implementation of [`Self::compare_unequal_near_lt_unchecked()`].
954    #[inline(always)]
955    fn compare_unequal_near_lt_internal<const S1: usize, const S2: usize>(
956        &self,
957        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
958    ) -> u32
959    where
960        BlockHashSize<S1>: ConstrainedBlockHashSize,
961        BlockHashSize<S2>: ConstrainedBlockHashSize,
962        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
963    {
964        let other = other.as_ref();
965        let (_bs1, bs2) = (self.log_blocksize, other.log_blocksize);
966        assert!(block_size::is_near_lt(_bs1, bs2));
967        self.block_hash_2_internal()
968            .score_strings_internal(other.block_hash_1(), bs2)
969    }
970
971    /// Compare two fuzzy hashes assuming both are different and their
972    /// block sizes have a relation of [`BlockSizeRelation::NearLt`].
973    ///
974    /// # Safety
975    ///
976    /// *   Both fuzzy hashes (`self` and `other`) must have
977    ///     block size relation of [`BlockSizeRelation::NearLt`].
978    ///
979    /// If they are not satisfied, it will return a meaningless score.
980    #[cfg(feature = "unchecked")]
981    #[allow(unsafe_code)]
982    #[inline(always)]
983    pub unsafe fn compare_unequal_near_lt_unchecked<const S1: usize, const S2: usize>(
984        &self,
985        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
986    ) -> u32
987    where
988        BlockHashSize<S1>: ConstrainedBlockHashSize,
989        BlockHashSize<S2>: ConstrainedBlockHashSize,
990        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
991    {
992        self.compare_unequal_near_lt_internal(other)
993    }
994
995    /// Compare two fuzzy hashes assuming both are different and their
996    /// block sizes have a relation of [`BlockSizeRelation::NearLt`].
997    ///
998    /// # Usage Constraints
999    ///
1000    /// *   Both fuzzy hashes (`self` and `other`) must have
1001    ///     block size relation of [`BlockSizeRelation::NearLt`].
1002    #[inline(always)]
1003    pub fn compare_unequal_near_lt<const S1: usize, const S2: usize>(
1004        &self,
1005        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1006    ) -> u32
1007    where
1008        BlockHashSize<S1>: ConstrainedBlockHashSize,
1009        BlockHashSize<S2>: ConstrainedBlockHashSize,
1010        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1011    {
1012        let other = other.as_ref();
1013        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
1014        assert!(block_size::is_near_lt(_bs1, _bs2));
1015        self.compare_unequal_near_lt_internal(other)
1016    }
1017
1018    /// The internal implementation of [`Self::compare_unequal_near_gt_unchecked()`].
1019    #[inline(always)]
1020    fn compare_unequal_near_gt_internal<const S1: usize, const S2: usize>(
1021        &self,
1022        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1023    ) -> u32
1024    where
1025        BlockHashSize<S1>: ConstrainedBlockHashSize,
1026        BlockHashSize<S2>: ConstrainedBlockHashSize,
1027        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1028    {
1029        let other = other.as_ref();
1030        let (bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
1031        assert!(block_size::is_near_gt(bs1, _bs2));
1032        self.block_hash_1_internal()
1033            .score_strings_internal(other.block_hash_2(), bs1)
1034    }
1035
1036    /// Compare two fuzzy hashes assuming both are different and their
1037    /// block sizes have a relation of [`BlockSizeRelation::NearGt`].
1038    ///
1039    /// # Safety
1040    ///
1041    /// *   Both fuzzy hashes (`self` and `other`) must have
1042    ///     block size relation of [`BlockSizeRelation::NearGt`].
1043    ///
1044    /// If they are not satisfied, it will return a meaningless score.
1045    #[cfg(feature = "unchecked")]
1046    #[allow(unsafe_code)]
1047    #[inline(always)]
1048    pub unsafe fn compare_unequal_near_gt_unchecked<const S1: usize, const S2: usize>(
1049        &self,
1050        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1051    ) -> u32
1052    where
1053        BlockHashSize<S1>: ConstrainedBlockHashSize,
1054        BlockHashSize<S2>: ConstrainedBlockHashSize,
1055        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1056    {
1057        self.compare_unequal_near_gt_internal(other)
1058    }
1059
1060    /// Compare two fuzzy hashes assuming both are different and their
1061    /// block sizes have a relation of [`BlockSizeRelation::NearGt`].
1062    ///
1063    /// # Usage Constraints
1064    ///
1065    /// *   Both fuzzy hashes (`self` and `other`) must have
1066    ///     block size relation of [`BlockSizeRelation::NearGt`].
1067    #[inline(always)]
1068    pub fn compare_unequal_near_gt<const S1: usize, const S2: usize>(
1069        &self,
1070        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1071    ) -> u32
1072    where
1073        BlockHashSize<S1>: ConstrainedBlockHashSize,
1074        BlockHashSize<S2>: ConstrainedBlockHashSize,
1075        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1076    {
1077        let other = other.as_ref();
1078        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
1079        assert!(block_size::is_near_gt(_bs1, _bs2));
1080        self.compare_unequal_near_gt_internal(other)
1081    }
1082
1083    /// The internal implementation of [`Self::compare_unequal_unchecked()`].
1084    #[rustfmt::skip]
1085    #[inline]
1086    fn compare_unequal_internal<const S1: usize, const S2: usize>(
1087        &self,
1088        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1089    ) -> u32
1090    where
1091        BlockHashSize<S1>: ConstrainedBlockHashSize,
1092        BlockHashSize<S2>: ConstrainedBlockHashSize,
1093        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1094    {
1095        let other = other.as_ref();
1096        debug_assert!(!self.is_equiv(other));
1097        match block_size::compare_sizes(self.log_blocksize, other.log_blocksize) { // grcov-excl-br-line:MATCH_ENUM
1098            BlockSizeRelation::Far => 0,
1099            BlockSizeRelation::NearEq => self.compare_unequal_near_eq_internal(other),
1100            BlockSizeRelation::NearLt => self.compare_unequal_near_lt_internal(other),
1101            BlockSizeRelation::NearGt => self.compare_unequal_near_gt_internal(other),
1102        }
1103    }
1104
1105    /// Compare two normalized fuzzy hashes assuming both are different.
1106    ///
1107    /// # Safety
1108    ///
1109    /// *   Both fuzzy hashes (`self` and `other`) must be different.
1110    ///
1111    /// If the condition above is not satisfied, it will return
1112    /// a meaningless score.
1113    #[cfg(feature = "unchecked")]
1114    #[allow(unsafe_code)]
1115    #[inline(always)]
1116    pub unsafe fn compare_unequal_unchecked<const S1: usize, const S2: usize>(
1117        &self,
1118        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1119    ) -> u32
1120    where
1121        BlockHashSize<S1>: ConstrainedBlockHashSize,
1122        BlockHashSize<S2>: ConstrainedBlockHashSize,
1123        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1124    {
1125        self.compare_unequal_internal(other)
1126    }
1127
1128    /// *Slow*: Compare two normalized fuzzy hashes assuming
1129    /// both are different.
1130    ///
1131    /// # Usage Constraints
1132    ///
1133    /// *   Both fuzzy hashes (`self` and `other`) must be different.
1134    ///
1135    /// # Performance Consideration
1136    ///
1137    /// This method's performance is not good enough (because of the constraint
1138    /// checking).
1139    ///
1140    /// Use those instead:
1141    /// *   [`compare()`](Self::compare()) (checked)
1142    /// *   [`compare_unequal_unchecked()`](Self::compare_unequal_unchecked())
1143    ///     (unchecked)
1144    #[inline(always)]
1145    pub fn compare_unequal<const S1: usize, const S2: usize>(
1146        &self,
1147        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1148    ) -> u32
1149    where
1150        BlockHashSize<S1>: ConstrainedBlockHashSize,
1151        BlockHashSize<S2>: ConstrainedBlockHashSize,
1152        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1153    {
1154        let other = other.as_ref();
1155        assert!(!self.is_equiv(other));
1156        self.compare_unequal_internal(other)
1157    }
1158
1159    /// Compares two normalized fuzzy hashes.
1160    #[rustfmt::skip]
1161    #[inline]
1162    pub fn compare<const S1: usize, const S2: usize>(
1163        &self,
1164        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1165    ) -> u32
1166    where
1167        BlockHashSize<S1>: ConstrainedBlockHashSize,
1168        BlockHashSize<S2>: ConstrainedBlockHashSize,
1169        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1170    {
1171        let other = other.as_ref();
1172        match block_size::compare_sizes(self.log_blocksize, other.log_blocksize) { // grcov-excl-br-line:MATCH_ENUM
1173            BlockSizeRelation::Far => 0,
1174            BlockSizeRelation::NearEq => self.compare_near_eq_internal(other),
1175            BlockSizeRelation::NearLt => self.compare_unequal_near_lt_internal(other),
1176            BlockSizeRelation::NearGt => self.compare_unequal_near_gt_internal(other),
1177        }
1178    }
1179
1180    /// The internal implementation of [`Self::is_comparison_candidate_near_eq_unchecked()`].
1181    #[inline]
1182    fn is_comparison_candidate_near_eq_internal<const S1: usize, const S2: usize>(
1183        &self,
1184        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1185    ) -> bool
1186    where
1187        BlockHashSize<S1>: ConstrainedBlockHashSize,
1188        BlockHashSize<S2>: ConstrainedBlockHashSize,
1189        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1190    {
1191        let other = other.as_ref();
1192        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
1193        debug_assert!(block_size::is_near_eq(_bs1, _bs2));
1194        self.block_hash_1_internal()
1195            .has_common_substring_internal(other.block_hash_1())
1196            || self
1197                .block_hash_2_internal()
1198                .has_common_substring_internal(other.block_hash_2())
1199    }
1200
1201    /// Tests whether `other` is a candidate for edit distance-based comparison
1202    /// assuming that their block sizes have a relation of
1203    /// [`BlockSizeRelation::NearEq`].
1204    ///
1205    /// See also: [`is_comparison_candidate()`](Self::is_comparison_candidate())
1206    ///
1207    /// # Safety
1208    ///
1209    /// *   Both fuzzy hashes (`self` and `other`) must have
1210    ///     block size relation of [`BlockSizeRelation::NearEq`].
1211    ///
1212    /// If the condition above is not satisfied, it will return
1213    /// a meaningless value.
1214    #[cfg(feature = "unchecked")]
1215    #[allow(unsafe_code)]
1216    #[inline(always)]
1217    pub unsafe fn is_comparison_candidate_near_eq_unchecked<const S1: usize, const S2: usize>(
1218        &self,
1219        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1220    ) -> bool
1221    where
1222        BlockHashSize<S1>: ConstrainedBlockHashSize,
1223        BlockHashSize<S2>: ConstrainedBlockHashSize,
1224        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1225    {
1226        self.is_comparison_candidate_near_eq_internal(other)
1227    }
1228
1229    /// Tests whether `other` is a candidate for edit distance-based comparison
1230    /// assuming that their block sizes have a relation of
1231    /// [`BlockSizeRelation::NearEq`].
1232    ///
1233    /// See also: [`is_comparison_candidate()`](Self::is_comparison_candidate())
1234    ///
1235    /// # Usage Constraints
1236    ///
1237    /// *   Both fuzzy hashes (`self` and `other`) must have
1238    ///     block size relation of [`BlockSizeRelation::NearEq`].
1239    #[inline(always)]
1240    pub fn is_comparison_candidate_near_eq<const S1: usize, const S2: usize>(
1241        &self,
1242        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1243    ) -> bool
1244    where
1245        BlockHashSize<S1>: ConstrainedBlockHashSize,
1246        BlockHashSize<S2>: ConstrainedBlockHashSize,
1247        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1248    {
1249        let other = other.as_ref();
1250        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
1251        assert!(block_size::is_near_eq(_bs1, _bs2));
1252        self.is_comparison_candidate_near_eq_internal(other)
1253    }
1254
1255    /// The internal implementation of [`Self::is_comparison_candidate_near_lt_unchecked()`].
1256    #[inline]
1257    fn is_comparison_candidate_near_lt_internal<const S1: usize, const S2: usize>(
1258        &self,
1259        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1260    ) -> bool
1261    where
1262        BlockHashSize<S1>: ConstrainedBlockHashSize,
1263        BlockHashSize<S2>: ConstrainedBlockHashSize,
1264        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1265    {
1266        let other = other.as_ref();
1267        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
1268        debug_assert!(block_size::is_near_lt(_bs1, _bs2));
1269        self.block_hash_2_internal()
1270            .has_common_substring_internal(other.block_hash_1())
1271    }
1272
1273    /// Tests whether `other` is a candidate for edit distance-based comparison
1274    /// assuming that their block sizes have a relation of
1275    /// [`BlockSizeRelation::NearLt`].
1276    ///
1277    /// See also: [`is_comparison_candidate()`](Self::is_comparison_candidate())
1278    ///
1279    /// # Safety
1280    ///
1281    /// *   Both fuzzy hashes (`self` and `other`) must have
1282    ///     block size relation of [`BlockSizeRelation::NearLt`].
1283    ///
1284    /// If the condition above is not satisfied, it will return
1285    /// a meaningless value.
1286    #[cfg(feature = "unchecked")]
1287    #[allow(unsafe_code)]
1288    #[inline(always)]
1289    pub unsafe fn is_comparison_candidate_near_lt_unchecked<const S1: usize, const S2: usize>(
1290        &self,
1291        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1292    ) -> bool
1293    where
1294        BlockHashSize<S1>: ConstrainedBlockHashSize,
1295        BlockHashSize<S2>: ConstrainedBlockHashSize,
1296        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1297    {
1298        self.is_comparison_candidate_near_lt_internal(other)
1299    }
1300
1301    /// Tests whether `other` is a candidate for edit distance-based comparison
1302    /// assuming that their block sizes have a relation of
1303    /// [`BlockSizeRelation::NearLt`].
1304    ///
1305    /// See also: [`is_comparison_candidate()`](Self::is_comparison_candidate())
1306    ///
1307    /// # Usage Constraints
1308    ///
1309    /// *   Both fuzzy hashes (`self` and `other`) must have
1310    ///     block size relation of [`BlockSizeRelation::NearLt`].
1311    #[inline(always)]
1312    pub fn is_comparison_candidate_near_lt<const S1: usize, const S2: usize>(
1313        &self,
1314        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1315    ) -> bool
1316    where
1317        BlockHashSize<S1>: ConstrainedBlockHashSize,
1318        BlockHashSize<S2>: ConstrainedBlockHashSize,
1319        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1320    {
1321        let other = other.as_ref();
1322        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
1323        assert!(block_size::is_near_lt(_bs1, _bs2));
1324        self.is_comparison_candidate_near_lt_internal(other)
1325    }
1326
1327    /// The internal implementation of [`Self::is_comparison_candidate_near_gt_unchecked()`].
1328    #[inline]
1329    fn is_comparison_candidate_near_gt_internal<const S1: usize, const S2: usize>(
1330        &self,
1331        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1332    ) -> bool
1333    where
1334        BlockHashSize<S1>: ConstrainedBlockHashSize,
1335        BlockHashSize<S2>: ConstrainedBlockHashSize,
1336        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1337    {
1338        let other = other.as_ref();
1339        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
1340        debug_assert!(block_size::is_near_gt(_bs1, _bs2));
1341        self.block_hash_1_internal()
1342            .has_common_substring_internal(other.block_hash_2())
1343    }
1344
1345    /// Tests whether `other` is a candidate for edit distance-based comparison
1346    /// assuming that their block sizes have a relation of
1347    /// [`BlockSizeRelation::NearGt`].
1348    ///
1349    /// See also: [`is_comparison_candidate()`](Self::is_comparison_candidate())
1350    ///
1351    /// # Safety
1352    ///
1353    /// *   Both fuzzy hashes (`self` and `other`) must have
1354    ///     block size relation of [`BlockSizeRelation::NearGt`].
1355    ///
1356    /// If the condition above is not satisfied, it will return
1357    /// a meaningless value.
1358    #[cfg(feature = "unchecked")]
1359    #[allow(unsafe_code)]
1360    #[inline(always)]
1361    pub unsafe fn is_comparison_candidate_near_gt_unchecked<const S1: usize, const S2: usize>(
1362        &self,
1363        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1364    ) -> bool
1365    where
1366        BlockHashSize<S1>: ConstrainedBlockHashSize,
1367        BlockHashSize<S2>: ConstrainedBlockHashSize,
1368        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1369    {
1370        self.is_comparison_candidate_near_gt_internal(other)
1371    }
1372
1373    /// Tests whether `other` is a candidate for edit distance-based comparison
1374    /// assuming that their block sizes have a relation of
1375    /// [`BlockSizeRelation::NearGt`].
1376    ///
1377    /// See also: [`is_comparison_candidate()`](Self::is_comparison_candidate())
1378    ///
1379    /// # Usage Constraints
1380    ///
1381    /// *   Both fuzzy hashes (`self` and `other`) must have
1382    ///     block size relation of [`BlockSizeRelation::NearGt`].
1383    #[inline(always)]
1384    pub fn is_comparison_candidate_near_gt<const S1: usize, const S2: usize>(
1385        &self,
1386        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1387    ) -> bool
1388    where
1389        BlockHashSize<S1>: ConstrainedBlockHashSize,
1390        BlockHashSize<S2>: ConstrainedBlockHashSize,
1391        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1392    {
1393        let other = other.as_ref();
1394        let (_bs1, _bs2) = (self.log_blocksize, other.log_blocksize);
1395        assert!(block_size::is_near_gt(_bs1, _bs2));
1396        self.is_comparison_candidate_near_gt_internal(other)
1397    }
1398
1399    /// Tests whether `other` is a candidate for edit distance-based comparison.
1400    ///
1401    /// If this function returns [`false`] **and** `self` and `other` are not
1402    /// [equivalent](Self::is_equiv()), their similarity will be
1403    /// calculated to 0.
1404    ///
1405    /// # Use Case (Example)
1406    ///
1407    /// This operation is useful to divide a set of *unique* (normalized)
1408    /// fuzzy hashes into smaller distinct sets.  The similarity score can be
1409    /// non-zero if and only if two fuzzy hashes belong to the same set.
1410    ///
1411    /// # Safety (Warning)
1412    ///
1413    /// This function (and its variants) can return [`false`] if `self` and
1414    /// `other` are equivalent (the base fuzzy hash object of `self` and `other`
1415    /// are the same).  In this case, their similarity score is `100`.
1416    ///
1417    /// Because of this, we have to use a set of *unique* fuzzy hash values
1418    /// on the use case above to prevent false-negative matches.
1419    ///
1420    /// See ["Fuzzy Hash Comparison" section of `FuzzyHashData`](FuzzyHashData#fuzzy-hash-comparison)
1421    /// for the reason why we need to care about those cases.
1422    ///
1423    /// # Useful Property
1424    ///
1425    /// If two fuzzy hashes are correctly provided and this method (or its
1426    /// family) returns [`true`], the similarity score is guaranteed to be
1427    /// greater than zero.
1428    ///
1429    /// This property can be used to simplify clustering since we are able to
1430    /// prove that the similarity score of two *different* fuzzy hashes is
1431    /// non-zero if this method (or its family) returns [`true`] (i.e. no actual
1432    /// comparison is required to split clusters on single-linkage clustering).
1433    ///
1434    /// # Advanced Topic: Implementing Equivalents
1435    ///
1436    /// While this method family can be important to preprocessing on
1437    /// single-linkage clustering, it can be inefficient as the number of fuzzy
1438    /// hash increases.
1439    /// On such cases, precomputing useful information to compute "comparison
1440    /// candidate" relations will help.
1441    ///
1442    /// [`FuzzyHashData::block_hash_1_windows()`] and family methods provide
1443    /// window access to block hash windows to enable implementing functionality
1444    /// equivalent to this method family.
1445    #[rustfmt::skip]
1446    #[inline]
1447    pub fn is_comparison_candidate<const S1: usize, const S2: usize>(
1448        &self,
1449        other: impl AsRef<fuzzy_norm_type!(S1, S2)>,
1450    ) -> bool
1451    where
1452        BlockHashSize<S1>: ConstrainedBlockHashSize,
1453        BlockHashSize<S2>: ConstrainedBlockHashSize,
1454        BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1455    {
1456        let other = other.as_ref();
1457        match block_size::compare_sizes(self.log_blocksize, other.log_blocksize) { // grcov-excl-br-line:MATCH_ENUM
1458            BlockSizeRelation::Far => false,
1459            BlockSizeRelation::NearEq => self.is_comparison_candidate_near_eq_internal(other),
1460            BlockSizeRelation::NearLt => self.is_comparison_candidate_near_lt_internal(other),
1461            BlockSizeRelation::NearGt => self.is_comparison_candidate_near_gt_internal(other),
1462        }
1463    }
1464}
1465
1466impl<const S1: usize, const S2: usize> core::convert::From<fuzzy_norm_type!(S1, S2)>
1467    for FuzzyHashCompareTarget
1468where
1469    BlockHashSize<S1>: ConstrainedBlockHashSize,
1470    BlockHashSize<S2>: ConstrainedBlockHashSize,
1471    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1472{
1473    // This "allow(unknown_lints)" is a workaround for Rust -1.62.
1474    // Remove this once MSRV 1.63 is acceptable.
1475    #[allow(unknown_lints)]
1476    #[allow(clippy::needless_borrow)]
1477    #[allow(clippy::needless_borrows_for_generic_args)]
1478    #[inline]
1479    fn from(value: fuzzy_norm_type!(S1, S2)) -> Self {
1480        let mut dest: Self = Self::new();
1481        dest.init_from_partial(&value);
1482        dest
1483    }
1484}
1485
1486impl<const S1: usize, const S2: usize> core::convert::From<&fuzzy_norm_type!(S1, S2)>
1487    for FuzzyHashCompareTarget
1488where
1489    BlockHashSize<S1>: ConstrainedBlockHashSize,
1490    BlockHashSize<S2>: ConstrainedBlockHashSize,
1491    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1492{
1493    #[inline]
1494    fn from(value: &fuzzy_norm_type!(S1, S2)) -> Self {
1495        let mut dest: Self = Self::new();
1496        dest.init_from_partial(value);
1497        dest
1498    }
1499}
1500
1501impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
1502    core::convert::From<FuzzyHashDualData<S1, S2, C1, C2>> for FuzzyHashCompareTarget
1503where
1504    BlockHashSize<S1>: ConstrainedBlockHashSize,
1505    BlockHashSize<S2>: ConstrainedBlockHashSize,
1506    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1507    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1508    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1509{
1510    #[allow(clippy::needless_borrow)]
1511    #[inline]
1512    fn from(value: FuzzyHashDualData<S1, S2, C1, C2>) -> Self {
1513        Self::from(value.as_ref())
1514    }
1515}
1516
1517impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
1518    core::convert::From<&FuzzyHashDualData<S1, S2, C1, C2>> for FuzzyHashCompareTarget
1519where
1520    BlockHashSize<S1>: ConstrainedBlockHashSize,
1521    BlockHashSize<S2>: ConstrainedBlockHashSize,
1522    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1523    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1524    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1525{
1526    #[allow(clippy::needless_borrow)]
1527    #[inline]
1528    fn from(value: &FuzzyHashDualData<S1, S2, C1, C2>) -> Self {
1529        Self::from(value.as_ref())
1530    }
1531}
1532
1533/// Additional implementation for normalized fuzzy hashes,
1534/// enabling comparison between two fuzzy hashes directly.
1535impl<const S1: usize, const S2: usize> fuzzy_norm_type!(S1, S2)
1536where
1537    BlockHashSize<S1>: ConstrainedBlockHashSize,
1538    BlockHashSize<S2>: ConstrainedBlockHashSize,
1539    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1540{
1541    /// Internal function for [`compare()`](Self::compare()) and
1542    /// [`compare_unequal_internal()`](Self::compare_unequal_internal()) methods.
1543    ///
1544    /// `check_equality` parameter determines whether to perform
1545    /// an equality test (`100` if `self` and `other` are the same).
1546    #[rustfmt::skip]
1547    #[inline(always)]
1548    fn compare_optimized_internal(&self, other: &Self, check_equality: bool) -> u32 {
1549        let rel = block_size::compare_sizes(self.log_blocksize, other.log_blocksize);
1550        match rel { // grcov-excl-br-line:MATCH_ENUM
1551            BlockSizeRelation::NearEq => {
1552                if check_equality && self == other {
1553                    return 100;
1554                }
1555                let target = FuzzyHashCompareTarget::from(self);
1556                target.compare_unequal_near_eq_internal(other)
1557            }
1558            BlockSizeRelation::NearLt => {
1559                let mut target = BlockHashPositionArray::new();
1560                target.init_from_partial(self.block_hash_2());
1561                target.score_strings_internal(other.block_hash_1(), other.log_blocksize)
1562            }
1563            BlockSizeRelation::NearGt => {
1564                let mut target = BlockHashPositionArray::new();
1565                target.init_from_partial(self.block_hash_1());
1566                target.score_strings_internal(other.block_hash_2(), self.log_blocksize)
1567            }
1568            BlockSizeRelation::Far => 0,
1569        }
1570    }
1571
1572    /// Compare two fuzzy hashes and retrieves the similarity score.
1573    #[inline]
1574    pub fn compare(&self, other: impl AsRef<Self>) -> u32 {
1575        let other = other.as_ref();
1576        self.compare_optimized_internal(other, true)
1577    }
1578
1579    /// The internal implementation of [`Self::compare_unequal_unchecked()`].
1580    #[inline]
1581    fn compare_unequal_internal(&self, other: impl AsRef<Self>) -> u32 {
1582        let other = other.as_ref();
1583        debug_assert!(self != other);
1584        self.compare_optimized_internal(other, false)
1585    }
1586
1587    /// Compare two fuzzy hashes assuming both are different.
1588    ///
1589    /// # Safety
1590    ///
1591    /// *   `self` and `other` must be different.
1592    ///
1593    /// If the condition above is not satisfied, it will return
1594    /// a meaningless score.
1595    #[cfg(feature = "unchecked")]
1596    #[allow(unsafe_code)]
1597    #[inline(always)]
1598    pub unsafe fn compare_unequal_unchecked(&self, other: impl AsRef<Self>) -> u32 {
1599        self.compare_unequal_internal(other)
1600    }
1601
1602    /// *Slow*: Compare two fuzzy hashes assuming both are different.
1603    ///
1604    /// # Usage Constraints
1605    ///
1606    /// *   `self` and `other` must be different.
1607    ///
1608    /// # Performance Consideration
1609    ///
1610    /// This method's performance is not good enough (because of constraint
1611    /// checking).
1612    ///
1613    /// Use those instead:
1614    /// *   [`compare()`](Self::compare()) (checked)
1615    /// *   [`compare_unequal_unchecked()`](Self::compare_unequal_unchecked())
1616    ///     (unchecked)
1617    #[inline(always)]
1618    pub fn compare_unequal(&self, other: impl AsRef<Self>) -> u32 {
1619        let other = other.as_ref();
1620        assert!(self != other);
1621        self.compare_unequal_internal(other)
1622    }
1623}
1624
1625/// Constant assertions related to this module
1626#[doc(hidden)]
1627mod const_asserts {
1628    use static_assertions::{const_assert, const_assert_eq};
1629
1630    use super::*;
1631
1632    /// Check whether a given block size requires no score capping.
1633    #[allow(dead_code)] // to avoid false error
1634    const fn is_log_block_size_needs_no_capping(log_block_size: u8) -> bool {
1635        // Test whether score_cap in score_strings method is equal to
1636        // or greater than 100 (meaning, no capping is required).
1637        (100 + block_hash::MIN_LCS_FOR_COMPARISON as u64 - 1)
1638            / block_hash::MIN_LCS_FOR_COMPARISON as u64
1639            <= block_size::from_log_internal_const(log_block_size) as u64 / block_size::MIN as u64
1640    }
1641
1642    // Compare with the precomputed value
1643    // (block_size / block_size::MIN >= 15, log_block_size >= 4 [2^log_block_size >= 16])
1644    const_assert_eq!(FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER, 4);
1645
1646    // Regular tests.
1647    const_assert!(block_size::is_log_valid(
1648        FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER
1649    ));
1650    const_assert!(!is_log_block_size_needs_no_capping(
1651        FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER - 1
1652    ));
1653    const_assert!(is_log_block_size_needs_no_capping(
1654        FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER
1655    ));
1656
1657    // Regular tests (dynamic)
1658    // grcov-excl-tests-start
1659    #[cfg(test)]
1660    #[test]
1661    fn log_block_size_capping_border_is_correct() {
1662        assert!(!is_log_block_size_needs_no_capping(
1663            FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER - 1
1664        ));
1665        assert!(is_log_block_size_needs_no_capping(
1666            FuzzyHashCompareTarget::LOG_BLOCK_SIZE_CAPPING_BORDER
1667        ));
1668    }
1669    // grcov-excl-tests-stop
1670
1671    // Test whether no arithmetic overflow occurs on
1672    // the similarity score computation.
1673    // grcov-excl-tests-start
1674    #[cfg(test)]
1675    #[test]
1676    fn score_arithmetic_causes_no_overflow() {
1677        /*
1678            Possible arithmetic operations to check overflow:
1679            1.  (block_hash::FULL_SIZE * 2) * block_hash::FULL_SIZE
1680            2.  100 * block_hash::FULL_SIZE
1681        */
1682        assert!(u32::try_from(block_hash::FULL_SIZE)
1683            .ok()
1684            .and_then(|x| x.checked_mul(2))
1685            .and_then(|x| x.checked_mul(u32::try_from(block_hash::FULL_SIZE).unwrap()))
1686            .is_some());
1687        assert!(u32::try_from(block_hash::FULL_SIZE)
1688            .ok()
1689            .and_then(|x| x.checked_mul(100))
1690            .is_some());
1691    }
1692    // grcov-excl-tests-stop
1693}
1694
1695mod test_utils;
1696mod tests;