ssdeep/internals/hash/
block.rs

1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: Copyright (C) 2023–2025 Tsukasa OI <floss_ssdeep@irq.a4lg.com>.
3
4//! Block size handlings and block hash parameters / utilities.
5
6use core::cmp::Ordering;
7
8/// A type to represent relation between two block sizes.
9///
10/// Because the core comparison function can only compare two block hashes
11/// with the same effective block size, we cannot compare two fuzzy hashes
12/// if their block sizes are not near enough.
13///
14/// There are three cases where we can perform actual block hash comparison:
15///
16/// 1. **Equals** ([`NearEq`](Self::NearEq))  
17///    `bs_a == bs_b`
18/// 2. **Less than** ([`NearLt`](Self::NearLt))  
19///    `bs_a < bs_b && bs_a * 2 == bs_b`
20/// 3. **Greater than** ([`NearGt`](Self::NearGt))  
21///    `bs_a > bs_b && bs_a == bs_b * 2`
22///
23/// See also: ["Relations with Block Size" section of `FuzzyHashData`](crate::FuzzyHashData#relations-with-block-size)
24///
25/// This type represents those *near* cases (three variants) and the case which
26/// two fuzzy hashes cannot perform a block hash comparison, the *far* case
27/// (the [`Far`](Self::Far) variant).
28///
29/// A value of this type can be retrieved by using
30/// [`block_size::compare_sizes()`](crate::block_size::compare_sizes()) or
31/// [`FuzzyHashData::compare_block_sizes()`](crate::internals::hash::FuzzyHashData::compare_block_sizes()).
32///
33/// # Compatibility Note
34///
35/// Since the version 0.3, the representation of this enum is no longer
36/// specified as specific representation of this enum is not important.
37#[derive(Debug, Clone, Copy, PartialEq, Eq)]
38pub enum BlockSizeRelation {
39    /// Near and less than (i.e. `x < x * 2`).
40    ///
41    /// Two block sizes are *near* and the block hash 2 (one with a larger block
42    /// size) of the left side (of comparison) can be compared with the block
43    /// hash 1 (one with a smaller block size) of the right side.
44    NearLt,
45
46    /// Near and equals (i.e. `x == x`).
47    ///
48    /// Two block sizes are not just *near* but the same.
49    /// We compare both block hashes with the other and take the maximum value
50    /// for the output.
51    NearEq,
52
53    /// Near and greater than (i.e. `x * 2 > x`).
54    ///
55    /// Two block sizes are *near* and the block hash 1 (one with a smaller
56    /// block size) of the left side (of comparison) can be compared with the
57    /// block hash 2 (one with a larger block size) of the right side.
58    NearGt,
59
60    /// Far (e.g. `x` and `x * 4`).
61    ///
62    /// Two block sizes are *far* and a block hash comparison
63    /// cannot be performed.
64    Far,
65}
66
67impl BlockSizeRelation {
68    /// Checks whether a given value denotes one of the three *near* cases.
69    pub fn is_near(&self) -> bool {
70        !matches!(self, BlockSizeRelation::Far)
71    }
72}
73
74/// Utility related to block size part of the fuzzy hash.
75///
76/// See also: ["Block Size" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#block-size)
77pub mod block_size {
78    use super::*;
79
80    /// The minimum block size of a fuzzy hash.
81    ///
82    /// Any block size generated by ssdeep can be represented as
83    /// [`MIN`] * 2<sup>n</sup> (a power of two where `n` ≧ 0).
84    ///
85    /// This is the smallest valid value of the block size part of a fuzzy hash.
86    pub const MIN: u32 = 3;
87
88    /// The number of valid block sizes.
89    ///
90    /// [`NUM_VALID`] is the smallest value which
91    /// [`MIN`] * 2<sup>n</sup>exceeds [`u32::MAX`] and this value
92    /// itself is not valid as a *base-2 logarithm* form of the block size (in
93    /// fact, this is the *smallest* invalid value) in the fuzzy hash object.
94    ///
95    /// Note that, it *can* however be a valid *effective base-2 logarithm* form
96    /// of the block size of the block hash 2 where the (base) block size (as in
97    /// a fuzzy hash) is the largest valid one.
98    /// Some low level methods may accept this value (in [`u8`]) as a *base-2
99    /// logarithm* form of the block size (explicitly documented in such cases).
100    pub const NUM_VALID: usize = 31;
101
102    /// The range representing the valid *base-2 logarithm* form of the block size
103    /// (used while testing).
104    #[cfg(any(test, doc))]
105    pub(crate) const RANGE_LOG_VALID: core::ops::Range<u8> = 0..block_size::NUM_VALID as u8;
106
107    /// Checks whether a given block size is valid.
108    #[inline]
109    pub const fn is_valid(block_size: u32) -> bool {
110        (block_size % MIN == 0) && (block_size / MIN).is_power_of_two()
111    }
112
113    /// Checks whether *base-2 logarithm* form of the block size is valid.
114    #[inline(always)]
115    pub const fn is_log_valid(log_block_size: u8) -> bool {
116        log_block_size < NUM_VALID as u8
117    }
118
119    /// The internal implementation of [`from_log_unchecked()`].
120    #[inline(always)]
121    pub(crate) const fn from_log_internal_const(log_block_size: u8) -> u32 {
122        MIN << log_block_size
123    }
124
125    /// The internal implementation of [`from_log_unchecked()`].
126    #[inline(always)]
127    pub(crate) fn from_log_internal(log_block_size: u8) -> u32 {
128        debug_assert!(is_log_valid(log_block_size));
129        from_log_internal_const(log_block_size)
130    }
131
132    /// Converts *base-2 logarithm* form of the block size to the actual one
133    /// without checking validity of the block size.
134    ///
135    /// # Safety
136    ///
137    /// `log_block_size` must be valid.
138    ///
139    /// See also:
140    /// ["Block Size" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#block-size)
141    #[cfg(feature = "unchecked")]
142    #[allow(unsafe_code)]
143    #[inline(always)]
144    pub unsafe fn from_log_unchecked(log_block_size: u8) -> u32 {
145        from_log_internal(log_block_size)
146    }
147
148    /// Converts *base-2 logarithm* form of the block size to the actual one.
149    ///
150    /// It returns [`None`] if `log_block_size` is not valid.
151    ///
152    /// See also:
153    /// ["Block Size" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#block-size)
154    #[inline]
155    pub fn from_log(log_block_size: u8) -> Option<u32> {
156        is_log_valid(log_block_size).then(|| from_log_internal(log_block_size))
157    }
158
159    /// Precomputed block size strings.
160    ///
161    /// All valid block sizes are precomputed as raw strings to avoid
162    /// calling [`u32::to_string()`](std::string::ToString::to_string())
163    /// from [`FuzzyHash::to_string()`](crate::internals::hash::FuzzyHash::to_string()).
164    pub(crate) const BLOCK_SIZES_STR: [&str; NUM_VALID] = [
165        "3",
166        "6",
167        "12",
168        "24",
169        "48",
170        "96",
171        "192",
172        "384",
173        "768",
174        "1536",
175        "3072",
176        "6144",
177        "12288",
178        "24576",
179        "49152",
180        "98304",
181        "196608",
182        "393216",
183        "786432",
184        "1572864",
185        "3145728",
186        "6291456",
187        "12582912",
188        "25165824",
189        "50331648",
190        "100663296",
191        "201326592",
192        "402653184",
193        "805306368",
194        "1610612736",
195        "3221225472",
196    ];
197
198    /// Maximum length of the precomputed block size strings.
199    pub(crate) const MAX_BLOCK_SIZE_LEN_IN_CHARS: usize =
200        BLOCK_SIZES_STR[BLOCK_SIZES_STR.len() - 1].len();
201
202    /// The custom constant for a variant of de Bruijn sequence to convert
203    /// all valid block size values into the unique index.
204    ///
205    /// # Internal Notes
206    ///
207    /// It uses a custom variant of de Bruijn sequence for conversion.
208    /// This is a result of a manual search so that we can have unique index
209    /// values for all `(3<<i) for i in 0..31`.  Note that:
210    ///
211    /// *   `block_size::MIN == 3`
212    /// *   `block_size::NUM_VALID == 31`
213    const LOG_DEBRUIJN_CONSTANT: u32 = 0x017713ca;
214
215    /// The function to convert a block size into an index of
216    /// a variant of de Bruijn sequence.
217    #[inline(always)]
218    const fn debruijn_index(block_size: u32) -> usize {
219        (block_size.wrapping_mul(LOG_DEBRUIJN_CONSTANT) >> 27) as usize
220    }
221
222    /// The custom table for a variant of de Bruijn sequence to convert
223    /// all valid block size values into the *base-2 logarithm* form.
224    ///
225    /// The element `[0x1f]` is unused (and assigned an invalid number `0xff`).
226    ///
227    /// See [`LOG_DEBRUIJN_CONSTANT`] for internal notes.
228    #[rustfmt::skip]
229    const LOG_DEBRUIJN_TABLE: [u8; 32] = [
230        0x00, 0x01, 0x02, 0x06, 0x03, 0x0b, 0x07, 0x10,
231        0x04, 0x0e, 0x0c, 0x18, 0x08, 0x15, 0x11, 0x1a,
232        0x1e, 0x05, 0x0a, 0x0f, 0x0d, 0x17, 0x14, 0x19,
233        0x1d, 0x09, 0x16, 0x13, 0x1c, 0x12, 0x1b, 0xff,
234    ];
235
236    /// The internal implementation of [`log_from_valid_unchecked()`].
237    #[inline(always)]
238    pub(crate) fn log_from_valid_internal(block_size: u32) -> u8 {
239        debug_assert!(is_valid(block_size));
240        LOG_DEBRUIJN_TABLE[debruijn_index(block_size)] // grcov-excl-br-line:ARRAY
241    }
242
243    /// Computes the *base-2 logarithm* form of a valid block size
244    /// but do not check whether the block size is valid.
245    ///
246    /// This is the same as computing `n` for a valid block size
247    /// which can be represented as ([`MIN`] * 2<sup>n</sup>) (`0 <= n`).
248    ///
249    /// # Safety
250    ///
251    /// If `block_size` is not valid, the result will be unpredictable.
252    #[cfg(feature = "unchecked")]
253    #[allow(unsafe_code)]
254    #[inline(always)]
255    pub unsafe fn log_from_valid_unchecked(block_size: u32) -> u8 {
256        log_from_valid_internal(block_size)
257    }
258
259    /// Computes the *base-2 logarithm* form of a valid block size.
260    ///
261    /// This is the same as computing `n` for a valid block size
262    /// which can be represented as ([`MIN`] * 2<sup>n</sup>) (`0 <= n`).
263    ///
264    /// # Usage Constraints
265    ///
266    /// *   `block_size` must be valid.
267    #[inline(always)]
268    pub fn log_from_valid(block_size: u32) -> u8 {
269        assert!(is_valid(block_size));
270        log_from_valid_internal(block_size)
271    }
272
273    /// Checks whether two *base-2 logarithm* forms of the block size values
274    /// form a *near* relation (one of the three *near* cases).
275    ///
276    /// Both arguments must be valid.
277    #[inline(always)]
278    pub fn is_near(lhs: u8, rhs: u8) -> bool {
279        debug_assert!(is_log_valid(lhs));
280        debug_assert!(is_log_valid(rhs));
281        // Optimize using u32
282        u32::wrapping_sub(lhs as u32, rhs as u32).wrapping_add(1) <= 2
283    }
284
285    /// Checks whether two *base-2 logarithm* forms of the block size values
286    /// form a [`BlockSizeRelation::NearEq`] relation.
287    ///
288    /// Both arguments must be valid.
289    #[inline(always)]
290    pub fn is_near_eq(lhs: u8, rhs: u8) -> bool {
291        debug_assert!(is_log_valid(lhs));
292        debug_assert!(is_log_valid(rhs));
293        lhs == rhs
294    }
295
296    /// Checks whether two *base-2 logarithm* forms of the block size values
297    /// form a [`BlockSizeRelation::NearLt`] relation.
298    ///
299    /// Both arguments must be valid.
300    #[inline(always)]
301    pub fn is_near_lt(lhs: u8, rhs: u8) -> bool {
302        debug_assert!(is_log_valid(lhs));
303        debug_assert!(is_log_valid(rhs));
304        // Optimize using i32
305        (rhs as i32) - (lhs as i32) == 1
306    }
307
308    /// Checks whether two *base-2 logarithm* forms of the block size values
309    /// form a [`BlockSizeRelation::NearGt`] relation.
310    ///
311    /// Both arguments must be valid.
312    #[inline(always)]
313    pub fn is_near_gt(lhs: u8, rhs: u8) -> bool {
314        is_near_lt(rhs, lhs)
315    }
316
317    /// Compare two *base-2 logarithm* forms of the block size values to
318    /// determine the relation between two block sizes.
319    ///
320    /// The result is the one of the [`BlockSizeRelation`] values, representing
321    /// the relation between two block sizes.
322    ///
323    /// Both arguments must be valid.
324    #[inline(always)]
325    pub fn compare_sizes(lhs: u8, rhs: u8) -> BlockSizeRelation {
326        debug_assert!(is_log_valid(lhs));
327        debug_assert!(is_log_valid(rhs));
328        // Optimize using i32
329        match (lhs as i32) - (rhs as i32) {
330            -1 => BlockSizeRelation::NearLt,
331            0 => BlockSizeRelation::NearEq,
332            1 => BlockSizeRelation::NearGt,
333            _ => BlockSizeRelation::Far,
334        }
335    }
336
337    /// Compares two *base-2 logarithm* forms of the block size values
338    /// for ordering.
339    ///
340    /// Both arguments must be valid.
341    #[inline(always)]
342    pub fn cmp(lhs: u8, rhs: u8) -> Ordering {
343        debug_assert!(is_log_valid(lhs));
344        debug_assert!(is_log_valid(rhs));
345        u8::cmp(&lhs, &rhs)
346    }
347}
348
349/// Utilities related to block hash part of the fuzzy hash.
350///
351/// See also: ["Block Hashes" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#block-hashes)
352pub mod block_hash {
353    /// The number of alphabets used in the block hash part of a fuzzy hash.
354    ///
355    /// It is same as the number of Base64 alphabets and the block hash part is
356    /// represented as variable number of Base64 alphabets.
357    /// However, ssdeep does not use Base64 encoding
358    /// (since ssdeep generates a 6-bit hash value per "piece").
359    pub const ALPHABET_SIZE: usize = 64;
360
361    /// The maximum size of the block hash.
362    ///
363    /// ssdeep is a fuzzy *hash*.  We should be able to easily interchange
364    /// the hash value and storing 6-bit hash values for all pieces is not useful
365    /// enough.
366    /// This constant limits the number of "pieces" to store in each block hash.
367    ///
368    /// Note that, since ssdeep is not a cryptographic hash and is in variable
369    /// length, it's important to limit the size of the block hash to prevent
370    /// an adversary to generate a number of "pieces" by placing an adversarial
371    /// pattern (that would make the resulting hash huge if the size of the
372    /// block hash is not limited properly).
373    pub const FULL_SIZE: usize = 64;
374
375    /// The half (truncated) size of the block hash.
376    ///
377    /// This is the half of [`FULL_SIZE`].
378    ///
379    /// Normally, the second block hash is truncated to this size.
380    ///
381    /// See also:
382    /// ["Truncation" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#truncation)
383    pub const HALF_SIZE: usize = FULL_SIZE / 2;
384
385    /// The maximum size of the sequence so that the same character can be
386    /// repeated in a normalized block hash.
387    ///
388    /// See also: ["Normalization" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#normalization)
389    pub const MAX_SEQUENCE_SIZE: usize = 3;
390
391    /// The minimum length of the common substring to compute edit distance
392    /// between two block hashes.
393    ///
394    /// To score similarity between two block hashes with the same block size,
395    /// ssdeep expects that two block hashes are similar enough.
396    /// In specific, ssdeep expects that they
397    /// [have a common substring](crate::internals::compare::position_array::BlockHashPositionArrayImpl::has_common_substring())
398    /// of a length [`MIN_LCS_FOR_COMPARISON`] or longer to reduce the
399    /// possibility of false matches by chance.
400    ///
401    /// If we couldn't find such a common substring, the low level block hash
402    /// comparison method returns zero (meaning, not similar as long as
403    /// [two normalized fuzzy hashes are different](crate::internals::hash::FuzzyHashData#fuzzy-hash-comparison)).
404    ///
405    /// Finding such common substrings is a special case of finding a
406    /// [longest common substring (LCS)](https://en.wikipedia.org/wiki/Longest_common_substring).
407    ///
408    /// For instance, those two strings:
409    ///
410    /// *  `+r/kcOpEYXB+0ZJ`
411    /// *  `7ocOpEYXB+0ZF29`
412    ///
413    /// have a common substring `cOpEYXB+0Z` (length 10), long enough
414    /// (≧ [`MIN_LCS_FOR_COMPARISON`]) to compute the edit distance to compute
415    /// the similarity score.
416    ///
417    /// See also: ["Fuzzy Hash Comparison" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#fuzzy-hash-comparison)
418    pub const MIN_LCS_FOR_COMPARISON: usize = 7;
419
420    /// Numeric windows of a block hash, each value representing unique value
421    /// corresponding a substring of length [`MIN_LCS_FOR_COMPARISON`].
422    ///
423    /// An object with this type is created by either of those methods
424    /// (*normalized forms only*):
425    ///
426    /// *   [`FuzzyHashData::block_hash_1_numeric_windows()`](crate::internals::hash::FuzzyHashData::block_hash_1_numeric_windows())
427    /// *   [`FuzzyHashData::block_hash_2_numeric_windows()`](crate::internals::hash::FuzzyHashData::block_hash_2_numeric_windows())
428    ///
429    /// Unlike [`block_hash_1_windows()`](crate::internals::hash::FuzzyHashData::block_hash_1_windows()) and
430    /// [`block_hash_2_windows()`](crate::internals::hash::FuzzyHashData::block_hash_2_windows()),
431    /// each element of this iterator is a numeric value.
432    ///
433    /// This numeric form has an one-to-one correspondence with the original
434    /// substring (and is compressed).  In the current ssdeep-compatible
435    /// configuration, each value is a 42-bit unsigned integer, generated from
436    /// seven (7) hash characters (consuming 6 bits each).
437    ///
438    /// See also: [`FuzzyHashData::block_hash_1_windows()`](crate::internals::hash::FuzzyHashData::block_hash_1_windows())
439    ///
440    /// *Note*:
441    /// 7 equals [`MIN_LCS_FOR_COMPARISON`] and
442    /// 6 equals the base-2 logarithm of [`ALPHABET_SIZE`].
443    pub struct NumericWindows<'a> {
444        /// Remaining block hash portion to compute numeric windows.
445        v: &'a [u8],
446
447        /// The "last" value of the numeric windows iterator
448        /// (an incomplete value when no values are generated yet).
449        ///
450        /// The [`Self::next()`] value can be retrieved by
451        /// [shifting this value](Self::ILOG2_OF_ALPHABETS),
452        /// [masking](Self::MASK) and then adding the first byte of [`Self::v`].
453        hash: u64,
454    }
455
456    /// Numeric windows of a block hash, each value representing unique value
457    /// corresponding a substring of length [`MIN_LCS_FOR_COMPARISON`] *and*
458    /// the block size.
459    ///
460    /// An object with this type is created by either of those methods
461    /// (*normalized forms only*):
462    ///
463    /// *   [`FuzzyHashData::block_hash_1_index_windows()`](crate::internals::hash::FuzzyHashData::block_hash_1_index_windows())
464    /// *   [`FuzzyHashData::block_hash_2_index_windows()`](crate::internals::hash::FuzzyHashData::block_hash_2_index_windows())
465    ///
466    /// This is similar to that of [`NumericWindows`] but each numeric value
467    /// *also* contains the *base-2 logarithm* form of the block size
468    /// (at highest bits).
469    ///
470    /// This numeric form has an one-to-one correspondence with the original
471    /// substring plus the block size.  In the current ssdeep-compatible
472    /// configuration, each value is a 47-bit unsigned integer, generated from
473    /// low 42-bit value from [`NumericWindows`] and high 5-bit value from
474    /// the block size.
475    pub struct IndexWindows<'a> {
476        /// Inner [`NumericWindows`] object.
477        inner: NumericWindows<'a>,
478
479        /// The *base-2 logarithm* form of the block size.
480        log_block_size: u8,
481    }
482
483    impl<'a> NumericWindows<'a> {
484        /*
485            TODO:
486            Once MSRV of 1.57 is acceptable, ILOG2_OF_ALPHABETS and MASK
487            can be calculated dynamically.
488            If MSRV of 1.67 is acceptable, its definition will be more natural.
489        */
490
491        /// A Base-2 logarithm of [`ALPHABET_SIZE`].
492        pub(crate) const ILOG2_OF_ALPHABETS: u32 = 6;
493
494        /// The width of a substring (in a numeric form) in bits.
495        pub const BITS: u32 = (MIN_LCS_FOR_COMPARISON as u32) * Self::ILOG2_OF_ALPHABETS;
496
497        /// The mask value corresponding [`BITS`](Self::BITS).
498        pub const MASK: u64 = (1u64 << Self::BITS).wrapping_sub(1);
499
500        /// Creates a new object from an existing block hash.
501        #[inline]
502        pub(crate) fn new(block_hash: &'a [u8]) -> Self {
503            if block_hash.len() < MIN_LCS_FOR_COMPARISON {
504                Self { v: &[], hash: 0 }
505            } else {
506                // grcov-excl-br-start
507                Self {
508                    v: &block_hash[MIN_LCS_FOR_COMPARISON - 1..],
509                    hash: block_hash[..MIN_LCS_FOR_COMPARISON - 1]
510                        .iter()
511                        .enumerate()
512                        .map(|(i, &value)| {
513                            (value as u64)
514                                << (Self::ILOG2_OF_ALPHABETS
515                                    * (MIN_LCS_FOR_COMPARISON - 2 - i) as u32)
516                        })
517                        .fold(
518                            0u64,
519                            #[inline(always)]
520                            |x, y| x | y,
521                        ),
522                }
523                // grcov-excl-br-stop
524            }
525        }
526    }
527
528    impl<'a> IndexWindows<'a> {
529        /// The actual number of bits consumed by the block size.
530        pub(crate) const BLOCK_SIZE_BITS: u32 = 5;
531
532        /// The width of a substring (in a numeric form) in bits.
533        pub const BITS: u32 = NumericWindows::BITS + Self::BLOCK_SIZE_BITS;
534
535        /// The mask value corresponding [`BITS`](Self::BITS).
536        pub const MASK: u64 = (1u64 << Self::BITS).wrapping_sub(1);
537
538        /// Creates a new object from an existing block hash and the block size.
539        #[inline]
540        pub(crate) fn new(block_hash: &'a [u8], log_block_size: u8) -> Self {
541            Self {
542                inner: NumericWindows::new(block_hash),
543                log_block_size,
544            }
545        }
546    }
547
548    impl Iterator for NumericWindows<'_> {
549        type Item = u64;
550
551        #[inline]
552        fn next(&mut self) -> Option<Self::Item> {
553            if let Some((&value, rest)) = self.v.split_first() {
554                self.hash = ((self.hash << Self::ILOG2_OF_ALPHABETS) | (value as u64)) & Self::MASK;
555                self.v = rest;
556                Some(self.hash)
557            } else {
558                None
559            }
560        }
561
562        #[inline]
563        fn size_hint(&self) -> (usize, Option<usize>) {
564            (self.v.len(), Some(self.v.len()))
565        }
566    }
567
568    impl Iterator for IndexWindows<'_> {
569        type Item = u64;
570
571        #[inline(always)]
572        fn next(&mut self) -> Option<Self::Item> {
573            self.inner.next().map(
574                #[inline(always)]
575                |x| (x | ((self.log_block_size as u64) << NumericWindows::BITS)),
576            )
577        }
578
579        #[inline(always)]
580        fn size_hint(&self) -> (usize, Option<usize>) {
581            self.inner.size_hint()
582        }
583    }
584
585    impl ExactSizeIterator for NumericWindows<'_> {
586        #[inline]
587        fn len(&self) -> usize {
588            self.v.len()
589        }
590    }
591
592    impl ExactSizeIterator for IndexWindows<'_> {
593        #[inline(always)]
594        fn len(&self) -> usize {
595            self.inner.len()
596        }
597    }
598
599    #[allow(unsafe_code)]
600    #[cfg(all(feature = "unsafe-guarantee", feature = "unstable"))]
601    unsafe impl core::iter::TrustedLen for NumericWindows<'_> {}
602    #[allow(unsafe_code)]
603    #[cfg(all(feature = "unsafe-guarantee", feature = "unstable"))]
604    unsafe impl core::iter::TrustedLen for IndexWindows<'_> {}
605
606    impl core::iter::FusedIterator for NumericWindows<'_> {}
607    impl core::iter::FusedIterator for IndexWindows<'_> {}
608}
609
610/// A generic type to constrain given block hash size using [`ConstrainedBlockHashSize`].
611pub struct BlockHashSize<const N: usize> {}
612/// A generic type to constrain given two block hash sizes using [`ConstrainedBlockHashSizes`].
613pub struct BlockHashSizes<const S1: usize, const S2: usize> {}
614
615/// Private module to declare sealed block hash constraints.
616mod private {
617    use super::{block_hash, BlockHashSize, BlockHashSizes};
618
619    /// A trait to constrain block hash size.
620    ///
621    /// This type is implemented for [`BlockHashSize`] with following sizes:
622    ///
623    /// *   [`block_hash::FULL_SIZE`]
624    /// *   [`block_hash::HALF_SIZE`]
625    ///
626    /// This is a sealed trait.
627    pub trait SealedBlockHashSize {}
628    impl SealedBlockHashSize for BlockHashSize<{ block_hash::FULL_SIZE }> {}
629    impl SealedBlockHashSize for BlockHashSize<{ block_hash::HALF_SIZE }> {}
630
631    /// A trait to constrain block hash sizes.
632    ///
633    /// This type is implemented for [`BlockHashSizes`] with following sizes:
634    ///
635    /// *   [`block_hash::FULL_SIZE`] and [`block_hash::FULL_SIZE`]
636    /// *   [`block_hash::FULL_SIZE`] and [`block_hash::HALF_SIZE`]
637    ///
638    /// This is a sealed trait.
639    pub trait SealedBlockHashSizes {}
640    impl SealedBlockHashSizes for BlockHashSizes<{ block_hash::FULL_SIZE }, { block_hash::FULL_SIZE }> {}
641    impl SealedBlockHashSizes for BlockHashSizes<{ block_hash::FULL_SIZE }, { block_hash::HALF_SIZE }> {}
642}
643
644/// A sealed trait to constrain block hash size.
645///
646/// This type is implemented for [`BlockHashSize`] with following sizes:
647///
648/// *   [`block_hash::FULL_SIZE`]
649/// *   [`block_hash::HALF_SIZE`]
650pub trait ConstrainedBlockHashSize: private::SealedBlockHashSize {
651    /// The maximum size of a block hash.
652    const SIZE: usize;
653}
654impl<const SZ_BH: usize> ConstrainedBlockHashSize for BlockHashSize<SZ_BH>
655where
656    BlockHashSize<SZ_BH>: private::SealedBlockHashSize,
657{
658    const SIZE: usize = SZ_BH;
659}
660
661/// A sealed trait to constrain block hash sizes.
662///
663/// This type is implemented for [`BlockHashSizes`] with following sizes:
664///
665/// *   [`block_hash::FULL_SIZE`] and [`block_hash::FULL_SIZE`]
666/// *   [`block_hash::FULL_SIZE`] and [`block_hash::HALF_SIZE`]
667pub trait ConstrainedBlockHashSizes: private::SealedBlockHashSizes {
668    /// The maximum size of the block hash 1.
669    const MAX_BLOCK_HASH_SIZE_1: usize;
670    /// The maximum size of the block hash 2.
671    const MAX_BLOCK_HASH_SIZE_2: usize;
672}
673impl<const S1: usize, const S2: usize> ConstrainedBlockHashSizes for BlockHashSizes<S1, S2>
674where
675    BlockHashSizes<S1, S2>: private::SealedBlockHashSizes,
676{
677    const MAX_BLOCK_HASH_SIZE_1: usize = S1;
678    const MAX_BLOCK_HASH_SIZE_2: usize = S2;
679}
680
681/// Constant assertions related to this module.
682#[doc(hidden)]
683#[allow(clippy::unnecessary_cast)]
684mod const_asserts {
685    use static_assertions::{const_assert, const_assert_eq, const_assert_ne};
686
687    use super::{block_hash, block_size};
688
689    // We must restrict alphabet size to number of Base64 alphabets.
690    // It minimizes memory usage of FuzzyHashCompareTarget.
691    const_assert_eq!(block_hash::ALPHABET_SIZE, 64);
692
693    // FULL_SIZE must be even.
694    const_assert!(block_hash::FULL_SIZE % 2 == 0);
695
696    // Compare with original ssdeep constants
697    // fuzzy.h: SPAMSUM_LENGTH
698    const_assert_eq!(block_hash::FULL_SIZE, 64);
699    // fuzzy.c: MIN_BLOCKSIZE
700    const_assert_eq!(block_size::MIN, 3);
701    // fuzzy.c: NUM_BLOCKHASHES
702    const_assert_eq!(block_size::NUM_VALID, 31);
703    // fuzzy.c: (implementation of memcpy_eliminate_sequences)
704    const_assert_eq!(block_hash::MAX_SEQUENCE_SIZE, 3);
705
706    // NUM_VALID + 1 must be a valid u8 value.
707    const_assert_ne!(block_size::NUM_VALID as u8, u8::MAX);
708
709    // MAX_SEQUENCE_SIZE: fits in u32 and safe to add 1 (in either u32 or usize)
710    const_assert!(usize::BITS < 32 || block_hash::MAX_SEQUENCE_SIZE < 0xffff_ffff as usize);
711    const_assert_ne!(block_hash::MAX_SEQUENCE_SIZE, usize::MAX);
712
713    // block_size::NUM_VALID - 1 indicates the largest n so that
714    // (block_size::MIN << n) fits in 32-bits.
715    const_assert!((block_size::MIN as u64) << (block_size::NUM_VALID - 1) <= u32::MAX as u64);
716    const_assert!((block_size::MIN as u64) << block_size::NUM_VALID > u32::MAX as u64);
717
718    // For block_hash::NumericWindows
719    const_assert!(block_hash::MIN_LCS_FOR_COMPARISON > 0);
720
721    // NumericWindows must have a valid bit width.
722    // It must be *less than* 64 because we use bit shift with that width.
723    const_assert!(block_hash::NumericWindows::BITS < u64::BITS);
724
725    // IndexWindows must have a valid total bit width.
726    // It must be *less than* 64 because we use bit shift with that width.
727    const_assert!(block_hash::IndexWindows::BITS < u64::BITS);
728
729    // IndexWindows must have a valid block size bit width.
730    // It must be *less than* 64 because we use bit shift with that width.
731    //
732    // It must also be capable of storing numbers up to NUM_VALID because
733    // IndexWindows represents a window of a block hash
734    // (and NUM_VALID may be valid here).
735    const_assert!(block_hash::IndexWindows::BLOCK_SIZE_BITS < u64::BITS);
736    const_assert!(
737        (block_size::NUM_VALID as u64) < (1 << block_hash::IndexWindows::BLOCK_SIZE_BITS)
738    );
739}
740
741mod tests;