ssdeep/internals/hash/block.rs
1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: Copyright (C) 2023–2025 Tsukasa OI <floss_ssdeep@irq.a4lg.com>.
3
4//! Block size handlings and block hash parameters / utilities.
5
6use core::cmp::Ordering;
7
8/// A type to represent relation between two block sizes.
9///
10/// Because the core comparison function can only compare two block hashes
11/// with the same effective block size, we cannot compare two fuzzy hashes
12/// if their block sizes are not near enough.
13///
14/// There are three cases where we can perform actual block hash comparison:
15///
16/// 1. **Equals** ([`NearEq`](Self::NearEq))
17/// `bs_a == bs_b`
18/// 2. **Less than** ([`NearLt`](Self::NearLt))
19/// `bs_a < bs_b && bs_a * 2 == bs_b`
20/// 3. **Greater than** ([`NearGt`](Self::NearGt))
21/// `bs_a > bs_b && bs_a == bs_b * 2`
22///
23/// See also: ["Relations with Block Size" section of `FuzzyHashData`](crate::FuzzyHashData#relations-with-block-size)
24///
25/// This type represents those *near* cases (three variants) and the case which
26/// two fuzzy hashes cannot perform a block hash comparison, the *far* case
27/// (the [`Far`](Self::Far) variant).
28///
29/// A value of this type can be retrieved by using
30/// [`block_size::compare_sizes()`](crate::block_size::compare_sizes()) or
31/// [`FuzzyHashData::compare_block_sizes()`](crate::internals::hash::FuzzyHashData::compare_block_sizes()).
32///
33/// # Compatibility Note
34///
35/// Since the version 0.3, the representation of this enum is no longer
36/// specified as specific representation of this enum is not important.
37#[derive(Debug, Clone, Copy, PartialEq, Eq)]
38pub enum BlockSizeRelation {
39 /// Near and less than (i.e. `x < x * 2`).
40 ///
41 /// Two block sizes are *near* and the block hash 2 (one with a larger block
42 /// size) of the left side (of comparison) can be compared with the block
43 /// hash 1 (one with a smaller block size) of the right side.
44 NearLt,
45
46 /// Near and equals (i.e. `x == x`).
47 ///
48 /// Two block sizes are not just *near* but the same.
49 /// We compare both block hashes with the other and take the maximum value
50 /// for the output.
51 NearEq,
52
53 /// Near and greater than (i.e. `x * 2 > x`).
54 ///
55 /// Two block sizes are *near* and the block hash 1 (one with a smaller
56 /// block size) of the left side (of comparison) can be compared with the
57 /// block hash 2 (one with a larger block size) of the right side.
58 NearGt,
59
60 /// Far (e.g. `x` and `x * 4`).
61 ///
62 /// Two block sizes are *far* and a block hash comparison
63 /// cannot be performed.
64 Far,
65}
66
67impl BlockSizeRelation {
68 /// Checks whether a given value denotes one of the three *near* cases.
69 pub fn is_near(&self) -> bool {
70 !matches!(self, BlockSizeRelation::Far)
71 }
72}
73
74/// Utility related to block size part of the fuzzy hash.
75///
76/// See also: ["Block Size" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#block-size)
77pub mod block_size {
78 use super::*;
79
80 /// The minimum block size of a fuzzy hash.
81 ///
82 /// Any block size generated by ssdeep can be represented as
83 /// [`MIN`] * 2<sup>n</sup> (a power of two where `n` ≧ 0).
84 ///
85 /// This is the smallest valid value of the block size part of a fuzzy hash.
86 pub const MIN: u32 = 3;
87
88 /// The number of valid block sizes.
89 ///
90 /// [`NUM_VALID`] is the smallest value which
91 /// [`MIN`] * 2<sup>n</sup>exceeds [`u32::MAX`] and this value
92 /// itself is not valid as a *base-2 logarithm* form of the block size (in
93 /// fact, this is the *smallest* invalid value) in the fuzzy hash object.
94 ///
95 /// Note that, it *can* however be a valid *effective base-2 logarithm* form
96 /// of the block size of the block hash 2 where the (base) block size (as in
97 /// a fuzzy hash) is the largest valid one.
98 /// Some low level methods may accept this value (in [`u8`]) as a *base-2
99 /// logarithm* form of the block size (explicitly documented in such cases).
100 pub const NUM_VALID: usize = 31;
101
102 /// The range representing the valid *base-2 logarithm* form of the block size
103 /// (used while testing).
104 #[cfg(any(test, doc))]
105 pub(crate) const RANGE_LOG_VALID: core::ops::Range<u8> = 0..block_size::NUM_VALID as u8;
106
107 /// Checks whether a given block size is valid.
108 #[inline]
109 pub const fn is_valid(block_size: u32) -> bool {
110 (block_size % MIN == 0) && (block_size / MIN).is_power_of_two()
111 }
112
113 /// Checks whether *base-2 logarithm* form of the block size is valid.
114 #[inline(always)]
115 pub const fn is_log_valid(log_block_size: u8) -> bool {
116 log_block_size < NUM_VALID as u8
117 }
118
119 /// The internal implementation of [`from_log_unchecked()`].
120 #[inline(always)]
121 pub(crate) const fn from_log_internal_const(log_block_size: u8) -> u32 {
122 MIN << log_block_size
123 }
124
125 /// The internal implementation of [`from_log_unchecked()`].
126 #[inline(always)]
127 pub(crate) fn from_log_internal(log_block_size: u8) -> u32 {
128 debug_assert!(is_log_valid(log_block_size));
129 from_log_internal_const(log_block_size)
130 }
131
132 /// Converts *base-2 logarithm* form of the block size to the actual one
133 /// without checking validity of the block size.
134 ///
135 /// # Safety
136 ///
137 /// `log_block_size` must be valid.
138 ///
139 /// See also:
140 /// ["Block Size" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#block-size)
141 #[cfg(feature = "unchecked")]
142 #[allow(unsafe_code)]
143 #[inline(always)]
144 pub unsafe fn from_log_unchecked(log_block_size: u8) -> u32 {
145 from_log_internal(log_block_size)
146 }
147
148 /// Converts *base-2 logarithm* form of the block size to the actual one.
149 ///
150 /// It returns [`None`] if `log_block_size` is not valid.
151 ///
152 /// See also:
153 /// ["Block Size" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#block-size)
154 #[inline]
155 pub fn from_log(log_block_size: u8) -> Option<u32> {
156 is_log_valid(log_block_size).then(|| from_log_internal(log_block_size))
157 }
158
159 /// Precomputed block size strings.
160 ///
161 /// All valid block sizes are precomputed as raw strings to avoid
162 /// calling [`u32::to_string()`](std::string::ToString::to_string())
163 /// from [`FuzzyHash::to_string()`](crate::internals::hash::FuzzyHash::to_string()).
164 pub(crate) const BLOCK_SIZES_STR: [&str; NUM_VALID] = [
165 "3",
166 "6",
167 "12",
168 "24",
169 "48",
170 "96",
171 "192",
172 "384",
173 "768",
174 "1536",
175 "3072",
176 "6144",
177 "12288",
178 "24576",
179 "49152",
180 "98304",
181 "196608",
182 "393216",
183 "786432",
184 "1572864",
185 "3145728",
186 "6291456",
187 "12582912",
188 "25165824",
189 "50331648",
190 "100663296",
191 "201326592",
192 "402653184",
193 "805306368",
194 "1610612736",
195 "3221225472",
196 ];
197
198 /// Maximum length of the precomputed block size strings.
199 pub(crate) const MAX_BLOCK_SIZE_LEN_IN_CHARS: usize =
200 BLOCK_SIZES_STR[BLOCK_SIZES_STR.len() - 1].len();
201
202 /// The custom constant for a variant of de Bruijn sequence to convert
203 /// all valid block size values into the unique index.
204 ///
205 /// # Internal Notes
206 ///
207 /// It uses a custom variant of de Bruijn sequence for conversion.
208 /// This is a result of a manual search so that we can have unique index
209 /// values for all `(3<<i) for i in 0..31`. Note that:
210 ///
211 /// * `block_size::MIN == 3`
212 /// * `block_size::NUM_VALID == 31`
213 const LOG_DEBRUIJN_CONSTANT: u32 = 0x017713ca;
214
215 /// The function to convert a block size into an index of
216 /// a variant of de Bruijn sequence.
217 #[inline(always)]
218 const fn debruijn_index(block_size: u32) -> usize {
219 (block_size.wrapping_mul(LOG_DEBRUIJN_CONSTANT) >> 27) as usize
220 }
221
222 /// The custom table for a variant of de Bruijn sequence to convert
223 /// all valid block size values into the *base-2 logarithm* form.
224 ///
225 /// The element `[0x1f]` is unused (and assigned an invalid number `0xff`).
226 ///
227 /// See [`LOG_DEBRUIJN_CONSTANT`] for internal notes.
228 #[rustfmt::skip]
229 const LOG_DEBRUIJN_TABLE: [u8; 32] = [
230 0x00, 0x01, 0x02, 0x06, 0x03, 0x0b, 0x07, 0x10,
231 0x04, 0x0e, 0x0c, 0x18, 0x08, 0x15, 0x11, 0x1a,
232 0x1e, 0x05, 0x0a, 0x0f, 0x0d, 0x17, 0x14, 0x19,
233 0x1d, 0x09, 0x16, 0x13, 0x1c, 0x12, 0x1b, 0xff,
234 ];
235
236 /// The internal implementation of [`log_from_valid_unchecked()`].
237 #[inline(always)]
238 pub(crate) fn log_from_valid_internal(block_size: u32) -> u8 {
239 debug_assert!(is_valid(block_size));
240 LOG_DEBRUIJN_TABLE[debruijn_index(block_size)] // grcov-excl-br-line:ARRAY
241 }
242
243 /// Computes the *base-2 logarithm* form of a valid block size
244 /// but do not check whether the block size is valid.
245 ///
246 /// This is the same as computing `n` for a valid block size
247 /// which can be represented as ([`MIN`] * 2<sup>n</sup>) (`0 <= n`).
248 ///
249 /// # Safety
250 ///
251 /// If `block_size` is not valid, the result will be unpredictable.
252 #[cfg(feature = "unchecked")]
253 #[allow(unsafe_code)]
254 #[inline(always)]
255 pub unsafe fn log_from_valid_unchecked(block_size: u32) -> u8 {
256 log_from_valid_internal(block_size)
257 }
258
259 /// Computes the *base-2 logarithm* form of a valid block size.
260 ///
261 /// This is the same as computing `n` for a valid block size
262 /// which can be represented as ([`MIN`] * 2<sup>n</sup>) (`0 <= n`).
263 ///
264 /// # Usage Constraints
265 ///
266 /// * `block_size` must be valid.
267 #[inline(always)]
268 pub fn log_from_valid(block_size: u32) -> u8 {
269 assert!(is_valid(block_size));
270 log_from_valid_internal(block_size)
271 }
272
273 /// Checks whether two *base-2 logarithm* forms of the block size values
274 /// form a *near* relation (one of the three *near* cases).
275 ///
276 /// Both arguments must be valid.
277 #[inline(always)]
278 pub fn is_near(lhs: u8, rhs: u8) -> bool {
279 debug_assert!(is_log_valid(lhs));
280 debug_assert!(is_log_valid(rhs));
281 // Optimize using u32
282 u32::wrapping_sub(lhs as u32, rhs as u32).wrapping_add(1) <= 2
283 }
284
285 /// Checks whether two *base-2 logarithm* forms of the block size values
286 /// form a [`BlockSizeRelation::NearEq`] relation.
287 ///
288 /// Both arguments must be valid.
289 #[inline(always)]
290 pub fn is_near_eq(lhs: u8, rhs: u8) -> bool {
291 debug_assert!(is_log_valid(lhs));
292 debug_assert!(is_log_valid(rhs));
293 lhs == rhs
294 }
295
296 /// Checks whether two *base-2 logarithm* forms of the block size values
297 /// form a [`BlockSizeRelation::NearLt`] relation.
298 ///
299 /// Both arguments must be valid.
300 #[inline(always)]
301 pub fn is_near_lt(lhs: u8, rhs: u8) -> bool {
302 debug_assert!(is_log_valid(lhs));
303 debug_assert!(is_log_valid(rhs));
304 // Optimize using i32
305 (rhs as i32) - (lhs as i32) == 1
306 }
307
308 /// Checks whether two *base-2 logarithm* forms of the block size values
309 /// form a [`BlockSizeRelation::NearGt`] relation.
310 ///
311 /// Both arguments must be valid.
312 #[inline(always)]
313 pub fn is_near_gt(lhs: u8, rhs: u8) -> bool {
314 is_near_lt(rhs, lhs)
315 }
316
317 /// Compare two *base-2 logarithm* forms of the block size values to
318 /// determine the relation between two block sizes.
319 ///
320 /// The result is the one of the [`BlockSizeRelation`] values, representing
321 /// the relation between two block sizes.
322 ///
323 /// Both arguments must be valid.
324 #[inline(always)]
325 pub fn compare_sizes(lhs: u8, rhs: u8) -> BlockSizeRelation {
326 debug_assert!(is_log_valid(lhs));
327 debug_assert!(is_log_valid(rhs));
328 // Optimize using i32
329 match (lhs as i32) - (rhs as i32) {
330 -1 => BlockSizeRelation::NearLt,
331 0 => BlockSizeRelation::NearEq,
332 1 => BlockSizeRelation::NearGt,
333 _ => BlockSizeRelation::Far,
334 }
335 }
336
337 /// Compares two *base-2 logarithm* forms of the block size values
338 /// for ordering.
339 ///
340 /// Both arguments must be valid.
341 #[inline(always)]
342 pub fn cmp(lhs: u8, rhs: u8) -> Ordering {
343 debug_assert!(is_log_valid(lhs));
344 debug_assert!(is_log_valid(rhs));
345 u8::cmp(&lhs, &rhs)
346 }
347}
348
349/// Utilities related to block hash part of the fuzzy hash.
350///
351/// See also: ["Block Hashes" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#block-hashes)
352pub mod block_hash {
353 /// The number of alphabets used in the block hash part of a fuzzy hash.
354 ///
355 /// It is same as the number of Base64 alphabets and the block hash part is
356 /// represented as variable number of Base64 alphabets.
357 /// However, ssdeep does not use Base64 encoding
358 /// (since ssdeep generates a 6-bit hash value per "piece").
359 pub const ALPHABET_SIZE: usize = 64;
360
361 /// The maximum size of the block hash.
362 ///
363 /// ssdeep is a fuzzy *hash*. We should be able to easily interchange
364 /// the hash value and storing 6-bit hash values for all pieces is not useful
365 /// enough.
366 /// This constant limits the number of "pieces" to store in each block hash.
367 ///
368 /// Note that, since ssdeep is not a cryptographic hash and is in variable
369 /// length, it's important to limit the size of the block hash to prevent
370 /// an adversary to generate a number of "pieces" by placing an adversarial
371 /// pattern (that would make the resulting hash huge if the size of the
372 /// block hash is not limited properly).
373 pub const FULL_SIZE: usize = 64;
374
375 /// The half (truncated) size of the block hash.
376 ///
377 /// This is the half of [`FULL_SIZE`].
378 ///
379 /// Normally, the second block hash is truncated to this size.
380 ///
381 /// See also:
382 /// ["Truncation" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#truncation)
383 pub const HALF_SIZE: usize = FULL_SIZE / 2;
384
385 /// The maximum size of the sequence so that the same character can be
386 /// repeated in a normalized block hash.
387 ///
388 /// See also: ["Normalization" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#normalization)
389 pub const MAX_SEQUENCE_SIZE: usize = 3;
390
391 /// The minimum length of the common substring to compute edit distance
392 /// between two block hashes.
393 ///
394 /// To score similarity between two block hashes with the same block size,
395 /// ssdeep expects that two block hashes are similar enough.
396 /// In specific, ssdeep expects that they
397 /// [have a common substring](crate::internals::compare::position_array::BlockHashPositionArrayImpl::has_common_substring())
398 /// of a length [`MIN_LCS_FOR_COMPARISON`] or longer to reduce the
399 /// possibility of false matches by chance.
400 ///
401 /// If we couldn't find such a common substring, the low level block hash
402 /// comparison method returns zero (meaning, not similar as long as
403 /// [two normalized fuzzy hashes are different](crate::internals::hash::FuzzyHashData#fuzzy-hash-comparison)).
404 ///
405 /// Finding such common substrings is a special case of finding a
406 /// [longest common substring (LCS)](https://en.wikipedia.org/wiki/Longest_common_substring).
407 ///
408 /// For instance, those two strings:
409 ///
410 /// * `+r/kcOpEYXB+0ZJ`
411 /// * `7ocOpEYXB+0ZF29`
412 ///
413 /// have a common substring `cOpEYXB+0Z` (length 10), long enough
414 /// (≧ [`MIN_LCS_FOR_COMPARISON`]) to compute the edit distance to compute
415 /// the similarity score.
416 ///
417 /// See also: ["Fuzzy Hash Comparison" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#fuzzy-hash-comparison)
418 pub const MIN_LCS_FOR_COMPARISON: usize = 7;
419
420 /// Numeric windows of a block hash, each value representing unique value
421 /// corresponding a substring of length [`MIN_LCS_FOR_COMPARISON`].
422 ///
423 /// An object with this type is created by either of those methods
424 /// (*normalized forms only*):
425 ///
426 /// * [`FuzzyHashData::block_hash_1_numeric_windows()`](crate::internals::hash::FuzzyHashData::block_hash_1_numeric_windows())
427 /// * [`FuzzyHashData::block_hash_2_numeric_windows()`](crate::internals::hash::FuzzyHashData::block_hash_2_numeric_windows())
428 ///
429 /// Unlike [`block_hash_1_windows()`](crate::internals::hash::FuzzyHashData::block_hash_1_windows()) and
430 /// [`block_hash_2_windows()`](crate::internals::hash::FuzzyHashData::block_hash_2_windows()),
431 /// each element of this iterator is a numeric value.
432 ///
433 /// This numeric form has an one-to-one correspondence with the original
434 /// substring (and is compressed). In the current ssdeep-compatible
435 /// configuration, each value is a 42-bit unsigned integer, generated from
436 /// seven (7) hash characters (consuming 6 bits each).
437 ///
438 /// See also: [`FuzzyHashData::block_hash_1_windows()`](crate::internals::hash::FuzzyHashData::block_hash_1_windows())
439 ///
440 /// *Note*:
441 /// 7 equals [`MIN_LCS_FOR_COMPARISON`] and
442 /// 6 equals the base-2 logarithm of [`ALPHABET_SIZE`].
443 pub struct NumericWindows<'a> {
444 /// Remaining block hash portion to compute numeric windows.
445 v: &'a [u8],
446
447 /// The "last" value of the numeric windows iterator
448 /// (an incomplete value when no values are generated yet).
449 ///
450 /// The [`Self::next()`] value can be retrieved by
451 /// [shifting this value](Self::ILOG2_OF_ALPHABETS),
452 /// [masking](Self::MASK) and then adding the first byte of [`Self::v`].
453 hash: u64,
454 }
455
456 /// Numeric windows of a block hash, each value representing unique value
457 /// corresponding a substring of length [`MIN_LCS_FOR_COMPARISON`] *and*
458 /// the block size.
459 ///
460 /// An object with this type is created by either of those methods
461 /// (*normalized forms only*):
462 ///
463 /// * [`FuzzyHashData::block_hash_1_index_windows()`](crate::internals::hash::FuzzyHashData::block_hash_1_index_windows())
464 /// * [`FuzzyHashData::block_hash_2_index_windows()`](crate::internals::hash::FuzzyHashData::block_hash_2_index_windows())
465 ///
466 /// This is similar to that of [`NumericWindows`] but each numeric value
467 /// *also* contains the *base-2 logarithm* form of the block size
468 /// (at highest bits).
469 ///
470 /// This numeric form has an one-to-one correspondence with the original
471 /// substring plus the block size. In the current ssdeep-compatible
472 /// configuration, each value is a 47-bit unsigned integer, generated from
473 /// low 42-bit value from [`NumericWindows`] and high 5-bit value from
474 /// the block size.
475 pub struct IndexWindows<'a> {
476 /// Inner [`NumericWindows`] object.
477 inner: NumericWindows<'a>,
478
479 /// The *base-2 logarithm* form of the block size.
480 log_block_size: u8,
481 }
482
483 impl<'a> NumericWindows<'a> {
484 /*
485 TODO:
486 Once MSRV of 1.57 is acceptable, ILOG2_OF_ALPHABETS and MASK
487 can be calculated dynamically.
488 If MSRV of 1.67 is acceptable, its definition will be more natural.
489 */
490
491 /// A Base-2 logarithm of [`ALPHABET_SIZE`].
492 pub(crate) const ILOG2_OF_ALPHABETS: u32 = 6;
493
494 /// The width of a substring (in a numeric form) in bits.
495 pub const BITS: u32 = (MIN_LCS_FOR_COMPARISON as u32) * Self::ILOG2_OF_ALPHABETS;
496
497 /// The mask value corresponding [`BITS`](Self::BITS).
498 pub const MASK: u64 = (1u64 << Self::BITS).wrapping_sub(1);
499
500 /// Creates a new object from an existing block hash.
501 #[inline]
502 pub(crate) fn new(block_hash: &'a [u8]) -> Self {
503 if block_hash.len() < MIN_LCS_FOR_COMPARISON {
504 Self { v: &[], hash: 0 }
505 } else {
506 // grcov-excl-br-start
507 Self {
508 v: &block_hash[MIN_LCS_FOR_COMPARISON - 1..],
509 hash: block_hash[..MIN_LCS_FOR_COMPARISON - 1]
510 .iter()
511 .enumerate()
512 .map(|(i, &value)| {
513 (value as u64)
514 << (Self::ILOG2_OF_ALPHABETS
515 * (MIN_LCS_FOR_COMPARISON - 2 - i) as u32)
516 })
517 .fold(
518 0u64,
519 #[inline(always)]
520 |x, y| x | y,
521 ),
522 }
523 // grcov-excl-br-stop
524 }
525 }
526 }
527
528 impl<'a> IndexWindows<'a> {
529 /// The actual number of bits consumed by the block size.
530 pub(crate) const BLOCK_SIZE_BITS: u32 = 5;
531
532 /// The width of a substring (in a numeric form) in bits.
533 pub const BITS: u32 = NumericWindows::BITS + Self::BLOCK_SIZE_BITS;
534
535 /// The mask value corresponding [`BITS`](Self::BITS).
536 pub const MASK: u64 = (1u64 << Self::BITS).wrapping_sub(1);
537
538 /// Creates a new object from an existing block hash and the block size.
539 #[inline]
540 pub(crate) fn new(block_hash: &'a [u8], log_block_size: u8) -> Self {
541 Self {
542 inner: NumericWindows::new(block_hash),
543 log_block_size,
544 }
545 }
546 }
547
548 impl Iterator for NumericWindows<'_> {
549 type Item = u64;
550
551 #[inline]
552 fn next(&mut self) -> Option<Self::Item> {
553 if let Some((&value, rest)) = self.v.split_first() {
554 self.hash = ((self.hash << Self::ILOG2_OF_ALPHABETS) | (value as u64)) & Self::MASK;
555 self.v = rest;
556 Some(self.hash)
557 } else {
558 None
559 }
560 }
561
562 #[inline]
563 fn size_hint(&self) -> (usize, Option<usize>) {
564 (self.v.len(), Some(self.v.len()))
565 }
566 }
567
568 impl Iterator for IndexWindows<'_> {
569 type Item = u64;
570
571 #[inline(always)]
572 fn next(&mut self) -> Option<Self::Item> {
573 self.inner.next().map(
574 #[inline(always)]
575 |x| (x | ((self.log_block_size as u64) << NumericWindows::BITS)),
576 )
577 }
578
579 #[inline(always)]
580 fn size_hint(&self) -> (usize, Option<usize>) {
581 self.inner.size_hint()
582 }
583 }
584
585 impl ExactSizeIterator for NumericWindows<'_> {
586 #[inline]
587 fn len(&self) -> usize {
588 self.v.len()
589 }
590 }
591
592 impl ExactSizeIterator for IndexWindows<'_> {
593 #[inline(always)]
594 fn len(&self) -> usize {
595 self.inner.len()
596 }
597 }
598
599 #[allow(unsafe_code)]
600 #[cfg(all(feature = "unsafe-guarantee", feature = "unstable"))]
601 unsafe impl core::iter::TrustedLen for NumericWindows<'_> {}
602 #[allow(unsafe_code)]
603 #[cfg(all(feature = "unsafe-guarantee", feature = "unstable"))]
604 unsafe impl core::iter::TrustedLen for IndexWindows<'_> {}
605
606 impl core::iter::FusedIterator for NumericWindows<'_> {}
607 impl core::iter::FusedIterator for IndexWindows<'_> {}
608}
609
610/// A generic type to constrain given block hash size using [`ConstrainedBlockHashSize`].
611pub struct BlockHashSize<const N: usize> {}
612/// A generic type to constrain given two block hash sizes using [`ConstrainedBlockHashSizes`].
613pub struct BlockHashSizes<const S1: usize, const S2: usize> {}
614
615/// Private module to declare sealed block hash constraints.
616mod private {
617 use super::{block_hash, BlockHashSize, BlockHashSizes};
618
619 /// A trait to constrain block hash size.
620 ///
621 /// This type is implemented for [`BlockHashSize`] with following sizes:
622 ///
623 /// * [`block_hash::FULL_SIZE`]
624 /// * [`block_hash::HALF_SIZE`]
625 ///
626 /// This is a sealed trait.
627 pub trait SealedBlockHashSize {}
628 impl SealedBlockHashSize for BlockHashSize<{ block_hash::FULL_SIZE }> {}
629 impl SealedBlockHashSize for BlockHashSize<{ block_hash::HALF_SIZE }> {}
630
631 /// A trait to constrain block hash sizes.
632 ///
633 /// This type is implemented for [`BlockHashSizes`] with following sizes:
634 ///
635 /// * [`block_hash::FULL_SIZE`] and [`block_hash::FULL_SIZE`]
636 /// * [`block_hash::FULL_SIZE`] and [`block_hash::HALF_SIZE`]
637 ///
638 /// This is a sealed trait.
639 pub trait SealedBlockHashSizes {}
640 impl SealedBlockHashSizes for BlockHashSizes<{ block_hash::FULL_SIZE }, { block_hash::FULL_SIZE }> {}
641 impl SealedBlockHashSizes for BlockHashSizes<{ block_hash::FULL_SIZE }, { block_hash::HALF_SIZE }> {}
642}
643
644/// A sealed trait to constrain block hash size.
645///
646/// This type is implemented for [`BlockHashSize`] with following sizes:
647///
648/// * [`block_hash::FULL_SIZE`]
649/// * [`block_hash::HALF_SIZE`]
650pub trait ConstrainedBlockHashSize: private::SealedBlockHashSize {
651 /// The maximum size of a block hash.
652 const SIZE: usize;
653}
654impl<const SZ_BH: usize> ConstrainedBlockHashSize for BlockHashSize<SZ_BH>
655where
656 BlockHashSize<SZ_BH>: private::SealedBlockHashSize,
657{
658 const SIZE: usize = SZ_BH;
659}
660
661/// A sealed trait to constrain block hash sizes.
662///
663/// This type is implemented for [`BlockHashSizes`] with following sizes:
664///
665/// * [`block_hash::FULL_SIZE`] and [`block_hash::FULL_SIZE`]
666/// * [`block_hash::FULL_SIZE`] and [`block_hash::HALF_SIZE`]
667pub trait ConstrainedBlockHashSizes: private::SealedBlockHashSizes {
668 /// The maximum size of the block hash 1.
669 const MAX_BLOCK_HASH_SIZE_1: usize;
670 /// The maximum size of the block hash 2.
671 const MAX_BLOCK_HASH_SIZE_2: usize;
672}
673impl<const S1: usize, const S2: usize> ConstrainedBlockHashSizes for BlockHashSizes<S1, S2>
674where
675 BlockHashSizes<S1, S2>: private::SealedBlockHashSizes,
676{
677 const MAX_BLOCK_HASH_SIZE_1: usize = S1;
678 const MAX_BLOCK_HASH_SIZE_2: usize = S2;
679}
680
681/// Constant assertions related to this module.
682#[doc(hidden)]
683#[allow(clippy::unnecessary_cast)]
684mod const_asserts {
685 use static_assertions::{const_assert, const_assert_eq, const_assert_ne};
686
687 use super::{block_hash, block_size};
688
689 // We must restrict alphabet size to number of Base64 alphabets.
690 // It minimizes memory usage of FuzzyHashCompareTarget.
691 const_assert_eq!(block_hash::ALPHABET_SIZE, 64);
692
693 // FULL_SIZE must be even.
694 const_assert!(block_hash::FULL_SIZE % 2 == 0);
695
696 // Compare with original ssdeep constants
697 // fuzzy.h: SPAMSUM_LENGTH
698 const_assert_eq!(block_hash::FULL_SIZE, 64);
699 // fuzzy.c: MIN_BLOCKSIZE
700 const_assert_eq!(block_size::MIN, 3);
701 // fuzzy.c: NUM_BLOCKHASHES
702 const_assert_eq!(block_size::NUM_VALID, 31);
703 // fuzzy.c: (implementation of memcpy_eliminate_sequences)
704 const_assert_eq!(block_hash::MAX_SEQUENCE_SIZE, 3);
705
706 // NUM_VALID + 1 must be a valid u8 value.
707 const_assert_ne!(block_size::NUM_VALID as u8, u8::MAX);
708
709 // MAX_SEQUENCE_SIZE: fits in u32 and safe to add 1 (in either u32 or usize)
710 const_assert!(usize::BITS < 32 || block_hash::MAX_SEQUENCE_SIZE < 0xffff_ffff as usize);
711 const_assert_ne!(block_hash::MAX_SEQUENCE_SIZE, usize::MAX);
712
713 // block_size::NUM_VALID - 1 indicates the largest n so that
714 // (block_size::MIN << n) fits in 32-bits.
715 const_assert!((block_size::MIN as u64) << (block_size::NUM_VALID - 1) <= u32::MAX as u64);
716 const_assert!((block_size::MIN as u64) << block_size::NUM_VALID > u32::MAX as u64);
717
718 // For block_hash::NumericWindows
719 const_assert!(block_hash::MIN_LCS_FOR_COMPARISON > 0);
720
721 // NumericWindows must have a valid bit width.
722 // It must be *less than* 64 because we use bit shift with that width.
723 const_assert!(block_hash::NumericWindows::BITS < u64::BITS);
724
725 // IndexWindows must have a valid total bit width.
726 // It must be *less than* 64 because we use bit shift with that width.
727 const_assert!(block_hash::IndexWindows::BITS < u64::BITS);
728
729 // IndexWindows must have a valid block size bit width.
730 // It must be *less than* 64 because we use bit shift with that width.
731 //
732 // It must also be capable of storing numbers up to NUM_VALID because
733 // IndexWindows represents a window of a block hash
734 // (and NUM_VALID may be valid here).
735 const_assert!(block_hash::IndexWindows::BLOCK_SIZE_BITS < u64::BITS);
736 const_assert!(
737 (block_size::NUM_VALID as u64) < (1 << block_hash::IndexWindows::BLOCK_SIZE_BITS)
738 );
739}
740
741mod tests;