ssdeep/internals/hash_dual.rs

1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: Copyright (C) 2023–2025 Tsukasa OI <floss_ssdeep@irq.a4lg.com>
3
4//! Dual fuzzy hashes effectively containing both normalized and raw data.
5
6#[cfg(feature = "alloc")]
7use alloc::string::String;
8
9use crate::internals::base64::BASE64_TABLE_U8;
10use crate::internals::hash::block::{
11    block_hash, block_size, BlockHashSize, BlockHashSizes, ConstrainedBlockHashSize,
12    ConstrainedBlockHashSizes,
13};
14use crate::internals::hash::parser_state::{
15    BlockHashParseState, ParseError, ParseErrorKind, ParseErrorOrigin,
16};
17use crate::internals::hash::{fuzzy_norm_type, fuzzy_raw_type, FuzzyHashData};
18use crate::internals::intrinsics::unlikely;
19use crate::internals::macros::invariant;
20
21/// An RLE Encoding as used in [`FuzzyHashDualData`].
22///
23/// # Compression Scheme
24///
25/// See: ["Bit Fields" section](Self#bit-fields) below for detailed encoding.
26///
27/// Suppose that we have a block hash `ABCCCCCCDDDDDDDDDDDD`
28/// (`A`, `B` and then 6 `C`s and 12 `D`s).
29///
30/// Below, it is shown together with its normalized form.
31///
32/// ```text
33/// Raw:        ABCCCCCCDDDDDDDDDDDD
34/// Normalized: ABCCCDDD
35/// ```
36///
37/// To reconstruct the raw block hash, we store "where and how many
38/// characters to repeat".  That's the idea of classic run-length encoding.
39///
40/// ```text
41/// ABCCC...
42///     |
43///     +--Repeat 3 more times: RLE(pos=4, len=3)
44///
45/// -> ABCCCCCC...
46///         ^^^
47/// ```
48///
49/// The `pos` field is an offset into the normalized block hash and points to
50/// the *last character* of the run (to prohibit redundant encodings and to
51/// reserve position `0` for [the terminator](rle_encoding::TERMINATOR)).
52///
53/// Due to the limitation of the bit field encoding, a single RLE encoding can
54/// only repeat up to 4 characters.  In such cases, we use multiple RLE
55/// encodings with the same `pos` field:
56///
57/// ```text
58///     +--Repeat 3 more times: RLE(pos=4, len=3)
59///     |
60/// ABCCCDDD
61///        |
62///        +-- Repeat 9 more times:
63///               RLE(pos=7, len=4)
64///               RLE(pos=7, len=4)
65///               RLE(pos=7, len=1)
66///
67/// -> ABCCCCCCDDDDDDDDDDDD
68///         ^^^   ^^^^----^
69/// ```
70///
71/// For the reasons below, lengths larger than 4 are encoded as consecutive 4s
72/// followed by a remainder.  So, `9` is encoded as `4+4+1`, not `1+4+4`,
73/// `3+3+3` or `2+2+2+2+1`:
74///
75/// *   To avoid issues regarding redundant encodings,
76/// *   To make sure that plain memory comparison is sufficient
77///     to check equivalence and
78/// *   To maximize the compression rate.
79///
80/// # Bit Fields
81///
82/// In the current design, the RLE block is a sequence of compact RLE-encoded
83/// bytes, each consisting of the following bit fields:
84///
85/// *   6 bits of offset (`pos`)
86/// *   2 bits of length (`len`)
87///
88/// 6 bits is enough to store any block hash offset.
89///
90/// This `pos` is an offset into the normalized block hash (and must be the
91/// offset of the last character of the sequence).
92///
93/// Because [`block_hash::MAX_SEQUENCE_SIZE`] is larger than `1`, we can use the
94/// offset zero as the terminator (if the offset is zero, the length must be
95/// encoded as zero, making the RLE block
96/// [zero-terminated](rle_encoding::TERMINATOR)).
97///
98/// 2 bits of length is enough to compress
99/// [`block_hash::MAX_SEQUENCE_SIZE`]` + 1` bytes into one RLE encoding, making
100/// it possible to compress a long sequence into a fixed-size RLE block.
101///
102/// The encoded length is one less than the actual length for efficiency.
103/// For instance, encoded `len` of `0` actually means repeating a character
104/// once (`1` time) to reverse normalization.  Likewise, encoded `1` means
105/// repeating a character twice (`2` times).
106///
107/// 2 bits of length is still small.  If we need to repeat a character 5
108/// (`4 + 1`) or more times, we need multiple RLE encodings (with the same
109/// `pos` field).
110mod rle_encoding {
111    /// Bits used to represent the position (offset).
112    ///
113    /// This is the start offset to repeat the same character.
114    ///
115    /// If this field is zero, all succeeding encodings are
116    /// not meant to be used.
117    pub const BITS_POSITION: u32 = 6;
118
119    /// Mask used to represent the position (offset).
120    pub const MASK_POSITION: u8 = (1u8 << BITS_POSITION) - 1;
121
122    /// Bits used to represent the run length.
123    ///
124    /// If this RLE encoding is valid, the high bits store the run length minus
125    /// one (so a stored `len` means `len + 1` repetitions) because we don't encode zero length.
126    pub const BITS_RUN_LENGTH: u32 = 2;
127
128    /// Maximum run length for the RLE encoding.
129    pub const MAX_RUN_LENGTH: usize = 1usize << BITS_RUN_LENGTH;
130
131    /// The terminator symbol.
132    ///
133    /// A valid RLE block must be terminated by this symbol (and filled
134    /// thereafter).  It can be detected as `pos == 0` after decoding.
135    pub const TERMINATOR: u8 = 0;
136
137    /// Constant assertions related to RLE encoding prerequisites.
138    #[doc(hidden)]
139    #[allow(clippy::int_plus_one)]
140    mod const_asserts {
141        use static_assertions::{const_assert, const_assert_eq, const_assert_ne};
142
143        use crate::internals::hash::block::block_hash;
144
145        use super::*;
146
147        // Basic Constraints
148        const_assert_ne!(BITS_POSITION, 0);
149        const_assert_ne!(BITS_RUN_LENGTH, 0);
150        const_assert_eq!(BITS_POSITION + BITS_RUN_LENGTH, u8::BITS);
151
152        // For the offset of zero to be usable as the terminator,
153        // MAX_SEQUENCE_SIZE must be larger than 1 (must be at least 2).
154        const_assert!(block_hash::MAX_SEQUENCE_SIZE >= 2);
155
156        // Offset can contain any block hash index
157        const_assert!(block_hash::FULL_SIZE <= (1usize << BITS_POSITION));
158        // Length is large enough to compress MAX_SEQUENCE_SIZE + 1 bytes.
159        const_assert!(block_hash::MAX_SEQUENCE_SIZE + 1 <= MAX_RUN_LENGTH);
160    }
161
162    /// Encode an RLE encoding from a (position, length) pair.
163    #[inline(always)]
164    pub(crate) fn encode(pos: u8, len: u8) -> u8 {
165        debug_assert!(len != 0);
166        debug_assert!(len <= MAX_RUN_LENGTH as u8);
167        debug_assert!(pos != 0);
168        debug_assert!(pos <= MASK_POSITION);
169        pos | ((len - 1) << BITS_POSITION)
170    }
171
172    /// Decode an RLE encoding into a (position, length) pair.
173    #[inline(always)]
174    pub(crate) fn decode(value: u8) -> (u8, u8) {
175        (value & MASK_POSITION, (value >> BITS_POSITION) + 1)
176    }
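
    // A minimal illustrative check (a sketch, not upstream test coverage) of
    // the "Bit Fields" layout documented above: the low 6 bits carry `pos` and
    // the high 2 bits carry `len - 1`, so `encode()` followed by `decode()`
    // returns the original pair for every valid combination.
    // grcov-excl-tests-start
    #[cfg(test)]
    #[test]
    fn encode_decode_round_trip_sketch() {
        for pos in 1..=MASK_POSITION {
            for len in 1..=(MAX_RUN_LENGTH as u8) {
                let encoded = encode(pos, len);
                assert_eq!(encoded & MASK_POSITION, pos);
                assert_eq!(decode(encoded), (pos, len));
            }
        }
    }
    // grcov-excl-tests-stop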
177
178    // grcov-excl-tests-start
179    #[cfg(test)]
180    #[test]
181    fn decode_terminator() {
182        let (pos, _) = decode(TERMINATOR);
183        assert_eq!(pos, 0);
184    }
185    // grcov-excl-tests-stop
186}
187
188/// A generic type to constrain the RLE block size for a given block hash size using [`ConstrainedReconstructionBlockSize`].
189///
190/// # Unstable Type
191///
192/// Although this type is public, using it is strongly discouraged because it
193/// exposes a part of the opaque "reverse normalization" data, and the only
194/// reason this type is public is due to restrictions of Rust's
195/// current constant generics.
196///
197/// This type should not be considered stable.
198pub struct ReconstructionBlockSize<const SZ_BH: usize, const SZ_R: usize> {}
199
200/// Private module to declare sealed reconstruction block constraints.
201mod private {
202    use crate::internals::hash::block::block_hash;
203
204    use super::*;
205
206    /// A trait to constrain RLE block size for given block hash size.
207    ///
208    /// This trait is implemented for [`ReconstructionBlockSize`]
209    /// with following block hash sizes:
210    ///
211    /// *   [`block_hash::FULL_SIZE`]
212    /// *   [`block_hash::HALF_SIZE`]
213    ///
214    /// This is a sealed trait.
215    pub trait SealedReconstructionBlockSize {}
216
217    /// Template to generate RLE block size constraints
218    /// including constant assertions.
219    macro_rules! rle_size_for_block_hash_template {
220        { $(sizes_def($block_hash_size: expr, $rle_size: expr);)* } => {
221            $(impl SealedReconstructionBlockSize for ReconstructionBlockSize<{$block_hash_size}, {$rle_size}> {})*
222
223            /// Constant assertions related to RLE block size requirements.
224            #[doc(hidden)]
225            mod const_asserts {
226                use static_assertions::const_assert;
227
228                use super::*;
229
230                // grcov-excl-br-start
231                // Consider removing it once MSRV of 1.73 is acceptable.
232                #[cfg_attr(feature = "unstable", coverage(off))]
233                #[allow(dead_code)]
234                const fn div_ceil(a: usize, b: usize) -> usize {
235                    cfg_if::cfg_if! {
236                        if #[cfg(ffuzzy_div_ceil = "fallback")] {
237                            a / b + (if a % b == 0 { 0 } else { 1 })
238                        } else {
239                            usize::div_ceil(a, b)
240                        }
241                    }
242                }
243
244                #[cfg(test)]
245                #[test]
246                fn div_ceil_examples() {
247                    assert_eq!(div_ceil(0, 1), 0);
248                    assert_eq!(div_ceil(1, 1), 1);
249                    assert_eq!(div_ceil(2, 1), 2);
250                    assert_eq!(div_ceil(3, 1), 3);
251                    assert_eq!(div_ceil(4, 1), 4);
252                    assert_eq!(div_ceil(5, 1), 5);
253                    assert_eq!(div_ceil(6, 1), 6);
254                    assert_eq!(div_ceil(7, 1), 7);
255                    assert_eq!(div_ceil(8, 1), 8);
256                    assert_eq!(div_ceil(0, 2), 0);
257                    assert_eq!(div_ceil(1, 2), 1);
258                    assert_eq!(div_ceil(2, 2), 1);
259                    assert_eq!(div_ceil(3, 2), 2);
260                    assert_eq!(div_ceil(4, 2), 2);
261                    assert_eq!(div_ceil(5, 2), 3);
262                    assert_eq!(div_ceil(6, 2), 3);
263                    assert_eq!(div_ceil(7, 2), 4);
264                    assert_eq!(div_ceil(8, 2), 4);
265                    assert_eq!(div_ceil(0, 3), 0);
266                    assert_eq!(div_ceil(1, 3), 1);
267                    assert_eq!(div_ceil(2, 3), 1);
268                    assert_eq!(div_ceil(3, 3), 1);
269                    assert_eq!(div_ceil(4, 3), 2);
270                    assert_eq!(div_ceil(5, 3), 2);
271                    assert_eq!(div_ceil(6, 3), 2);
272                    assert_eq!(div_ceil(7, 3), 3);
273                    assert_eq!(div_ceil(8, 3), 3);
274                    assert_eq!(div_ceil(0, 4), 0);
275                    assert_eq!(div_ceil(1, 4), 1);
276                    assert_eq!(div_ceil(2, 4), 1);
277                    assert_eq!(div_ceil(3, 4), 1);
278                    assert_eq!(div_ceil(4, 4), 1);
279                    assert_eq!(div_ceil(5, 4), 2);
280                    assert_eq!(div_ceil(6, 4), 2);
281                    assert_eq!(div_ceil(7, 4), 2);
282                    assert_eq!(div_ceil(8, 4), 2);
283                }
284                // grcov-excl-br-stop
285
286                // Test each RLE block size
287                $(
288                    // This lower bound is exact.
289                    const_assert!(
290                        div_ceil($block_hash_size, block_hash::MAX_SEQUENCE_SIZE + 1) <= $rle_size
291                    );
292                    // This lower bound might be too pessimistic.
293                    const_assert!(
294                        div_ceil($block_hash_size, rle_encoding::MAX_RUN_LENGTH) <= $rle_size
295                    );
296                )*
297            }
298        };
299    }
300
301    rle_size_for_block_hash_template! {
302        sizes_def(block_hash::FULL_SIZE, block_hash::FULL_SIZE / 4);
303        sizes_def(block_hash::HALF_SIZE, block_hash::HALF_SIZE / 4);
304    }
305}
306
307/// A sealed trait to constrain RLE block size for given block hash size.
308///
309/// This trait is implemented for [`ReconstructionBlockSize`] with
310/// following block hash sizes:
311///
312/// *   [`block_hash::FULL_SIZE`]
313/// *   [`block_hash::HALF_SIZE`]
314///
315/// # Unstable Trait
316///
317/// Although this trait is public, using it is strongly discouraged because it
318/// exposes a part of the opaque "reverse normalization" data, and the only
319/// reason this trait is public is due to restrictions of Rust's
320/// current constant generics.
321///
322/// This trait should not be considered stable.
323pub trait ConstrainedReconstructionBlockSize: private::SealedReconstructionBlockSize {}
324impl<T> ConstrainedReconstructionBlockSize for T where T: private::SealedReconstructionBlockSize {}
325
326/// RLE block handling (algorithms).
327mod algorithms {
328    use super::*;
329
330    /// Update the RLE block to compress the given sequence.
331    ///
332    /// `pos` is the position (in the normalized block hash) of the last
333    /// character of the run and `len` is the raw run length.
334    ///
335    /// It returns new `rle_offset` to use.
336    #[inline(always)]
337    pub(crate) fn update_rle_block<const SZ_RLE: usize>(
338        rle_block: &mut [u8; SZ_RLE],
339        rle_offset: usize,
340        pos: usize,
341        len: usize,
342    ) -> usize {
343        debug_assert!(len > block_hash::MAX_SEQUENCE_SIZE);
344        let extend_len_minus_one = len - block_hash::MAX_SEQUENCE_SIZE - 1;
345        let seq_fill_size = extend_len_minus_one / rle_encoding::MAX_RUN_LENGTH;
346        let start = rle_offset;
347        invariant!(start <= rle_block.len());
348        invariant!(start + seq_fill_size <= rle_block.len());
349        invariant!(start <= start + seq_fill_size);
350        // grcov-excl-br-start:ARRAY
351        rle_block[start..start + seq_fill_size].fill(rle_encoding::encode(
352            pos as u8,
353            rle_encoding::MAX_RUN_LENGTH as u8,
354        ));
355        // grcov-excl-br-stop
356        invariant!(start + seq_fill_size < rle_block.len());
357        // grcov-excl-br-start:ARRAY
358        rle_block[start + seq_fill_size] = rle_encoding::encode(
359            pos as u8,
360            (extend_len_minus_one % rle_encoding::MAX_RUN_LENGTH) as u8 + 1,
361        );
362        // grcov-excl-br-stop
363        start + seq_fill_size + 1
364    }
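
    // A minimal illustrative check (a sketch, not upstream test coverage) of
    // the splitting rule described in `rle_encoding`: extending a run whose raw
    // length is 12 (9 characters beyond the `block_hash::MAX_SEQUENCE_SIZE`
    // characters kept by normalization) at position 7 is encoded as the
    // canonical `4 + 4 + 1` split.  The RLE block size of 8 below is an
    // arbitrary choice for this sketch.
    // grcov-excl-tests-start
    #[cfg(test)]
    #[test]
    fn update_rle_block_splits_long_runs() {
        let mut rle_block = [rle_encoding::TERMINATOR; 8];
        let next_offset = update_rle_block(&mut rle_block, 0, 7, 12);
        assert_eq!(next_offset, 3);
        assert_eq!(
            &rle_block[..3],
            &[
                rle_encoding::encode(7, 4),
                rle_encoding::encode(7, 4),
                rle_encoding::encode(7, 1),
            ]
        );
        assert!(rle_block[3..]
            .iter()
            .all(|&x| x == rle_encoding::TERMINATOR));
    }
    // grcov-excl-tests-stop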
365
366    /// Compress a raw block hash by normalizing it and generating RLE encodings.
367    ///
368    /// Note that the length of `blockhash_in` must not exceed the maximum
369    /// length of `blockhash_out` (`SZ_BH`).
370    #[inline]
371    pub(crate) fn compress_block_hash_with_rle<const SZ_BH: usize, const SZ_RLE: usize>(
372        blockhash_out: &mut [u8; SZ_BH],
373        rle_block_out: &mut [u8; SZ_RLE],
374        blockhash_len_out: &mut u8,
375        blockhash_in: &[u8],
376    ) where
377        BlockHashSize<SZ_BH>: ConstrainedBlockHashSize,
378        ReconstructionBlockSize<SZ_BH, SZ_RLE>: ConstrainedReconstructionBlockSize,
379    {
380        debug_assert!(blockhash_in.len() <= SZ_BH);
381        let mut rle_offset = 0;
382        let mut seq = 0usize;
383        let mut len = 0usize;
384        let mut prev = crate::internals::base64::BASE64_INVALID;
385        for &curr in blockhash_in {
386            if curr == prev {
387                seq += 1;
388                if seq >= block_hash::MAX_SEQUENCE_SIZE {
389                    // Preserve sequence length for RLE encoding.
390                    continue;
391                }
392            } else {
393                if seq >= block_hash::MAX_SEQUENCE_SIZE {
394                    rle_offset = update_rle_block(rle_block_out, rle_offset, len - 1, seq + 1);
395                }
396                seq = 0;
397                prev = curr;
398            }
399            invariant!(len < blockhash_out.len());
400            blockhash_out[len] = curr; // grcov-excl-br-line:ARRAY
401            len += 1;
402        }
403        // After processing the whole original block hash, we may still be
404        // inside a run of identical characters.
405        if seq >= block_hash::MAX_SEQUENCE_SIZE {
406            rle_offset = update_rle_block(rle_block_out, rle_offset, len - 1, seq + 1);
407        }
408        *blockhash_len_out = len as u8;
409        invariant!(len <= blockhash_out.len());
410        blockhash_out[len..].fill(0); // grcov-excl-br-line:ARRAY
411        invariant!(rle_offset <= rle_block_out.len());
412        rle_block_out[rle_offset..].fill(rle_encoding::TERMINATOR); // grcov-excl-br-line:ARRAY
413    }
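
    // A minimal illustrative check (a sketch, not upstream test coverage)
    // reproducing the `ABCCCCCCDDDDDDDDDDDD` example from the `rle_encoding`
    // documentation, using the Base64 indices of `A`..`D` (0..=3) as the raw
    // block hash contents.
    // grcov-excl-tests-start
    #[cfg(test)]
    #[test]
    fn compress_block_hash_with_rle_doc_example() {
        // Raw: ABCCCCCCDDDDDDDDDDDD (`A`, `B`, then 6 `C`s and 12 `D`s).
        let raw: [u8; 20] = [
            0, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        ];
        let mut norm = [0u8; block_hash::FULL_SIZE];
        // Start from a non-zero pattern to show that the tail gets terminated.
        let mut rle_block = [0xffu8; block_hash::FULL_SIZE / 4];
        let mut norm_len = 0u8;
        compress_block_hash_with_rle(&mut norm, &mut rle_block, &mut norm_len, &raw[..]);
        // Normalized: ABCCCDDD
        assert_eq!(norm_len, 8);
        assert_eq!(&norm[..8], &[0, 1, 2, 2, 2, 3, 3, 3]);
        // RLE: 3 more `C`s at position 4, then 9 (4 + 4 + 1) more `D`s at position 7.
        assert_eq!(
            &rle_block[..4],
            &[
                rle_encoding::encode(4, 3),
                rle_encoding::encode(7, 4),
                rle_encoding::encode(7, 4),
                rle_encoding::encode(7, 1),
            ]
        );
        assert!(rle_block[4..]
            .iter()
            .all(|&x| x == rle_encoding::TERMINATOR));
    }
    // grcov-excl-tests-stop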
414
415    /// Expand a normalized block hash to a raw form using RLE encodings.
416    #[inline]
417    pub(crate) fn expand_block_hash_using_rle<const SZ_BH: usize, const SZ_RLE: usize>(
418        blockhash_out: &mut [u8; SZ_BH],
419        blockhash_len_out: &mut u8,
420        blockhash_in: &[u8; SZ_BH],
421        blockhash_len_in: u8,
422        rle_block_in: &[u8; SZ_RLE],
423    ) where
424        BlockHashSize<SZ_BH>: ConstrainedBlockHashSize,
425        ReconstructionBlockSize<SZ_BH, SZ_RLE>: ConstrainedReconstructionBlockSize,
426    {
427        let mut offset_src = 0usize;
428        let mut offset_dst = 0usize;
429        let mut len_out = blockhash_len_in;
430        let copy_as_is = |blockhash_out: &mut [u8; SZ_BH], dst, src, len| {
431            invariant!(src <= blockhash_in.len());
432            invariant!(src + len <= blockhash_in.len());
433            invariant!(src <= src + len);
434            invariant!(dst <= blockhash_out.len());
435            invariant!(dst + len <= blockhash_out.len());
436            invariant!(dst <= dst + len);
437            blockhash_out[dst..dst + len].clone_from_slice(&blockhash_in[src..src + len]); // grcov-excl-br-line:ARRAY
439        };
440        for &rle in rle_block_in {
441            // Decode position and length
442            let (pos, len) = rle_encoding::decode(rle);
443            if pos == 0 {
444                // Met the terminator
445                debug_assert!(rle == rle_encoding::TERMINATOR);
446                break;
447            }
448            let pos = pos as usize;
449            len_out += len;
450            let len = len as usize;
451            // Copy as is
452            let copy_len = pos - offset_src;
453            copy_as_is(blockhash_out, offset_dst, offset_src, copy_len);
454            // Copy with duplication
455            invariant!(pos < blockhash_in.len());
456            let lastch = blockhash_in[pos]; // grcov-excl-br-line:ARRAY
457            invariant!(offset_dst + copy_len <= blockhash_out.len());
458            invariant!(offset_dst + copy_len + len <= blockhash_out.len());
459            invariant!(offset_dst + copy_len <= offset_dst + copy_len + len);
460            blockhash_out[offset_dst + copy_len..offset_dst + copy_len + len].fill(lastch); // grcov-excl-br-line:ARRAY
461            // Update next offset
462            offset_src += copy_len;
463            offset_dst += copy_len + len;
464        }
465        // Copy as is (tail)
466        let copy_len = len_out as usize - offset_dst;
467        copy_as_is(blockhash_out, offset_dst, offset_src, copy_len);
468        // Finalize
469        invariant!(offset_dst + copy_len <= blockhash_out.len());
470        blockhash_out[offset_dst + copy_len..].fill(0); // grcov-excl-br-line:ARRAY
471        *blockhash_len_out = len_out;
472    }
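
    // A minimal illustrative round-trip check (a sketch, not upstream test
    // coverage): expanding the output of `compress_block_hash_with_rle()`
    // reproduces the original raw block hash.  The raw input below is an
    // arbitrary sequence of Base64 indices containing two runs longer than
    // `block_hash::MAX_SEQUENCE_SIZE`.
    // grcov-excl-tests-start
    #[cfg(test)]
    #[test]
    fn expand_block_hash_using_rle_round_trip() {
        let raw: [u8; 16] = [5, 5, 5, 5, 5, 5, 5, 1, 2, 3, 4, 4, 4, 4, 4, 6];
        let mut norm = [0u8; block_hash::FULL_SIZE];
        let mut rle_block = [0u8; block_hash::FULL_SIZE / 4];
        let mut norm_len = 0u8;
        compress_block_hash_with_rle(&mut norm, &mut rle_block, &mut norm_len, &raw[..]);
        let mut expanded = [0u8; block_hash::FULL_SIZE];
        let mut expanded_len = 0u8;
        expand_block_hash_using_rle(
            &mut expanded,
            &mut expanded_len,
            &norm,
            norm_len,
            &rle_block,
        );
        assert_eq!(expanded_len as usize, raw.len());
        assert_eq!(&expanded[..raw.len()], &raw[..]);
    }
    // grcov-excl-tests-stop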
473
474    /// Check whether the RLE block is valid for the given normalized block hash.
475    pub(crate) fn is_valid_rle_block_for_block_hash<const SZ_BH: usize, const SZ_RLE: usize>(
476        blockhash: &[u8; SZ_BH],
477        rle_block: &[u8; SZ_RLE],
478        blockhash_len: u8,
479    ) -> bool
480    where
481        BlockHashSize<SZ_BH>: ConstrainedBlockHashSize,
482        ReconstructionBlockSize<SZ_BH, SZ_RLE>: ConstrainedReconstructionBlockSize,
483    {
484        let mut expanded_len = blockhash_len as u32;
485        let mut terminator_expected = false;
486        let mut prev_pos = 0u8;
487        let mut prev_len = 0u8;
488        for &rle in rle_block {
489            if unlikely(rle != rle_encoding::TERMINATOR && terminator_expected) {
490                // Non-zero byte after null-terminated encoding.
491                return false;
492            }
493            if rle == rle_encoding::TERMINATOR {
494                // Null terminator or later.
495                terminator_expected = true;
496                continue;
497            }
498            // Decode position and length
499            let (pos, len) = rle_encoding::decode(rle);
500            // Check position
501            if unlikely(
502                pos < block_hash::MAX_SEQUENCE_SIZE as u8 - 1
503                    || pos >= blockhash_len
504                    || pos < prev_pos,
505            ) {
506                return false;
507            }
508            if prev_pos == pos {
509                // For extension with the same position, check canonicality.
510                if unlikely(prev_len != rle_encoding::MAX_RUN_LENGTH as u8) {
511                    return false;
512                }
513            } else {
514                // For new sequence, check if corresponding block hash makes
515                // identical character sequence.
516                let end = pos as usize;
517                let start = end - (block_hash::MAX_SEQUENCE_SIZE - 1);
518                invariant!(start < blockhash.len());
519                invariant!(end < blockhash.len());
520                #[allow(clippy::int_plus_one)]
521                {
522                    invariant!(start + 1 <= end);
523                }
524                let ch = blockhash[start]; // grcov-excl-br-line:ARRAY
525                if unlikely(
526                    blockhash[start + 1..=end] // grcov-excl-br-line:ARRAY
527                        .iter()
528                        .any(|x| *x != ch),
529                ) {
530                    return false;
531                }
532            }
533            // Update the state.
534            prev_pos = pos;
535            prev_len = len;
536            expanded_len += len as u32;
537        }
538        if unlikely(expanded_len as usize > SZ_BH) {
539            return false;
540        }
541        true
542    }
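
    // A minimal illustrative check (a sketch, not upstream test coverage) of
    // the canonicality rule enforced above: for the normalized block hash
    // `ABCCCDDD` (as Base64 indices), a run of 9 extra `D`s must be encoded as
    // `4 + 4 + 1`; the non-canonical `1 + 4 + 4` split is rejected.
    // grcov-excl-tests-start
    #[cfg(test)]
    #[test]
    fn is_valid_rle_block_canonicality_sketch() {
        let mut blockhash = [0u8; block_hash::FULL_SIZE];
        blockhash[..8].copy_from_slice(&[0, 1, 2, 2, 2, 3, 3, 3]);
        let mut rle_block = [rle_encoding::TERMINATOR; block_hash::FULL_SIZE / 4];
        // Canonical: RLE(4, 3), RLE(7, 4), RLE(7, 4), RLE(7, 1).
        rle_block[..4].copy_from_slice(&[
            rle_encoding::encode(4, 3),
            rle_encoding::encode(7, 4),
            rle_encoding::encode(7, 4),
            rle_encoding::encode(7, 1),
        ]);
        assert!(is_valid_rle_block_for_block_hash(&blockhash, &rle_block, 8));
        // Non-canonical: RLE(4, 3), RLE(7, 1), RLE(7, 4), RLE(7, 4).
        rle_block[..4].copy_from_slice(&[
            rle_encoding::encode(4, 3),
            rle_encoding::encode(7, 1),
            rle_encoding::encode(7, 4),
            rle_encoding::encode(7, 4),
        ]);
        assert!(!is_valid_rle_block_for_block_hash(&blockhash, &rle_block, 8));
    }
    // grcov-excl-tests-stop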
543}
544
545/// An efficient compressed fuzzy hash representation, containing both
546/// normalized and raw block hash contents.
547///
548/// This struct contains a normalized [fuzzy hash object](FuzzyHashData) and
549/// opaque data to perform "reverse normalization" afterwards.
550///
551/// In the current design, it achieves a compression ratio of about 5 / 8
552/// (compared to storing two fuzzy hash objects, one normalized and one raw).
553///
554/// With this, you can compare many fuzzy hashes efficiently while preserving
555/// the original string representation without requiring too much memory.
556///
557/// Some methods accept [`AsRef`] to the normalized [`FuzzyHashData`].
558/// In such cases, it is possible to pass this object directly
559/// (e.g. [`FuzzyHashCompareTarget::compare()`](crate::internals::compare::FuzzyHashCompareTarget::compare())).
560///
561/// # Ordering
562///
563/// Sorting objects of this type will result in the following order.
564///
565/// *   Two equivalent [`FuzzyHashDualData`] objects are considered equal
566///     (and the underlying sorting algorithm decides ordering of equivalent
567///     objects).
568/// *   Two different [`FuzzyHashDualData`] objects with different normalized
569///     [`FuzzyHashData`] objects (inside) will be ordered in the same order as
570///     the underlying [`FuzzyHashData`].
571/// *   Two different [`FuzzyHashDualData`] objects with the same normalized
572///     [`FuzzyHashData`] objects (inside) will be ordered
573///     in an implementation-defined manner.
574///
575/// The implementation-defined order is not currently guaranteed to be stable.
576/// For instance, different versions of this crate may order them differently.
577/// However, it is guaranteed to be deterministic, so you can expect the same
578/// order in the same version of this crate.
579///
580/// # Safety
581///
582/// Generic parameters of this type should not be considered stable because
583/// some of them exist only due to the current restrictions of
584/// Rust's constant generics (which will be resolved after the feature
585/// `generic_const_exprs` is stabilized).
586///
587/// **Do not** use [`FuzzyHashDualData`] directly.
588///
589/// Instead, use instantiations of this generic type:
590/// *   [`DualFuzzyHash`] (will be sufficient in most cases)
591/// *   [`LongDualFuzzyHash`]
592///
593/// # Examples
594///
595/// ```
596/// // Requires either the "alloc" feature or std environment on your crate
597/// // to use the `to_string()` method (default enabled).
598/// use ssdeep::{DualFuzzyHash, FuzzyHash, RawFuzzyHash};
599///
600/// let hash_str_raw  = "12288:+ySwl5P+C5IxJ845HYV5sxOH/cccccccei:+Klhav84a5sxJ";
601/// let hash_str_norm = "12288:+ySwl5P+C5IxJ845HYV5sxOH/cccei:+Klhav84a5sxJ";
602///
603/// let dual_hash: DualFuzzyHash = str::parse(hash_str_raw).unwrap();
604///
605/// // This object can effectively contain both
606/// // normalized and raw fuzzy hash representations.
607/// assert_eq!(dual_hash.to_raw_form().to_string(),   hash_str_raw);
608/// assert_eq!(dual_hash.to_normalized().to_string(), hash_str_norm);
609///
610/// let another_hash: FuzzyHash = str::parse(
611///     "12288:+yUwldx+C5IxJ845HYV5sxOH/cccccccex:+glvav84a5sxK"
612/// ).unwrap();
613///
614/// // You can directly compare a DualFuzzyHash against a FuzzyHash.
615/// //
616/// // This is almost as fast as comparison between two FuzzyHash objects
617/// // because the native representation inside DualFuzzyHash
618/// // is a FuzzyHash object.
619/// assert_eq!(another_hash.compare(dual_hash), 88);
620///
621/// // But DualFuzzyHash is not a drop-in replacement for FuzzyHash.
622/// // You need to use `as_normalized()` to compare a FuzzyHash against
623/// // a DualFuzzyHash (direct comparison may be provided in a later version).
624/// assert_eq!(dual_hash.as_normalized().compare(&another_hash), 88);
625/// ```
626#[repr(align(8))]
627#[derive(Copy, Clone)]
628pub struct FuzzyHashDualData<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
629where
630    BlockHashSize<S1>: ConstrainedBlockHashSize,
631    BlockHashSize<S2>: ConstrainedBlockHashSize,
632    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
633    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
634    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
635{
636    /// RLE block 1 for reverse normalization of
637    /// [block hash 1](crate::internals::hash::FuzzyHashData::blockhash1).
638    ///
639    /// See [`rle_encoding`] for encoding details and full compression scheme.
640    rle_block1: [u8; C1],
641
642    /// RLE block 2 for reverse normalization of
643    /// [block hash 2](crate::internals::hash::FuzzyHashData::blockhash2).
644    ///
645    /// See [`rle_encoding`] for encoding details and full compression scheme.
646    rle_block2: [u8; C2],
647
648    /// A normalized fuzzy hash object for comparison and the base storage
649    /// before RLE-based decompression.
650    norm_hash: fuzzy_norm_type!(S1, S2),
651}
652
653impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
654    FuzzyHashDualData<S1, S2, C1, C2>
655where
656    BlockHashSize<S1>: ConstrainedBlockHashSize,
657    BlockHashSize<S2>: ConstrainedBlockHashSize,
658    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
659    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
660    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
661{
662    /// The maximum size of the block hash 1.
663    ///
664    /// This value is the same as the
665    /// [underlying fuzzy hash type](FuzzyHashData::MAX_BLOCK_HASH_SIZE_1).
666    pub const MAX_BLOCK_HASH_SIZE_1: usize = <fuzzy_norm_type!(S1, S2)>::MAX_BLOCK_HASH_SIZE_1;
667
668    /// The maximum size of the block hash 2.
669    ///
670    /// This value is the same as the
671    /// [underlying fuzzy hash type](FuzzyHashData::MAX_BLOCK_HASH_SIZE_2).
672    pub const MAX_BLOCK_HASH_SIZE_2: usize = <fuzzy_norm_type!(S1, S2)>::MAX_BLOCK_HASH_SIZE_2;
673
674    /// The number of RLE block entries in the block hash 1.
675    #[allow(dead_code)]
676    const RLE_BLOCK_SIZE_1: usize = C1;
677
678    /// The number of RLE block entries in the block hash 2.
679    #[allow(dead_code)]
680    const RLE_BLOCK_SIZE_2: usize = C2;
681
682    /// Denotes whether the fuzzy type only contains a normalized form.
683    ///
684    /// In this type, it is always [`false`].
685    pub const IS_NORMALIZED_FORM: bool = false;
686
687    /// Denotes whether the fuzzy type can contain a non-truncated fuzzy hash.
688    ///
689    /// This value is the same as the
690    /// [underlying fuzzy hash type](FuzzyHashData::IS_LONG_FORM).
691    pub const IS_LONG_FORM: bool = <fuzzy_norm_type!(S1, S2)>::IS_LONG_FORM;
692
693    /// The maximum length in the string representation.
694    ///
695    /// This value is the same as the
696    /// [underlying fuzzy hash type](FuzzyHashData::MAX_LEN_IN_STR).
697    pub const MAX_LEN_IN_STR: usize = <fuzzy_norm_type!(S1, S2)>::MAX_LEN_IN_STR;
698
699    /// Creates a new fuzzy hash object with empty contents.
700    ///
701    /// This is equivalent to the fuzzy hash string `3::`.
702    pub fn new() -> Self {
703        Self {
704            rle_block1: [rle_encoding::TERMINATOR; C1],
705            rle_block2: [rle_encoding::TERMINATOR; C2],
706            norm_hash: FuzzyHashData::new(),
707        }
708    }
709
710    /// Initialize the object from a raw fuzzy hash.
711    pub fn init_from_raw_form(&mut self, hash: &fuzzy_raw_type!(S1, S2)) {
712        self.norm_hash.log_blocksize = hash.log_blocksize;
713        algorithms::compress_block_hash_with_rle(
714            &mut self.norm_hash.blockhash1,
715            &mut self.rle_block1,
716            &mut self.norm_hash.len_blockhash1,
717            hash.block_hash_1(),
718        );
719        algorithms::compress_block_hash_with_rle(
720            &mut self.norm_hash.blockhash2,
721            &mut self.rle_block2,
722            &mut self.norm_hash.len_blockhash2,
723            hash.block_hash_2(),
724        );
725    }
726
727    /// The internal implementation of [`Self::new_from_internals_near_raw_unchecked()`].
728    fn new_from_internals_near_raw_internal(
729        log_block_size: u8,
730        block_hash_1: &[u8],
731        block_hash_2: &[u8],
732    ) -> Self {
733        debug_assert!(block_size::is_log_valid(log_block_size));
734        debug_assert!(block_hash_1
735            .iter()
736            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
737        debug_assert!(block_hash_2
738            .iter()
739            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
740        invariant!(block_hash_1.len() <= S1);
741        invariant!(block_hash_2.len() <= S2);
742        let mut hash = Self::new();
743        hash.norm_hash.log_blocksize = log_block_size;
744        algorithms::compress_block_hash_with_rle(
745            &mut hash.norm_hash.blockhash1,
746            &mut hash.rle_block1,
747            &mut hash.norm_hash.len_blockhash1,
748            block_hash_1,
749        );
750        algorithms::compress_block_hash_with_rle(
751            &mut hash.norm_hash.blockhash2,
752            &mut hash.rle_block2,
753            &mut hash.norm_hash.len_blockhash2,
754            block_hash_2,
755        );
756        hash
757    }
758
759    /// Creates a new fuzzy hash object with internal contents (with raw block size).
760    ///
761    /// # Safety
762    ///
763    /// *   `block_hash_1` and `block_hash_2` must have valid lengths.
764    /// *   Elements of `block_hash_1` and `block_hash_2` must consist of valid
765    ///     Base64 indices.
766    /// *   `log_block_size` must hold a valid
767    ///     *base-2 logarithm* form of a block size.
768    ///
769    /// If they are not satisfied, the resulting object will be corrupted.
770    #[cfg(feature = "unchecked")]
771    #[allow(unsafe_code)]
772    #[inline(always)]
773    pub unsafe fn new_from_internals_near_raw_unchecked(
774        log_block_size: u8,
775        block_hash_1: &[u8],
776        block_hash_2: &[u8],
777    ) -> Self {
778        Self::new_from_internals_near_raw_internal(log_block_size, block_hash_1, block_hash_2)
779    }
780
781    /// Creates a new fuzzy hash object with internal contents (with raw block size).
782    ///
783    /// Because this function assumes that you know the fuzzy hash internals,
784    /// it panics when you fail to satisfy fuzzy hash constraints.
785    ///
786    /// # Usage Constraints
787    ///
788    /// *   `block_hash_1` and `block_hash_2` must have valid lengths.
789    /// *   Elements of `block_hash_1` and `block_hash_2` must consist of valid
790    ///     Base64 indices.
791    /// *   `log_block_size` must hold a valid
792    ///     *base-2 logarithm* form of a block size.
793    #[inline]
794    pub fn new_from_internals_near_raw(
795        log_block_size: u8,
796        block_hash_1: &[u8],
797        block_hash_2: &[u8],
798    ) -> Self {
799        assert!(block_size::is_log_valid(log_block_size));
800        assert!(block_hash_1.len() <= S1);
801        assert!(block_hash_2.len() <= S2);
802        assert!(block_hash_1
803            .iter()
804            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
805        assert!(block_hash_2
806            .iter()
807            .all(|&x| x < block_hash::ALPHABET_SIZE as u8));
808        Self::new_from_internals_near_raw_internal(log_block_size, block_hash_1, block_hash_2)
809    }
810
811    /// The internal implementation of [`Self::new_from_internals_unchecked()`].
812    #[allow(dead_code)]
813    #[inline(always)]
814    fn new_from_internals_internal(
815        block_size: u32,
816        block_hash_1: &[u8],
817        block_hash_2: &[u8],
818    ) -> Self {
819        debug_assert!(block_size::is_valid(block_size));
820        Self::new_from_internals_near_raw_internal(
821            block_size::log_from_valid_internal(block_size),
822            block_hash_1,
823            block_hash_2,
824        )
825    }
826
827    /// Creates a new fuzzy hash object with internal contents.
828    ///
829    /// # Safety
830    ///
831    /// *   `block_hash_1` and `block_hash_2` must have valid lengths.
832    /// *   Elements of `block_hash_1` and `block_hash_2` must consist of valid
833    ///     Base64 indices.
834    /// *   `block_size` must hold a valid block size.
835    ///
836    /// If they are not satisfied, the resulting object will be corrupted.
837    #[cfg(feature = "unchecked")]
838    #[allow(unsafe_code)]
839    #[inline(always)]
840    pub unsafe fn new_from_internals_unchecked(
841        block_size: u32,
842        block_hash_1: &[u8],
843        block_hash_2: &[u8],
844    ) -> Self {
845        Self::new_from_internals_internal(block_size, block_hash_1, block_hash_2)
846    }
847
848    /// Creates a new fuzzy hash object with internal contents.
849    ///
850    /// Because this function assumes that you know the fuzzy hash internals,
851    /// it panics when you fail to satisfy fuzzy hash constraints.
852    ///
853    /// # Usage Constraints
854    ///
855    /// *   `block_hash_1` and `block_hash_2` must have valid lengths.
856    /// *   Elements of `block_hash_1` and `block_hash_2` must consist of valid
857    ///     Base64 indices.
858    /// *   `block_size` must hold a valid block size.
859    #[inline]
860    pub fn new_from_internals(block_size: u32, block_hash_1: &[u8], block_hash_2: &[u8]) -> Self {
861        assert!(block_size::is_valid(block_size));
862        Self::new_from_internals_near_raw(
863            block_size::log_from_valid_internal(block_size),
864            block_hash_1,
865            block_hash_2,
866        )
867    }
868
869    /// The *base-2 logarithm* form of the block size.
870    ///
871    /// See also: ["Block Size" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#block-size)
872    #[inline(always)]
873    pub fn log_block_size(&self) -> u8 {
874        self.norm_hash.log_blocksize
875    }
876
877    /// The block size of the fuzzy hash.
878    #[inline]
879    pub fn block_size(&self) -> u32 {
880        block_size::from_log_internal(self.norm_hash.log_blocksize)
881    }
882
883    /// A reference to the normalized fuzzy hash.
884    ///
885    /// Note that this operation should be fast because this type
886    /// contains the normalized object directly.
887    #[inline(always)]
888    pub fn as_normalized(&self) -> &fuzzy_norm_type!(S1, S2) {
889        &self.norm_hash
890    }
891
892    /// Constructs an object from a raw fuzzy hash.
893    pub fn from_raw_form(hash: &fuzzy_raw_type!(S1, S2)) -> Self {
894        let mut dual_hash = FuzzyHashDualData::new();
895        dual_hash.init_from_raw_form(hash);
896        dual_hash
897    }
898
899    /// Constructs an object from a normalized fuzzy hash.
900    pub fn from_normalized(hash: &fuzzy_norm_type!(S1, S2)) -> Self {
901        Self {
902            rle_block1: [rle_encoding::TERMINATOR; C1],
903            rle_block2: [rle_encoding::TERMINATOR; C2],
904            norm_hash: *hash,
905        }
906    }
907
908    /// Decompresses a raw variant of the fuzzy hash and stores it into
909    /// an existing object.
910    pub fn into_mut_raw_form(&self, hash: &mut fuzzy_raw_type!(S1, S2)) {
911        hash.log_blocksize = self.norm_hash.log_blocksize;
912        algorithms::expand_block_hash_using_rle(
913            &mut hash.blockhash1,
914            &mut hash.len_blockhash1,
915            &self.norm_hash.blockhash1,
916            self.norm_hash.len_blockhash1,
917            &self.rle_block1,
918        );
919        algorithms::expand_block_hash_using_rle(
920            &mut hash.blockhash2,
921            &mut hash.len_blockhash2,
922            &self.norm_hash.blockhash2,
923            self.norm_hash.len_blockhash2,
924            &self.rle_block2,
925        );
926    }
927
928    /// Decompresses and generates a raw variant of the fuzzy hash.
929    ///
930    /// Based on the normalized fuzzy hash representation and the "reverse
931    /// normalization" data, this method generates the original, a raw variant
932    /// of the fuzzy hash.
933    pub fn to_raw_form(&self) -> fuzzy_raw_type!(S1, S2) {
934        let mut hash = FuzzyHashData::new();
935        self.into_mut_raw_form(&mut hash);
936        hash
937    }
938
939    /// Returns a clone of the normalized fuzzy hash.
940    ///
941    /// Where possible, [`as_normalized()`](Self::as_normalized()) or
942    /// [`AsRef::as_ref()`] should be used instead.
943    #[inline(always)]
944    pub fn to_normalized(&self) -> fuzzy_norm_type!(S1, S2) {
945        self.norm_hash
946    }
947
948    /// Converts the fuzzy hash to the string (normalized form).
949    ///
950    /// This method returns the string corresponding to
951    /// the normalized form.
952    #[cfg(feature = "alloc")]
953    pub fn to_normalized_string(&self) -> String {
954        self.norm_hash.to_string()
955    }
956
957    /// Converts the fuzzy hash to the string (raw form).
958    ///
959    /// This method returns the string corresponding to the raw
960    /// (non-normalized) form.
961    #[cfg(feature = "alloc")]
962    pub fn to_raw_form_string(&self) -> String {
963        self.to_raw_form().to_string()
964    }
965
966    /// The internal implementation of [`from_bytes_with_last_index()`](Self::from_bytes_with_last_index()).
967    #[inline(always)]
968    fn from_bytes_with_last_index_internal(
969        str: &[u8],
970        index: &mut usize,
971    ) -> Result<Self, ParseError> {
972        use crate::internals::hash::{
973            algorithms, hash_from_bytes_with_last_index_internal_template,
974        };
975        use crate::internals::hash_dual::algorithms::update_rle_block;
976        let mut fuzzy = Self::new();
977        hash_from_bytes_with_last_index_internal_template! {
978            str, index, true,
979            fuzzy.norm_hash.log_blocksize,
980            { let mut rle_offset = 0; },
981            #[inline(always)] |pos, len| rle_offset = update_rle_block(
982                &mut fuzzy.rle_block1, rle_offset, pos + block_hash::MAX_SEQUENCE_SIZE - 1, len),
983            fuzzy.norm_hash.blockhash1, fuzzy.norm_hash.len_blockhash1,
984            { let mut rle_offset = 0; },
985            #[inline(always)] |pos, len| rle_offset = update_rle_block(
986                &mut fuzzy.rle_block2, rle_offset, pos + block_hash::MAX_SEQUENCE_SIZE - 1, len),
987            fuzzy.norm_hash.blockhash2, fuzzy.norm_hash.len_blockhash2
988        }
989        Ok(fuzzy)
990    }
991
992    /// Parse a fuzzy hash from given bytes (a slice of [`u8`])
993    /// of a string representation.
994    ///
995    /// If the parser succeeds, it also updates the `index` argument to the
996    /// first index not consumed while constructing the fuzzy hash: that of
997    /// either the end of the string or the character `','` separating the rest
998    /// of the fuzzy hash from the file name field.
999    ///
1000    /// If the parser fails, `index` is not updated.
1001    ///
1002    /// The behavior of this method is affected by the `strict-parser` feature.
1003    /// For more information, see [The Strict Parser](FuzzyHashData#the-strict-parser).
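    ///
    /// # Example
    ///
    /// A minimal sketch reusing the hash string from the type-level example;
    /// the `"sample_file_name"` part is a made-up file name field.
    ///
    /// ```
    /// // Requires either the "alloc" feature or std environment on your crate
    /// // to use the `to_raw_form_string()` method (default enabled).
    /// use ssdeep::DualFuzzyHash;
    ///
    /// let hash_str = "12288:+ySwl5P+C5IxJ845HYV5sxOH/cccccccei:+Klhav84a5sxJ";
    /// let line = "12288:+ySwl5P+C5IxJ845HYV5sxOH/cccccccei:+Klhav84a5sxJ,\"sample_file_name\"";
    ///
    /// let mut index = 0;
    /// let hash = DualFuzzyHash::from_bytes_with_last_index(line.as_bytes(), &mut index)
    ///     .unwrap();
    ///
    /// // `index` now points to the ',' separating the hash from the file name field.
    /// assert_eq!(line.as_bytes()[index], b',');
    /// assert_eq!(hash.to_raw_form_string(), hash_str);
    /// ```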
1004    pub fn from_bytes_with_last_index(str: &[u8], index: &mut usize) -> Result<Self, ParseError> {
1005        Self::from_bytes_with_last_index_internal(str, index)
1006    }
1007
1008    /// Parse a fuzzy hash from given bytes (a slice of [`u8`])
1009    /// of a string representation.
1010    ///
1011    /// The behavior of this method is affected by the `strict-parser` feature.
1012    /// For more information, see [The Strict Parser](FuzzyHashData#the-strict-parser).
1013    pub fn from_bytes(str: &[u8]) -> Result<Self, ParseError> {
1014        Self::from_bytes_with_last_index_internal(str, &mut 0usize)
1015    }
1016
1017    /// Normalize the fuzzy hash in place.
1018    ///
1019    /// After calling this method, `self` will be normalized.
1020    ///
1021    /// In this implementation, it clears all "reverse normalization" data.
1022    ///
1023    /// See also: ["Normalization" section of `FuzzyHashData`](FuzzyHashData#normalization)
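    ///
    /// # Example
    ///
    /// A minimal sketch reusing the hash strings from the type-level example:
    ///
    /// ```
    /// // Requires either the "alloc" feature or std environment on your crate
    /// // to use the `to_raw_form_string()` method (default enabled).
    /// use ssdeep::DualFuzzyHash;
    ///
    /// let mut hash: DualFuzzyHash =
    ///     str::parse("12288:+ySwl5P+C5IxJ845HYV5sxOH/cccccccei:+Klhav84a5sxJ").unwrap();
    /// assert!(!hash.is_normalized());
    ///
    /// hash.normalize_in_place();
    ///
    /// // The "reverse normalization" data is cleared, so the raw form
    /// // now equals the normalized form.
    /// assert!(hash.is_normalized());
    /// assert_eq!(
    ///     hash.to_raw_form_string(),
    ///     "12288:+ySwl5P+C5IxJ845HYV5sxOH/cccei:+Klhav84a5sxJ"
    /// );
    /// ```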
1024    pub fn normalize_in_place(&mut self) {
1025        self.rle_block1 = [rle_encoding::TERMINATOR; C1];
1026        self.rle_block2 = [rle_encoding::TERMINATOR; C2];
1027    }
1028
1029    /// Returns whether the dual fuzzy hash is normalized.
1030    pub fn is_normalized(&self) -> bool {
1031        self.rle_block1[0] == rle_encoding::TERMINATOR
1032            && self.rle_block2[0] == rle_encoding::TERMINATOR
1033    }
1034
1035    /// Performs full validity checking of the internal structure.
1036    ///
1037    /// The primary purpose of this is debugging and it should always
1038    /// return [`true`] unless...
1039    ///
1040    /// *   There is a bug in this crate, corrupting this structure,
1041    /// *   A memory corruption occurred somewhere else, or
1042    /// *   An `unsafe` function to construct this object is misused.
1043    ///
1044    /// Because of its purpose, this method is not designed to be fast.
1045    ///
1046    /// Note that, although this method is mainly relevant to users when the
1047    /// `unchecked` feature is enabled, it is made public regardless of features
1048    /// because this method is not *unsafe* or *unchecked* in any way.
1049    ///
1050    /// # Safety: No Panic Guarantee
1051    ///
1052    /// This method is guaranteed to be panic-free as long as the underlying
1053    /// memory region corresponding to `self` is sound.
1054    /// In other words, it won't panic by itself no matter what data is
1055    /// contained in this object.
1056    pub fn is_valid(&self) -> bool {
1057        self.norm_hash.is_valid()
1058            && algorithms::is_valid_rle_block_for_block_hash(
1059                &self.norm_hash.blockhash1,
1060                &self.rle_block1,
1061                self.norm_hash.len_blockhash1,
1062            )
1063            && algorithms::is_valid_rle_block_for_block_hash(
1064                &self.norm_hash.blockhash2,
1065                &self.rle_block2,
1066                self.norm_hash.len_blockhash2,
1067            )
1068    }
1069}
1070
1071impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
1072    AsRef<fuzzy_norm_type!(S1, S2)> for FuzzyHashDualData<S1, S2, C1, C2>
1073where
1074    BlockHashSize<S1>: ConstrainedBlockHashSize,
1075    BlockHashSize<S2>: ConstrainedBlockHashSize,
1076    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1077    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1078    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1079{
1080    #[inline(always)]
1081    fn as_ref(&self) -> &fuzzy_norm_type!(S1, S2) {
1082        &self.norm_hash
1083    }
1084}
1085
1086impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> Default
1087    for FuzzyHashDualData<S1, S2, C1, C2>
1088where
1089    BlockHashSize<S1>: ConstrainedBlockHashSize,
1090    BlockHashSize<S2>: ConstrainedBlockHashSize,
1091    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1092    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1093    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1094{
1095    fn default() -> Self {
1096        Self::new()
1097    }
1098}
1099
1100impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> PartialEq
1101    for FuzzyHashDualData<S1, S2, C1, C2>
1102where
1103    BlockHashSize<S1>: ConstrainedBlockHashSize,
1104    BlockHashSize<S2>: ConstrainedBlockHashSize,
1105    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1106    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1107    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1108{
1109    fn eq(&self, other: &Self) -> bool {
1110        self.norm_hash == other.norm_hash
1111            && self.rle_block1 == other.rle_block1
1112            && self.rle_block2 == other.rle_block2
1113    }
1114}
1115
1116impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> Eq
1117    for FuzzyHashDualData<S1, S2, C1, C2>
1118where
1119    BlockHashSize<S1>: ConstrainedBlockHashSize,
1120    BlockHashSize<S2>: ConstrainedBlockHashSize,
1121    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1122    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1123    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1124{
1125}
1126
1127impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::hash::Hash
1128    for FuzzyHashDualData<S1, S2, C1, C2>
1129where
1130    BlockHashSize<S1>: ConstrainedBlockHashSize,
1131    BlockHashSize<S2>: ConstrainedBlockHashSize,
1132    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1133    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1134    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1135{
1136    #[inline]
1137    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
1138        self.norm_hash.hash(state);
1139        state.write(&self.rle_block1);
1140        state.write(&self.rle_block2);
1141    }
1142}
1143
1144impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> Ord
1145    for FuzzyHashDualData<S1, S2, C1, C2>
1146where
1147    BlockHashSize<S1>: ConstrainedBlockHashSize,
1148    BlockHashSize<S2>: ConstrainedBlockHashSize,
1149    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1150    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1151    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1152{
1153    #[inline]
1154    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
1155        (self.norm_hash, self.rle_block1, self.rle_block2).cmp(&(
1156            other.norm_hash,
1157            other.rle_block1,
1158            other.rle_block2,
1159        ))
1160    }
1161}
1162
1163impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> PartialOrd
1164    for FuzzyHashDualData<S1, S2, C1, C2>
1165where
1166    BlockHashSize<S1>: ConstrainedBlockHashSize,
1167    BlockHashSize<S2>: ConstrainedBlockHashSize,
1168    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1169    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1170    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1171{
1172    #[inline(always)]
1173    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
1174        Some(self.cmp(other))
1175    }
1176}
1177
1178impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::fmt::Debug
1179    for FuzzyHashDualData<S1, S2, C1, C2>
1180where
1181    BlockHashSize<S1>: ConstrainedBlockHashSize,
1182    BlockHashSize<S2>: ConstrainedBlockHashSize,
1183    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1184    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1185    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1186{
1187    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1188        /// The type to print an RLE encoded byte.
1189        struct DebugBuilderForRLEBlockEntry(u8);
1190        /// The type to print a valid RLE block.
1191        struct DebugBuilderForValidRLEBlock<'a, const N: usize> {
1192            /// The RLE block to print.
1193            block: &'a [u8; N],
1194        }
1195        /// The type to print an invalid RLE block.
1196        struct DebugBuilderForInvalidRLEBlock<'a, const N: usize> {
1197            /// The RLE block to print.
1198            block: &'a [u8; N],
1199        }
1200        impl<'a, const N: usize> DebugBuilderForValidRLEBlock<'a, N> {
1201            /// Creates the new object from an RLE block.
1202            pub fn new(rle_block: &'a [u8; N]) -> Self {
1203                Self { block: rle_block }
1204            }
1205        }
1206        impl<'a, const N: usize> DebugBuilderForInvalidRLEBlock<'a, N> {
1207            /// Creates the new object from an RLE block.
1208            pub fn new(rle_block: &'a [u8; N]) -> Self {
1209                Self { block: rle_block }
1210            }
1211        }
1212        impl core::fmt::Debug for DebugBuilderForRLEBlockEntry {
1213            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1214                if self.0 != rle_encoding::TERMINATOR {
1215                    let (pos, len) = rle_encoding::decode(self.0);
1216                    f.debug_tuple("RLE").field(&pos).field(&len).finish()
1217                } else {
1218                    f.debug_tuple("RLENull").finish()
1219                }
1220            }
1221        }
1222        impl<const N: usize> core::fmt::Debug for DebugBuilderForValidRLEBlock<'_, N> {
1223            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1224                f.debug_list()
1225                    .entries(
1226                        self.block
1227                            .iter()
1228                            .copied()
1229                            .filter(|x| *x != rle_encoding::TERMINATOR)
1230                            .map(DebugBuilderForRLEBlockEntry),
1231                    )
1232                    .finish()
1233            }
1234        }
1235        impl<const N: usize> core::fmt::Debug for DebugBuilderForInvalidRLEBlock<'_, N> {
1236            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1237                // Don't filter zeroes when invalid,
1238                // unlike DebugBuilderForValidRLEBlock above.
1239                f.debug_list()
1240                    .entries(self.block.iter().copied().map(DebugBuilderForRLEBlockEntry))
1241                    .finish()
1242            }
1243        }
1244
1245        // This is for debug purposes, so do the full check.
1246        if self.is_valid() {
1247            // Table lookup is safe.  All entries are `0 <= x < 64`.
1248            let buffer1 = self
1249                .norm_hash
1250                .blockhash1
1251                .map(|x| BASE64_TABLE_U8[x as usize]); // grcov-excl-br-line:ARRAY
1252            let buffer2 = self
1253                .norm_hash
1254                .blockhash2
1255                .map(|x| BASE64_TABLE_U8[x as usize]); // grcov-excl-br-line:ARRAY
1256            f.debug_struct("FuzzyHashDualData")
1257                .field("LONG", &(S2 == block_hash::FULL_SIZE))
1258                .field(
1259                    "block_size",
1260                    &block_size::from_log_internal(self.norm_hash.log_blocksize),
1261                )
1262                .field(
1263                    "blockhash1",
1264                    &core::str::from_utf8(&buffer1[..self.norm_hash.len_blockhash1 as usize])
1265                        .unwrap(),
1266                )
1267                .field(
1268                    "blockhash2",
1269                    &core::str::from_utf8(&buffer2[..self.norm_hash.len_blockhash2 as usize])
1270                        .unwrap(),
1271                )
1272                .field(
1273                    "rle_block1",
1274                    &(DebugBuilderForValidRLEBlock::new(&self.rle_block1)),
1275                )
1276                .field(
1277                    "rle_block2",
1278                    &(DebugBuilderForValidRLEBlock::new(&self.rle_block2)),
1279                )
1280                .finish()
1281        } else {
1282            f.debug_struct("FuzzyHashDualData")
1283                .field("ILL_FORMED", &true)
1284                .field("LONG", &(S2 == block_hash::FULL_SIZE))
1285                .field("log_blocksize", &self.norm_hash.log_blocksize)
1286                .field("len_blockhash1", &self.norm_hash.len_blockhash1)
1287                .field("len_blockhash2", &self.norm_hash.len_blockhash2)
1288                .field("blockhash1", &self.norm_hash.blockhash1)
1289                .field("blockhash2", &self.norm_hash.blockhash2)
1290                .field(
1291                    "rle_block1",
1292                    &(DebugBuilderForInvalidRLEBlock::new(&self.rle_block1)),
1293                )
1294                .field(
1295                    "rle_block2",
1296                    &(DebugBuilderForInvalidRLEBlock::new(&self.rle_block2)),
1297                )
1298                .finish()
1299        }
1300    }
1301}
1302
1303impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::fmt::Display
1304    for FuzzyHashDualData<S1, S2, C1, C2>
1305where
1306    BlockHashSize<S1>: ConstrainedBlockHashSize,
1307    BlockHashSize<S2>: ConstrainedBlockHashSize,
1308    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1309    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1310    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1311{
1312    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1313        write!(f, "{{{}|{}}}", self.norm_hash, self.to_raw_form())
1314    }
1315}
1316
1317impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize> core::str::FromStr
1318    for FuzzyHashDualData<S1, S2, C1, C2>
1319where
1320    BlockHashSize<S1>: ConstrainedBlockHashSize,
1321    BlockHashSize<S2>: ConstrainedBlockHashSize,
1322    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1323    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1324    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1325{
1326    type Err = ParseError;
1327    #[inline(always)]
1328    fn from_str(s: &str) -> Result<Self, Self::Err> {
1329        Self::from_bytes(s.as_bytes())
1330    }
1331}
1332
1333impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
1334    core::convert::From<fuzzy_norm_type!(S1, S2)> for FuzzyHashDualData<S1, S2, C1, C2>
1335where
1336    BlockHashSize<S1>: ConstrainedBlockHashSize,
1337    BlockHashSize<S2>: ConstrainedBlockHashSize,
1338    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1339    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1340    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1341{
1342    #[inline]
1343    fn from(value: fuzzy_norm_type!(S1, S2)) -> Self {
1344        Self::from_normalized(&value)
1345    }
1346}
1347
1348impl<const S1: usize, const S2: usize, const C1: usize, const C2: usize>
1349    core::convert::From<fuzzy_raw_type!(S1, S2)> for FuzzyHashDualData<S1, S2, C1, C2>
1350where
1351    BlockHashSize<S1>: ConstrainedBlockHashSize,
1352    BlockHashSize<S2>: ConstrainedBlockHashSize,
1353    BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes,
1354    ReconstructionBlockSize<S1, C1>: ConstrainedReconstructionBlockSize,
1355    ReconstructionBlockSize<S2, C2>: ConstrainedReconstructionBlockSize,
1356{
1357    #[inline]
1358    fn from(value: fuzzy_raw_type!(S1, S2)) -> Self {
1359        Self::from_raw_form(&value)
1360    }
1361}
1362
1363/// Regular (truncated) dual fuzzy hash type which contains both normalized
1364/// and raw contents.
1365///
1366/// This type effectively contains the data equivalent to those two objects:
1367///
1368/// *   [`FuzzyHash`](crate::internals::hash::FuzzyHash) (native)
1369/// *   [`RawFuzzyHash`](crate::internals::hash::RawFuzzyHash) (compressed)
1370///
1371/// See also: [`FuzzyHashDualData`]
1372pub type DualFuzzyHash = FuzzyHashDualData<
1373    { block_hash::FULL_SIZE },
1374    { block_hash::HALF_SIZE },
1375    { block_hash::FULL_SIZE / 4 },
1376    { block_hash::HALF_SIZE / 4 },
1377>;
1378
1379/// Long (non-truncated) dual fuzzy hash type which contains both normalized
1380/// and raw contents.
1381///
1382/// This type effectively contains the data equivalent to those two objects:
1383///
1384/// *   [`LongFuzzyHash`](crate::internals::hash::LongFuzzyHash) (native)
1385/// *   [`LongRawFuzzyHash`](crate::internals::hash::LongRawFuzzyHash) (compressed)
1386///
1387/// See also: [`FuzzyHashDualData`]
1388pub type LongDualFuzzyHash = FuzzyHashDualData<
1389    { block_hash::FULL_SIZE },
1390    { block_hash::FULL_SIZE },
1391    { block_hash::FULL_SIZE / 4 },
1392    { block_hash::FULL_SIZE / 4 },
1393>;
1394
1395mod tests;