ssdeep/internals/generate/hashes/rolling_hash.rs
1// SPDX-License-Identifier: GPL-2.0-or-later
2// SPDX-FileCopyrightText: Copyright Andrew Tridgell <tridge@samba.org> 2002
3// SPDX-FileCopyrightText: Copyright (C) 2006 ManTech International Corporation
4// SPDX-FileCopyrightText: Copyright (C) 2013 Helmut Grohne <helmut@subdivi.de>
5// SPDX-FileCopyrightText: Copyright (C) 2017, 2023–2025 Tsukasa OI <floss_ssdeep@irq.a4lg.com>
6
7//! A 32-bit rolling hash as used in the fuzzy hash generator.
8
9use core::ops::AddAssign;
10
11use crate::internals::macros::invariant;
12
13/// See [`RollingHash::WINDOW_SIZE`].
14pub const ROLLING_WINDOW: usize = 7;
15
16// grcov-excl-br-start:STRUCT_MEMBER
17
18/// Hasher which computes a variant of 32-bit rolling hash as used in ssdeep.
19///
20/// In ssdeep, this is the most important hash function to decide whether to
21/// trigger a context update based on the last 7 bytes it met.
22///
23/// Specifically, [`RollingHash`] implements the rolling hash implemented in
24/// ssdeep version 2.13 or later. This is the first version that officially
25/// supported ≧4GiB files and implemented a true rolling hash function.
26#[derive(Debug, Clone, Copy, PartialEq, Eq)]
27pub struct RollingHash {
28 /// Current rolling window index.
29 ///
30 /// **Performance Analysis:**
31 /// Substituting this variable's type to `usize` (more semantically correct)
32 /// resulted in some slowdown (~10%). Keeping this state for now.
33 index: u32,
34
35 /// Hash component 1.
36 ///
37 /// This is the sum of the last [`WINDOW_SIZE`](Self::WINDOW_SIZE) bytes.
38 ///
39 /// In the "ssdeep-compatible" configuration, this value is in the range
40 /// of `0..=1785` (`0..=(7*0xff)`).
41 h1: u32,
42
43 /// Hash component 2.
44 ///
45 /// This is the sum of the last [`WINDOW_SIZE`](Self::WINDOW_SIZE) bytes
46 /// but the more recent byte has a higher weight (the latest byte has a
47 /// weight of [`WINDOW_SIZE`](Self::WINDOW_SIZE) and the last (fading) byte
48 /// has a weight of 1).
49 ///
50 /// In the "ssdeep-compatible" configuration, this value is in the range
51 /// of `0..=7140` (`0..=(sum(1..=7)*0xff)`).
52 h2: u32,
53
54 /// Hash component 3.
55 ///
56 /// This is the only "big" component of the hash.
57 /// Each time it processes a byte, this value is left-shifted by
58 /// [`H3_LSHIFT`](Self::H3_LSHIFT) and xor-ed with the latest byte value.
59 ///
60 /// If it processes [`WINDOW_SIZE`](Self::WINDOW_SIZE) bytes,
61 /// older bytes are shifted out (larger than its MSB).
62 h3: u32,
63
64 /// The last [`WINDOW_SIZE`](Self::WINDOW_SIZE) bytes of the processed data.
65 window: [u8; ROLLING_WINDOW],
66}
67
68// grcov-excl-br-stop
69
70impl RollingHash {
71 /// The window size of the rolling hash.
72 ///
73 /// This is 7 bytes in ssdeep.
74 pub const WINDOW_SIZE: usize = ROLLING_WINDOW;
75
76 /// Left shift width of [`h3`](Self::h3) for each byte.
77 ///
78 /// This is 5 in ssdeep.
79 const H3_LSHIFT: usize = 5;
80
81 /// Creates a new [`RollingHash`] with the initial value.
82 pub fn new() -> Self {
83 RollingHash {
84 index: 0,
85 h1: 0,
86 h2: 0,
87 h3: 0,
88 window: [0; ROLLING_WINDOW],
89 }
90 }
91
92 /// Updates the hash value by processing a byte.
93 #[inline]
94 pub fn update_by_byte(&mut self, ch: u8) -> &mut Self {
95 invariant!((self.index as usize) < Self::WINDOW_SIZE);
96 self.h2 = self.h2.wrapping_sub(self.h1);
97 self.h2 = self
98 .h2
99 .wrapping_add(u32::wrapping_mul(ROLLING_WINDOW as u32, ch as u32));
100 self.h1 = self.h1.wrapping_add(ch as u32);
101 self.h1 = self
102 .h1
103 .wrapping_sub(self.window[self.index as usize] as u32); // grcov-excl-br-line:ARRAY
104 self.window[self.index as usize] = ch; // grcov-excl-br-line:ARRAY
105 self.index += 1;
106 if self.index as usize == ROLLING_WINDOW {
107 self.index = 0;
108 }
109 self.h3 <<= Self::H3_LSHIFT;
110 self.h3 ^= ch as u32;
111 self
112 }
113
114 /// Updates the hash value by processing an iterator of [`u8`].
115 pub fn update_by_iter(&mut self, iter: impl Iterator<Item = u8>) -> &mut Self {
116 for ch in iter {
117 self.update_by_byte(ch);
118 }
119 self
120 }
121
122 /// Updates the hash value by processing a slice of [`u8`].
123 pub fn update(&mut self, buf: &[u8]) -> &mut Self {
124 for &ch in buf.iter() {
125 self.update_by_byte(ch);
126 }
127 self
128 }
129
130 /// Returns the current hash value.
131 ///
132 /// Note that there's no "finalization" on this rolling hash.
133 /// You can even continue updating after reading the hash value.
134 ///
135 /// This is the sum of its three internal states (`h1`, `h2`, and `h3`).
136 /// See the source code and the private documentation for
137 /// its mathematical details.
138 #[inline]
139 pub fn value(&self) -> u32 {
140 self.h1.wrapping_add(self.h2).wrapping_add(self.h3)
141 }
142}
143
144impl AddAssign<&[u8]> for RollingHash {
145 /// Updates the hash value by processing a slice of [`u8`].
146 #[inline(always)]
147 fn add_assign(&mut self, buffer: &[u8]) {
148 self.update(buffer);
149 }
150}
151
152impl<const N: usize> AddAssign<&[u8; N]> for RollingHash {
153 /// Updates the hash value by processing an array of [`u8`].
154 #[inline(always)]
155 fn add_assign(&mut self, buffer: &[u8; N]) {
156 self.update(&buffer[..]);
157 }
158}
159
160impl AddAssign<u8> for RollingHash {
161 /// Updates the hash value by processing a byte.
162 #[inline(always)]
163 fn add_assign(&mut self, byte: u8) {
164 self.update_by_byte(byte);
165 }
166}
167
168impl Default for RollingHash {
169 fn default() -> Self {
170 Self::new()
171 }
172}
173
174/// Constant assertions related to this module.
175#[doc(hidden)]
176mod const_asserts {
177 use static_assertions::const_assert_eq;
178
179 use super::*;
180
181 // Compare with original ssdeep constants
182 // fuzzy.c: ROLLING_WINDOW
183 const_assert_eq!(ROLLING_WINDOW, 7);
184
185 // Consistency between module-local and struct-global one.
186 const_assert_eq!(ROLLING_WINDOW, RollingHash::WINDOW_SIZE);
187
188 // RollingHash::h3 is a rolling component and an input byte
189 // are vanished after processing more RollingHash::WINDOW_SIZE bytes.
190 // grcov-excl-tests-start
191 #[cfg(test)]
192 #[test]
193 fn rolling_hash_h3_shift_amount() {
194 assert!(RollingHash::WINDOW_SIZE
195 .checked_mul(RollingHash::H3_LSHIFT)
196 .and_then(|x| u32::try_from(x).ok())
197 .map(|x| x >= u32::BITS)
198 .unwrap_or(false));
199 }
200 // grcov-excl-tests-stop
201}
202
203pub(crate) mod test_utils;
204mod tests;