1#![cfg_attr(not(feature = "std"), no_std)]
22
// Largest prime below 2^16 — the modulus of the Adler-32 checksum (RFC 1950).
const MOD: u32 = 65521;
24
/// Incremental Adler-32 hasher.
///
/// The running state is the pair of 16-bit sums `a` and `b` defined by
/// RFC 1950; the final checksum is `(b << 16) | a`.
// `Debug` added: public types should be debuggable for diagnostics.
#[derive(Debug, Clone)]
pub struct Adler32 {
    // Running sum of all bytes plus one, modulo 65521.
    a: u16,
    // Running sum of the successive `a` values, modulo 65521.
    b: u16,
}
31
/// Signature shared by the scalar and SIMD update routines: takes the current
/// `(a, b)` state plus a byte slice and returns the new `(a, b)` state.
type UpdateFn = fn(u16, u16, &[u8]) -> (u16, u16);
33
34impl Adler32 {
35 pub fn new() -> Self {
36 Self { a: 1, b: 0 }
37 }
38
39 pub fn from_checksum(checksum: u32) -> Self {
40 Self {
41 a: checksum as u16,
42 b: (checksum >> 16) as u16,
43 }
44 }
45
46 pub fn checksum(&self) -> u32 {
47 (u32::from(self.b) << 16) | u32::from(self.a)
48 }
49
50 pub fn write(&mut self, data: &[u8]) {
51 let imp = get_imp();
52 let (a, b) = imp(self.a, self.b, data);
53 self.a = a;
54 self.b = b;
55 }
56
57 pub fn finish(&self) -> u32 {
58 self.checksum()
59 }
60}
61
// The default hasher is the empty-input state, identical to `Adler32::new`.
impl Default for Adler32 {
    fn default() -> Self {
        Self::new()
    }
}
67
// With the `std` feature, `Adler32` can be plugged in anywhere a
// `std::hash::Hasher` is expected. Note `finish` zero-extends the 32-bit
// checksum to `u64`.
#[cfg(feature = "std")]
impl std::hash::Hasher for Adler32 {
    fn write(&mut self, bytes: &[u8]) {
        // Delegate to the inherent method (which handles SIMD dispatch).
        Adler32::write(self, bytes);
    }

    fn finish(&self) -> u64 {
        self.checksum() as u64
    }
}
78
79pub fn adler32(data: &[u8]) -> u32 {
81 let mut h = Adler32::new();
82 h.write(data);
83 h.checksum()
84}
85
/// Selects the fastest update routine usable on the current CPU, falling
/// back to the portable scalar implementation when no SIMD feature is
/// compiled in or detected at runtime.
fn get_imp() -> UpdateFn {
    #[cfg(target_arch = "aarch64")]
    {
        if is_aarch64_neon_available() {
            return neon::update;
        }
    }

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if is_x86_ssse3_available() {
            return ssse3::update;
        }
    }

    // Portable fallback; always correct on every target.
    scalar::update
}
103
/// Returns whether NEON may be used: runtime CPU detection when `std` is
/// available, otherwise a compile-time check of the enabled target features.
#[cfg(target_arch = "aarch64")]
fn is_aarch64_neon_available() -> bool {
    #[cfg(feature = "std")]
    {
        // Runtime feature detection (requires std).
        std::arch::is_aarch64_feature_detected!("neon")
    }
    #[cfg(not(feature = "std"))]
    {
        // no_std: rely on what was enabled at compile time via -C target-feature.
        cfg!(target_feature = "neon")
    }
}
115
/// Returns whether SSSE3 may be used: runtime CPU detection when `std` is
/// available, otherwise a compile-time check of the enabled target features.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn is_x86_ssse3_available() -> bool {
    #[cfg(feature = "std")]
    {
        // Runtime feature detection (requires std).
        std::is_x86_feature_detected!("ssse3")
    }
    #[cfg(not(feature = "std"))]
    {
        // no_std: rely on what was enabled at compile time via -C target-feature.
        cfg!(target_feature = "ssse3")
    }
}
127
128mod scalar {
129 use super::MOD;
130
131 const NMAX: usize = 5552;
132
133 pub fn update(mut a: u16, mut b: u16, data: &[u8]) -> (u16, u16) {
134 let mut a32 = a as u32;
135 let mut b32 = b as u32;
136
137 for chunk in data.chunks(NMAX) {
138 for &byte in chunk {
139 a32 += byte as u32;
140 b32 += a32;
141 }
142 a32 %= MOD;
143 b32 %= MOD;
144 }
145
146 a = a32 as u16;
147 b = b32 as u16;
148 (a, b)
149 }
150}
151
#[cfg(target_arch = "aarch64")]
mod neon {
    //! NEON implementation: processes 32 bytes per vector loop iteration.
    use super::MOD;
    use core::arch::aarch64::*;

    /// Bytes consumed per SIMD iteration (two 16-byte vectors).
    const BLOCK_SIZE: usize = 32;
    /// Standard Adler-32 deferred-reduction bound.
    const NMAX: usize = 5552;
    /// NMAX rounded down to a whole number of blocks, so a single `% MOD`
    /// per chunk suffices to keep the accumulators within `u32`.
    const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

    /// Safe entry point; same contract as `scalar::update`.
    pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        // SAFETY: get_imp() only returns this function after
        // is_aarch64_neon_available() reported NEON support —
        // TODO confirm there are no other call sites.
        unsafe { update_neon(a, b, data) }
    }

    #[target_feature(enable = "neon")]
    unsafe fn update_neon(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        let mut a = a as u32;
        let mut b = b as u32;

        for chunk in data.chunks(CHUNK_SIZE) {
            update_chunk(&mut a, &mut b, chunk);
            // One reduction per chunk; chunk length <= NMAX bounds the sums.
            a %= MOD;
            b %= MOD;
        }

        (a as u16, b as u16)
    }

    /// Folds one chunk (length <= CHUNK_SIZE) into the unreduced `a`/`b`.
    #[inline]
    #[target_feature(enable = "neon")]
    unsafe fn update_chunk(a: &mut u32, b: &mut u32, data: &[u8]) {
        let blocks = data.chunks_exact(BLOCK_SIZE);
        let remainder = blocks.remainder();
        let num_blocks = data.len() / BLOCK_SIZE;

        // a_vec: per-lane byte sums; b_vec: per-lane position-weighted sums
        // within each block; p: sum of the a_vec snapshots taken before each
        // block, i.e. earlier blocks' contribution to b.
        let mut a_vec = vdupq_n_u32(0);
        let mut b_vec = vdupq_n_u32(0);
        let mut p = vdupq_n_u32(0);

        for block in blocks {
            // Snapshot the running byte sum before adding this block.
            p = vaddq_u32(p, a_vec);

            let ptr = block.as_ptr();
            let v0 = vld1q_u8(ptr);
            let v1 = vld1q_u8(ptr.add(16));

            // Pairwise widening adds: u8 lanes -> u16 -> u32 byte sums.
            let sum16_0 = vpaddlq_u8(v0);
            let sum16_1 = vpaddlq_u8(v1);
            let sum32_0 = vpaddlq_u16(sum16_0);
            let sum32_1 = vpaddlq_u16(sum16_1);
            a_vec = vaddq_u32(a_vec, sum32_0);
            a_vec = vaddq_u32(a_vec, sum32_1);

            // Byte i (0-based) of a block contributes to b once for each of
            // the BLOCK_SIZE - i positions that include it, hence weights
            // 32..=17 for the first half and 16..=1 for the second.
            // NOTE(review): these tables are loop-invariant and could be
            // hoisted out of the block loop.
            let weights_hi: [u8; 16] = [
                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
            ];
            let weights_lo: [u8; 16] = [16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1];
            let wh = vld1q_u8(weights_hi.as_ptr());
            let wl = vld1q_u8(weights_lo.as_ptr());

            // Widening u8 x u8 multiplies produce u16 products...
            let prod0_lo = vmull_u8(vget_low_u8(v0), vget_low_u8(wh));
            let prod0_hi = vmull_u8(vget_high_u8(v0), vget_high_u8(wh));
            let prod1_lo = vmull_u8(vget_low_u8(v1), vget_low_u8(wl));
            let prod1_hi = vmull_u8(vget_high_u8(v1), vget_high_u8(wl));

            // ...which are pairwise-widened into the u32 b accumulator.
            b_vec = vaddq_u32(b_vec, vpaddlq_u16(prod0_lo));
            b_vec = vaddq_u32(b_vec, vpaddlq_u16(prod0_hi));
            b_vec = vaddq_u32(b_vec, vpaddlq_u16(prod1_lo));
            b_vec = vaddq_u32(b_vec, vpaddlq_u16(prod1_hi));
        }

        // Horizontal sums across the four u32 lanes.
        let a_sum = vaddvq_u32(a_vec);
        let p_sum = vaddvq_u32(p);
        let b_sum = vaddvq_u32(b_vec);

        // Each of the num_blocks * BLOCK_SIZE bytes adds the incoming `a`
        // into b once; p_sum carries earlier blocks' byte sums, each weighted
        // by the BLOCK_SIZE positions that follow them; b_sum is the
        // in-block weighted part.
        *b += *a * (num_blocks as u32 * BLOCK_SIZE as u32) + p_sum * BLOCK_SIZE as u32 + b_sum;
        *a += a_sum;

        // Scalar tail for the < BLOCK_SIZE leftover bytes of the chunk.
        for &byte in remainder {
            *a += byte as u32;
            *b += *a;
        }
    }
}
242
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod ssse3 {
    //! SSSE3 implementation: processes 32 bytes per vector loop iteration.
    use super::MOD;

    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    /// Bytes consumed per SIMD iteration (two 16-byte vectors).
    const BLOCK_SIZE: usize = 32;
    /// Standard Adler-32 deferred-reduction bound.
    const NMAX: usize = 5552;
    /// NMAX rounded down to a whole number of blocks, so a single `% MOD`
    /// per chunk suffices to keep the accumulators within `u32`.
    const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

    /// Safe entry point; same contract as `scalar::update`.
    pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        // SAFETY: get_imp() only returns this function after
        // is_x86_ssse3_available() reported SSSE3 support —
        // TODO confirm there are no other call sites.
        unsafe { update_ssse3(a, b, data) }
    }

    #[target_feature(enable = "ssse3")]
    unsafe fn update_ssse3(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        let mut a = a as u32;
        let mut b = b as u32;

        for chunk in data.chunks(CHUNK_SIZE) {
            let blocks = chunk.chunks_exact(BLOCK_SIZE);
            let remainder = blocks.remainder();
            let num_blocks = chunk.len() / BLOCK_SIZE;

            let zero = _mm_setzero_si128();
            let ones_16 = _mm_set1_epi16(1);

            // Byte i (0-based within a block) contributes to b once per
            // remaining position, i.e. weight BLOCK_SIZE - i. _mm_set_epi8
            // lists lanes high-to-low, so the lowest-address byte gets 32.
            let weights_hi = _mm_set_epi8(
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            );
            let weights_lo = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);

            // a_vec: byte sums; b_vec: in-block weighted sums; p: sum of the
            // a_vec snapshots taken before each block (earlier blocks'
            // contribution to b).
            let mut a_vec = _mm_setzero_si128();
            let mut b_vec = _mm_setzero_si128();
            let mut p = _mm_setzero_si128();

            for block in blocks {
                let ptr = block.as_ptr() as *const __m128i;

                // Snapshot the running byte sum before adding this block.
                p = _mm_add_epi32(p, a_vec);

                // Unaligned loads of the two 16-byte halves of the block.
                let v0 = _mm_loadu_si128(ptr);
                let v1 = _mm_loadu_si128(ptr.add(1));

                // Sum-of-absolute-differences against zero = horizontal byte
                // sums (one 16-bit partial per 64-bit half, upper bits zero).
                a_vec = _mm_add_epi32(a_vec, _mm_sad_epu8(v0, zero));
                a_vec = _mm_add_epi32(a_vec, _mm_sad_epu8(v1, zero));

                // maddubs: u8 x i8 products summed in adjacent i16 pairs;
                // the maximum pair sum 255*32 + 255*31 fits in i16, so the
                // saturating add never saturates here.
                let mad0 = _mm_maddubs_epi16(v0, weights_hi);
                let mad1 = _mm_maddubs_epi16(v1, weights_lo);
                // madd against ones widens the i16 pairs into i32 lanes.
                b_vec = _mm_add_epi32(b_vec, _mm_madd_epi16(mad0, ones_16));
                b_vec = _mm_add_epi32(b_vec, _mm_madd_epi16(mad1, ones_16));
            }

            // Horizontal sums across the four i32 lanes.
            let a_sum = hsum_i32(a_vec);
            let p_sum = hsum_i32(p);
            let b_sum = hsum_i32(b_vec);

            // Each of the num_blocks * BLOCK_SIZE bytes adds the incoming
            // `a` into b once; p_sum carries earlier blocks' byte sums, each
            // weighted by the BLOCK_SIZE positions that follow them; b_sum
            // is the in-block weighted part.
            b += a * (num_blocks as u32 * BLOCK_SIZE as u32)
                + p_sum as u32 * BLOCK_SIZE as u32
                + b_sum as u32;
            a += a_sum as u32;

            // Scalar tail for the < BLOCK_SIZE leftover bytes of the chunk.
            for &byte in remainder {
                a += byte as u32;
                b += a;
            }

            // One reduction per chunk; chunk length <= NMAX bounds the sums.
            a %= MOD;
            b %= MOD;
        }

        (a as u16, b as u16)
    }

    /// Horizontal sum of the four i32 lanes of `v`.
    #[inline]
    #[target_feature(enable = "ssse3")]
    unsafe fn hsum_i32(v: __m128i) -> i32 {
        let hi = _mm_unpackhi_epi64(v, v);
        let sum = _mm_add_epi32(v, hi);
        let hi = _mm_shuffle_epi32(sum, 0b_00_00_00_01);
        let sum = _mm_add_epi32(sum, hi);
        _mm_cvtsi128_si32(sum)
    }
}
333
#[cfg(test)]
mod tests {
    // Tests use `alloc` collections so they also build for no_std configs.
    extern crate alloc;
    use alloc::vec;
    use alloc::vec::Vec;

    use super::*;

    /// Naive one-byte-at-a-time Adler-32, used as the ground truth that the
    /// dispatched (possibly SIMD) implementation must match.
    fn reference_adler32(data: &[u8]) -> u32 {
        let mut a: u32 = 1;
        let mut b: u32 = 0;
        for &byte in data {
            a = (a + byte as u32) % MOD;
            b = (b + a) % MOD;
        }
        (b << 16) | a
    }

    #[test]
    fn empty() {
        assert_eq!(adler32(&[]), reference_adler32(&[]));
    }

    #[test]
    fn single_byte() {
        assert_eq!(adler32(&[1]), reference_adler32(&[1]));
        assert_eq!(adler32(&[0xff]), reference_adler32(&[0xff]));
    }

    // Known-answer test: 0x11E60398 is the published checksum of "Wikipedia".
    #[test]
    fn wikipedia_example() {
        let data = b"Wikipedia";
        assert_eq!(adler32(data), reference_adler32(data));
        assert_eq!(adler32(data), 0x11E60398);
    }

    // Exercises every length up to a few SIMD blocks, covering the
    // scalar-tail paths.
    #[test]
    fn small_data() {
        for len in 0..=128 {
            let data: Vec<u8> = (0..len).map(|i| (i & 0xff) as u8).collect();
            assert_eq!(
                adler32(&data),
                reference_adler32(&data),
                "mismatch at len={}",
                len
            );
        }
    }

    // Lengths straddling the 32-byte block size and the 5552-byte NMAX
    // chunk boundaries.
    #[test]
    fn block_boundaries() {
        for &len in &[31, 32, 33, 63, 64, 65, 5551, 5552, 5553, 5600, 11104] {
            let data: Vec<u8> = (0..len).map(|i| ((i * 7 + 13) & 0xff) as u8).collect();
            assert_eq!(
                adler32(&data),
                reference_adler32(&data),
                "mismatch at len={}",
                len
            );
        }
    }

    #[test]
    fn all_zeros() {
        let data = vec![0u8; 100_000];
        assert_eq!(adler32(&data), reference_adler32(&data));
    }

    #[test]
    fn all_ones() {
        let data = vec![1u8; 100_000];
        assert_eq!(adler32(&data), reference_adler32(&data));
    }

    // Maximum byte values stress the overflow bounds of the deferred-modulo
    // accumulators.
    #[test]
    fn all_0xff() {
        let data = vec![0xffu8; 100_000];
        assert_eq!(adler32(&data), reference_adler32(&data));
    }

    // Writing the same data in irregular pieces must give the same checksum
    // as a single write.
    #[test]
    fn incremental_matches_oneshot() {
        let data: Vec<u8> = (0..10_000).map(|i| (i & 0xff) as u8).collect();
        let oneshot = adler32(&data);

        let mut h = Adler32::new();
        let mut offset = 0;
        for chunk_size in [1, 7, 13, 31, 32, 33, 64, 100, 1000] {
            let end = (offset + chunk_size).min(data.len());
            if offset < end {
                h.write(&data[offset..end]);
                offset = end;
            }
        }
        h.write(&data[offset..]);

        assert_eq!(h.checksum(), oneshot);
    }
}