1use crate::constants::BLOCK_SIZE;
4
5#[cfg(feature = "simd")]
6use bytemuck::try_cast_slice;
7#[cfg(feature = "simd")]
8use wide::u32x4;
9
/// Computes the checksum of a full block, leaving the stored checksum word
/// out of the sum.
///
/// This is a thin convenience wrapper that forwards the fixed-size block to
/// [`normal_sum_slice`].
///
/// * `buf` - the block contents (`BLOCK_SIZE` bytes).
/// * `checksum_offset` - byte offset of the checksum field inside `buf`.
///
/// Returns the checksum value computed by [`normal_sum_slice`].
#[inline]
pub fn normal_sum(buf: &[u8; BLOCK_SIZE], checksum_offset: usize) -> u32 {
    normal_sum_slice(buf, checksum_offset)
}
18
19#[inline]
23pub fn normal_sum_slice(buf: &[u8], checksum_offset: usize) -> u32 {
24 let len = buf.len();
25 debug_assert!(
26 len.is_multiple_of(4),
27 "Buffer length must be divisible by 4"
28 );
29 debug_assert!(
30 checksum_offset.is_multiple_of(4),
31 "Checksum offset must be aligned to 4 bytes"
32 );
33
34 #[cfg(feature = "simd")]
35 {
36 normal_sum_slice_simd(buf, checksum_offset)
37 }
38
39 #[cfg(not(feature = "simd"))]
40 {
41 normal_sum_slice_scalar(buf, checksum_offset)
42 }
43}
44
/// Scalar checksum: negated wrapping sum of every big-endian 32-bit word in
/// `buf` except the one at word index `checksum_offset / 4`.
///
/// Trailing bytes that do not form a complete 4-byte word are ignored.
/// The returned value is the two's-complement negation of the wrapping sum.
#[inline]
fn normal_sum_slice_scalar(buf: &[u8], checksum_offset: usize) -> u32 {
    let skipped_word = checksum_offset / 4;

    let total = buf
        .chunks_exact(4)
        .enumerate()
        // The stored checksum itself must not contribute to the sum.
        .filter(|&(index, _)| index != skipped_word)
        .fold(0u32, |acc, (_, quad)| {
            acc.wrapping_add(u32::from_be_bytes([quad[0], quad[1], quad[2], quad[3]]))
        });

    // Negating the unsigned value with wrap-around yields the same bits as
    // negating its signed reinterpretation.
    total.wrapping_neg()
}
69
/// SIMD-assisted checksum: negated wrapping sum of every big-endian 32-bit
/// word in `buf` except the one at word index `checksum_offset / 4`.
///
/// The bytes are reinterpreted as `u32` words; when that cast is rejected
/// (for example because the slice is not suitably aligned) the scalar
/// implementation is used instead. Complete groups of four words are
/// accumulated lane-wise in a `u32x4`; leftover words are added one by one.
#[cfg(feature = "simd")]
#[inline]
fn normal_sum_slice_simd(buf: &[u8], checksum_offset: usize) -> u32 {
    let Ok(words) = try_cast_slice::<u8, u32>(buf) else {
        return normal_sum_slice_scalar(buf, checksum_offset);
    };

    let skipped_word = checksum_offset / 4;
    let word_count = buf.len() / 4;
    // Largest multiple of four that does not exceed `word_count`.
    let vector_end = word_count - word_count % 4;

    // Decodes one word, substituting zero for the checksum word so that it
    // drops out of the sum.
    let decode = |index: usize| -> u32 {
        if index == skipped_word {
            0
        } else {
            u32::from_be(words[index])
        }
    };

    let mut lanes = u32x4::ZERO;
    for base in (0..vector_end).step_by(4) {
        lanes += u32x4::new([
            decode(base),
            decode(base + 1),
            decode(base + 2),
            decode(base + 3),
        ]);
    }

    // Words that did not fill a whole vector.
    let tail = (vector_end..word_count).fold(0u32, |acc, index| acc.wrapping_add(decode(index)));

    // Horizontal reduction of the four lanes plus the tail.
    let total = lanes
        .to_array()
        .iter()
        .fold(tail, |acc, &lane| acc.wrapping_add(lane));

    total.wrapping_neg()
}
141
/// Computes the checksum of a 1024-byte boot block.
///
/// The implementation is chosen at compile time: the `simd` feature selects
/// `boot_sum_simd`, otherwise `boot_sum_scalar` is used.
///
/// * `buf` - the 1024-byte boot block.
///
/// Returns the checksum produced by the selected implementation.
#[inline]
pub fn boot_sum(buf: &[u8; 1024]) -> u32 {
    // Exactly one of the two blocks below survives `cfg` evaluation and
    // becomes the tail expression of the function.
    #[cfg(feature = "simd")]
    {
        boot_sum_simd(buf)
    }

    #[cfg(not(feature = "simd"))]
    {
        boot_sum_scalar(buf)
    }
}
157
/// Scalar boot-block checksum.
///
/// Adds the 256 big-endian 32-bit words of `buf`, skipping word index 1
/// (bytes 4..8), with end-around carry: whenever an addition overflows, the
/// carry is added back into the running sum. The result is the bitwise
/// complement of that sum.
#[inline]
fn boot_sum_scalar(buf: &[u8; 1024]) -> u32 {
    let folded = buf
        .chunks_exact(4)
        .enumerate()
        // Word 1 is left out of the sum.
        .filter(|&(index, _)| index != 1)
        .fold(0u32, |acc, (_, quad)| {
            let word = u32::from_be_bytes([quad[0], quad[1], quad[2], quad[3]]);
            let (total, carried) = acc.overflowing_add(word);
            // Feed the overflow bit back into the sum.
            total.wrapping_add(u32::from(carried))
        });

    !folded
}
179
/// Boot-block checksum over the word-reinterpreted buffer.
///
/// Adds the 256 big-endian 32-bit words of `buf`, skipping word index 1
/// (bytes 4..8), with end-around carry, and returns the bitwise complement
/// of the sum. When the bytes cannot be reinterpreted as `u32` words (for
/// example because the buffer is not suitably aligned) the scalar
/// implementation is used instead.
///
/// The carry fold depends on the previous partial sum at every step, so the
/// words are consumed sequentially; staging them in a vector register first
/// adds work without enabling any lane-wise arithmetic.
#[cfg(feature = "simd")]
#[inline]
fn boot_sum_simd(buf: &[u8; 1024]) -> u32 {
    let Ok(words) = try_cast_slice::<u8, u32>(buf) else {
        return boot_sum_scalar(buf);
    };

    let folded = words
        .iter()
        .enumerate()
        // Word 1 is left out of the sum.
        .filter(|&(index, _)| index != 1)
        .fold(0u32, |acc, (_, &raw)| {
            let (total, carried) = acc.overflowing_add(u32::from_be(raw));
            // Feed the overflow bit back into the sum; adding a zero word
            // never overflows, so no special case is needed for it.
            total.wrapping_add(u32::from(carried))
        });

    !folded
}
229
/// Computes the checksum of a bitmap block.
///
/// The implementation is chosen at compile time: the `simd` feature selects
/// `bitmap_sum_simd`, otherwise `bitmap_sum_scalar` is used.
///
/// * `buf` - the block contents (`BLOCK_SIZE` bytes).
///
/// Returns the checksum produced by the selected implementation.
#[inline]
pub fn bitmap_sum(buf: &[u8; BLOCK_SIZE]) -> u32 {
    // Exactly one of the two blocks below survives `cfg` evaluation and
    // becomes the tail expression of the function.
    #[cfg(feature = "simd")]
    {
        bitmap_sum_simd(buf)
    }

    #[cfg(not(feature = "simd"))]
    {
        bitmap_sum_scalar(buf)
    }
}
243
244#[inline]
246fn bitmap_sum_scalar(buf: &[u8; BLOCK_SIZE]) -> u32 {
247 let mut sum: u32 = 0;
248 let mut offset = 4;
249
250 for _ in 1..128 {
251 let word = u32::from_be_bytes([
252 buf[offset],
253 buf[offset + 1],
254 buf[offset + 2],
255 buf[offset + 3],
256 ]);
257 sum = sum.wrapping_sub(word);
258 offset += 4;
259 }
260 sum
261}
262
/// SIMD-assisted bitmap-block checksum.
///
/// Produces the wrapping negated sum of the big-endian 32-bit words at word
/// indices 1 through 127 of `buf`. Words 1..125 are subtracted lane-wise in
/// groups of four; words 125..128 are subtracted individually. When the
/// bytes cannot be reinterpreted as `u32` words (for example because the
/// buffer is not suitably aligned) the scalar implementation is used.
#[cfg(feature = "simd")]
#[inline]
fn bitmap_sum_simd(buf: &[u8; BLOCK_SIZE]) -> u32 {
    let Ok(words) = try_cast_slice::<u8, u32>(buf) else {
        return bitmap_sum_scalar(buf);
    };

    // Words 1..125 split into 31 complete groups of four.
    let mut lanes = u32x4::ZERO;
    for group in words[1..125].chunks_exact(4) {
        lanes -= u32x4::new([
            u32::from_be(group[0]),
            u32::from_be(group[1]),
            u32::from_be(group[2]),
            u32::from_be(group[3]),
        ]);
    }

    // The three remaining words do not fill a vector.
    let tail = words[125..128]
        .iter()
        .fold(0u32, |acc, &raw| acc.wrapping_sub(u32::from_be(raw)));

    // Each lane already holds a negated partial sum, so the lanes are added.
    lanes
        .to_array()
        .iter()
        .fold(tail, |acc, &lane| acc.wrapping_add(lane))
}
302
303#[inline]
305pub const fn read_u32_be(buf: &[u8; BLOCK_SIZE], offset: usize) -> u32 {
306 u32::from_be_bytes([
307 buf[offset],
308 buf[offset + 1],
309 buf[offset + 2],
310 buf[offset + 3],
311 ])
312}
313
/// Reads a big-endian `u32` from the slice `buf` starting at byte `offset`.
///
/// # Panics
///
/// Panics if `offset + 3` is not a valid index into `buf`.
#[inline]
pub const fn read_u32_be_slice(buf: &[u8], offset: usize) -> u32 {
    // Most significant byte first.
    let b0 = buf[offset];
    let b1 = buf[offset + 1];
    let b2 = buf[offset + 2];
    let b3 = buf[offset + 3];
    u32::from_be_bytes([b0, b1, b2, b3])
}
324
/// Reads a big-endian `i32` from `buf` starting at byte `offset`.
///
/// Forwards the fixed-size block to [`read_i32_be_slice`].
///
/// # Panics
///
/// Panics if `offset + 3` is not a valid index into `buf`.
#[inline]
pub const fn read_i32_be(buf: &[u8; BLOCK_SIZE], offset: usize) -> i32 {
    read_i32_be_slice(buf, offset)
}
330
/// Reads a big-endian, two's-complement `i32` from the slice `buf` starting
/// at byte `offset`.
///
/// # Panics
///
/// Panics if `offset + 3` is not a valid index into `buf`.
#[inline]
pub const fn read_i32_be_slice(buf: &[u8], offset: usize) -> i32 {
    let bytes = [
        buf[offset],
        buf[offset + 1],
        buf[offset + 2],
        buf[offset + 3],
    ];
    i32::from_be_bytes(bytes)
}
341
342#[inline]
344pub const fn read_u16_be(buf: &[u8; BLOCK_SIZE], offset: usize) -> u16 {
345 u16::from_be_bytes([buf[offset], buf[offset + 1]])
346}
347
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a zero-filled block whose leading bytes are `prefix`.
    fn block_with_prefix(prefix: &[u8]) -> [u8; BLOCK_SIZE] {
        let mut block = [0u8; BLOCK_SIZE];
        block[..prefix.len()].copy_from_slice(prefix);
        block
    }

    // An unsigned word is assembled most-significant byte first.
    #[test]
    fn test_read_u32_be() {
        let block = block_with_prefix(&[0x12, 0x34, 0x56, 0x78]);
        assert_eq!(read_u32_be(&block, 0), 0x12345678);
    }

    // 0xFFFFFFFD is -3 in two's complement.
    #[test]
    fn test_read_i32_be() {
        let block = block_with_prefix(&[0xFF, 0xFF, 0xFF, 0xFD]);
        assert_eq!(read_i32_be(&block, 0), -3);
    }
}
371}