affs_read/checksum.rs

//! Checksum calculation functions.

use crate::constants::BLOCK_SIZE;

#[cfg(feature = "simd")]
use bytemuck::try_cast_slice;
#[cfg(feature = "simd")]
use wide::u32x4;

/// Calculate the normal checksum for a block.
///
/// Used for root blocks, entry blocks, etc.
/// The checksum is calculated so that, once it is stored at `checksum_offset`,
/// the sum of all longwords in the block wraps to 0.
#[inline]
pub fn normal_sum(buf: &[u8; BLOCK_SIZE], checksum_offset: usize) -> u32 {
    normal_sum_slice(buf, checksum_offset)
}

/// Calculate the normal checksum for a variable-size block.
///
/// Used for root blocks, entry blocks, etc. when the block size is not fixed
/// at `BLOCK_SIZE`. Both the buffer length and `checksum_offset` must be
/// multiples of 4.
#[inline]
pub fn normal_sum_slice(buf: &[u8], checksum_offset: usize) -> u32 {
    let len = buf.len();
    debug_assert!(
        len.is_multiple_of(4),
        "Buffer length must be divisible by 4"
    );
    debug_assert!(
        checksum_offset.is_multiple_of(4),
        "Checksum offset must be aligned to 4 bytes"
    );

    #[cfg(feature = "simd")]
    {
        normal_sum_slice_simd(buf, checksum_offset)
    }

    #[cfg(not(feature = "simd"))]
    {
        normal_sum_slice_scalar(buf, checksum_offset)
    }
}

/// Scalar implementation of normal_sum_slice.
#[inline]
fn normal_sum_slice_scalar(buf: &[u8], checksum_offset: usize) -> u32 {
    let checksum_word = checksum_offset / 4;
    let num_words = buf.len() / 4;

    let mut sum: u32 = 0;
    let mut offset = 0;

    for i in 0..num_words {
        if i != checksum_word {
            let word = u32::from_be_bytes([
                buf[offset],
                buf[offset + 1],
                buf[offset + 2],
                buf[offset + 3],
            ]);
            sum = sum.wrapping_add(word);
        }
        offset += 4;
    }

    // Negate so that adding the stored checksum back brings the total to 0.
    (sum as i32).wrapping_neg() as u32
}

/// SIMD-optimized implementation of normal_sum_slice.
///
/// Uses bytemuck for safe byte slice casting when alignment permits,
/// falls back to scalar implementation otherwise.
#[cfg(feature = "simd")]
#[inline]
fn normal_sum_slice_simd(buf: &[u8], checksum_offset: usize) -> u32 {
    // Try to use bytemuck for aligned access when possible
    if let Ok(words_slice) = try_cast_slice::<u8, u32>(buf) {
        let checksum_word = checksum_offset / 4;
        let num_words = buf.len() / 4;

        // Use SIMD for accumulation
        let mut sum_vec = u32x4::ZERO;
        let mut i = 0;

        // Aligned path: use bytemuck-cast slice
        while i + 4 <= num_words {
            let skip_0 = i == checksum_word;
            let skip_1 = i + 1 == checksum_word;
            let skip_2 = i + 2 == checksum_word;
            let skip_3 = i + 3 == checksum_word;

            let w0 = if skip_0 {
                0
            } else {
                u32::from_be(words_slice[i])
            };
            let w1 = if skip_1 {
                0
            } else {
                u32::from_be(words_slice[i + 1])
            };
            let w2 = if skip_2 {
                0
            } else {
                u32::from_be(words_slice[i + 2])
            };
            let w3 = if skip_3 {
                0
            } else {
                u32::from_be(words_slice[i + 3])
            };

            let words = u32x4::new([w0, w1, w2, w3]);
            sum_vec += words;
            i += 4;
        }

        // Sum the SIMD lanes
        let sum_array = sum_vec.to_array();
        let mut sum: u32 = sum_array[0]
            .wrapping_add(sum_array[1])
            .wrapping_add(sum_array[2])
            .wrapping_add(sum_array[3]);

        // Process remaining words
        while i < num_words {
            if i != checksum_word {
                let word = u32::from_be(words_slice[i]);
                sum = sum.wrapping_add(word);
            }
            i += 1;
        }

        (sum as i32).wrapping_neg() as u32
    } else {
        // Unaligned fallback: use scalar implementation
        normal_sum_slice_scalar(buf, checksum_offset)
    }
}

/// Calculate the boot block checksum.
///
/// The boot block uses a special checksum: a carry-wraparound (end-around
/// carry) sum over the 256 longwords of the 1024-byte boot block, skipping
/// the longword at byte offset 4 where the checksum itself is stored, with
/// the result complemented.
#[inline]
pub fn boot_sum(buf: &[u8; 1024]) -> u32 {
    #[cfg(feature = "simd")]
    {
        boot_sum_simd(buf)
    }

    #[cfg(not(feature = "simd"))]
    {
        boot_sum_scalar(buf)
    }
}

/// Scalar implementation of boot_sum.
#[inline]
fn boot_sum_scalar(buf: &[u8; 1024]) -> u32 {
    let mut sum: u32 = 0;
    let mut offset = 0;

    for i in 0..256 {
        if i != 1 {
            let d = u32::from_be_bytes([
                buf[offset],
                buf[offset + 1],
                buf[offset + 2],
                buf[offset + 3],
            ]);
            // Add with end-around carry: if the addition wrapped, fold the
            // carry bit back into the sum.
            let new_sum = sum.wrapping_add(d);
            sum = new_sum.wrapping_add((new_sum < sum) as u32);
        }
        offset += 4;
    }
    !sum
}

/// SIMD-optimized implementation of boot_sum.
///
/// Uses bytemuck for safe byte slice casting when alignment permits,
/// falls back to scalar implementation otherwise.
#[cfg(feature = "simd")]
#[inline]
fn boot_sum_simd(buf: &[u8; 1024]) -> u32 {
    // Try to use bytemuck for aligned access when possible
    if let Ok(words_slice) = try_cast_slice::<u8, u32>(buf) {
        let mut sum: u32 = 0;

        // Aligned path: use bytemuck-cast slice
        // Process first word (index 0)
        let d = u32::from_be(words_slice[0]);
        let new_sum = sum.wrapping_add(d);
        sum = new_sum.wrapping_add((new_sum < sum) as u32);

        // Process words 2-255 in batches of 4 using SIMD (skip word at index 1)
        for i in (2..256).step_by(4) {
            let words = if i + 3 < 256 {
                u32x4::new([
                    u32::from_be(words_slice[i]),
                    u32::from_be(words_slice[i + 1]),
                    u32::from_be(words_slice[i + 2]),
                    u32::from_be(words_slice[i + 3]),
                ])
            } else {
                let mut arr = [0u32; 4];
                for (j, item) in arr.iter_mut().enumerate().take((256 - i).min(4)) {
                    *item = u32::from_be(words_slice[i + j]);
                }
                u32x4::new(arr)
            };

            let words_array = words.to_array();
            for &d in &words_array {
                // Adding 0 neither changes the sum nor produces a carry, so
                // zero words (including the zero padding of the final partial
                // batch) can be skipped without affecting the result.
                if d != 0 {
                    let new_sum = sum.wrapping_add(d);
                    sum = new_sum.wrapping_add((new_sum < sum) as u32);
                }
            }
        }

        !sum
    } else {
        // Unaligned fallback: use scalar implementation
        boot_sum_scalar(buf)
    }
}

/// Calculate the bitmap block checksum.
///
/// The first longword of the block (where the stored checksum lives) is
/// skipped; the result is the negated wrapping sum of the remaining 127
/// longwords.
#[inline]
pub fn bitmap_sum(buf: &[u8; BLOCK_SIZE]) -> u32 {
    #[cfg(feature = "simd")]
    {
        bitmap_sum_simd(buf)
    }

    #[cfg(not(feature = "simd"))]
    {
        bitmap_sum_scalar(buf)
    }
}

/// Scalar implementation of bitmap_sum.
#[inline]
fn bitmap_sum_scalar(buf: &[u8; BLOCK_SIZE]) -> u32 {
    let mut sum: u32 = 0;
    let mut offset = 4;

    // Subtract longwords 1..=127; longword 0 holds the stored checksum and is
    // skipped.
    for _ in 1..128 {
        let word = u32::from_be_bytes([
            buf[offset],
            buf[offset + 1],
            buf[offset + 2],
            buf[offset + 3],
        ]);
        sum = sum.wrapping_sub(word);
        offset += 4;
    }
    sum
}

/// SIMD-optimized implementation of bitmap_sum.
///
/// Uses bytemuck for safe byte slice casting when alignment permits,
/// falls back to scalar implementation otherwise.
#[cfg(feature = "simd")]
#[inline]
fn bitmap_sum_simd(buf: &[u8; BLOCK_SIZE]) -> u32 {
    // Try to use bytemuck for aligned access when possible
    if let Ok(words_slice) = try_cast_slice::<u8, u32>(buf) {
        let mut sum_vec = u32x4::ZERO;

        // Aligned path: use bytemuck-cast slice
        for i in (1..125).step_by(4) {
            let words = u32x4::new([
                u32::from_be(words_slice[i]),
                u32::from_be(words_slice[i + 1]),
                u32::from_be(words_slice[i + 2]),
                u32::from_be(words_slice[i + 3]),
            ]);
            sum_vec -= words;
        }

        // Process remaining words 125, 126, 127
        let mut sum: u32 = 0;
        for &word in &words_slice[125..128] {
            sum = sum.wrapping_sub(u32::from_be(word));
        }

        // Sum the SIMD lanes
        let sum_array = sum_vec.to_array();
        sum.wrapping_add(sum_array[0])
            .wrapping_add(sum_array[1])
            .wrapping_add(sum_array[2])
            .wrapping_add(sum_array[3])
    } else {
        // Unaligned fallback: use scalar implementation
        bitmap_sum_scalar(buf)
    }
}

/// Read a big-endian u32 from a buffer.
#[inline]
pub const fn read_u32_be(buf: &[u8; BLOCK_SIZE], offset: usize) -> u32 {
    read_u32_be_slice(buf, offset)
}

/// Read a big-endian u32 from a slice.
#[inline]
pub const fn read_u32_be_slice(buf: &[u8], offset: usize) -> u32 {
    u32::from_be_bytes([
        buf[offset],
        buf[offset + 1],
        buf[offset + 2],
        buf[offset + 3],
    ])
}

/// Read a big-endian i32 from a buffer.
#[inline]
pub const fn read_i32_be(buf: &[u8; BLOCK_SIZE], offset: usize) -> i32 {
    read_i32_be_slice(buf, offset)
}

/// Read a big-endian i32 from a slice.
#[inline]
pub const fn read_i32_be_slice(buf: &[u8], offset: usize) -> i32 {
    i32::from_be_bytes([
        buf[offset],
        buf[offset + 1],
        buf[offset + 2],
        buf[offset + 3],
    ])
}

/// Read a big-endian u16 from a buffer.
#[inline]
pub const fn read_u16_be(buf: &[u8; BLOCK_SIZE], offset: usize) -> u16 {
    u16::from_be_bytes([buf[offset], buf[offset + 1]])
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_read_u32_be() {
        let mut buf = [0u8; BLOCK_SIZE];
        buf[0] = 0x12;
        buf[1] = 0x34;
        buf[2] = 0x56;
        buf[3] = 0x78;
        assert_eq!(read_u32_be(&buf, 0), 0x12345678);
    }

    #[test]
    fn test_read_i32_be() {
        let mut buf = [0u8; BLOCK_SIZE];
        buf[0] = 0xFF;
        buf[1] = 0xFF;
        buf[2] = 0xFF;
        buf[3] = 0xFD;
        assert_eq!(read_i32_be(&buf, 0), -3);
    }
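
    // A property sketch for the documented invariant of `normal_sum_slice`:
    // once the computed checksum is stored at `checksum_offset`, the wrapping
    // sum of all longwords is 0. Buffer contents and offset are arbitrary
    // test values.
    #[test]
    fn test_normal_sum_slice_zeroes_longword_total() {
        let mut buf = [0u8; 64];
        for (i, b) in buf.iter_mut().enumerate() {
            *b = (i as u8).wrapping_mul(37);
        }
        let checksum_offset = 20;
        let checksum = normal_sum_slice(&buf, checksum_offset);
        buf[checksum_offset..checksum_offset + 4].copy_from_slice(&checksum.to_be_bytes());

        let mut total: u32 = 0;
        for chunk in buf.chunks_exact(4) {
            total = total.wrapping_add(u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
        }
        assert_eq!(total, 0);
    }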
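
    // `boot_sum` skips the longword at byte offset 4 (conventionally where the
    // boot block checksum is stored), so for an otherwise zeroed boot block the
    // result is !0 no matter what that longword contains.
    #[test]
    fn test_boot_sum_ignores_checksum_longword() {
        let mut buf = [0u8; 1024];
        assert_eq!(boot_sum(&buf), 0xFFFF_FFFF);

        // Changing only the skipped longword must not affect the checksum.
        buf[4..8].copy_from_slice(&0xDEAD_BEEFu32.to_be_bytes());
        assert_eq!(boot_sum(&buf), 0xFFFF_FFFF);
    }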
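
    // A sketch of the bitmap checksum invariant, assuming the checksum is kept
    // in the block's first longword (the one `bitmap_sum` skips): writing the
    // result there makes the wrapping sum of the 128 longwords it covers zero.
    #[test]
    fn test_bitmap_sum_zeroes_longword_total() {
        let mut buf = [0u8; BLOCK_SIZE];
        for (i, b) in buf.iter_mut().enumerate() {
            *b = (i % 251) as u8;
        }
        let checksum = bitmap_sum(&buf);
        buf[..4].copy_from_slice(&checksum.to_be_bytes());

        let mut total: u32 = 0;
        for chunk in buf.chunks_exact(4).take(128) {
            total = total.wrapping_add(u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
        }
        assert_eq!(total, 0);
    }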
}