haagenti_simd/
histogram.rs1#[inline]
9pub fn byte_histogram(data: &[u8]) -> [u32; 256] {
10 #[cfg(target_arch = "x86_64")]
11 {
12 if is_x86_feature_detected!("avx2") && data.len() >= 64 {
13 return unsafe { byte_histogram_avx2(data) };
14 }
15 }
16
17 byte_histogram_scalar(data)
18}
19
20#[inline]
25pub fn byte_histogram_simd(data: &[u8]) -> [u32; 256] {
26 byte_histogram(data)
27}
28
29#[inline]
31fn byte_histogram_scalar(data: &[u8]) -> [u32; 256] {
32 let mut hist = [0u32; 256];
33
34 let chunks = data.chunks_exact(4);
36 let remainder = chunks.remainder();
37
38 for chunk in chunks {
39 hist[chunk[0] as usize] += 1;
40 hist[chunk[1] as usize] += 1;
41 hist[chunk[2] as usize] += 1;
42 hist[chunk[3] as usize] += 1;
43 }
44
45 for &b in remainder {
46 hist[b as usize] += 1;
47 }
48
49 hist
50}
51
52#[cfg(target_arch = "x86_64")]
57#[target_feature(enable = "avx2")]
58unsafe fn byte_histogram_avx2(data: &[u8]) -> [u32; 256] {
59 let mut hist0 = [0u32; 256];
61 let mut hist1 = [0u32; 256];
62 let mut hist2 = [0u32; 256];
63 let mut hist3 = [0u32; 256];
64
65 let chunks = data.chunks_exact(16);
67 let remainder = chunks.remainder();
68
69 for chunk in chunks {
70 hist0[chunk[0] as usize] += 1;
72 hist1[chunk[1] as usize] += 1;
73 hist2[chunk[2] as usize] += 1;
74 hist3[chunk[3] as usize] += 1;
75
76 hist0[chunk[4] as usize] += 1;
77 hist1[chunk[5] as usize] += 1;
78 hist2[chunk[6] as usize] += 1;
79 hist3[chunk[7] as usize] += 1;
80
81 hist0[chunk[8] as usize] += 1;
82 hist1[chunk[9] as usize] += 1;
83 hist2[chunk[10] as usize] += 1;
84 hist3[chunk[11] as usize] += 1;
85
86 hist0[chunk[12] as usize] += 1;
87 hist1[chunk[13] as usize] += 1;
88 hist2[chunk[14] as usize] += 1;
89 hist3[chunk[15] as usize] += 1;
90 }
91
92 for &b in remainder {
94 hist0[b as usize] += 1;
95 }
96
97 use std::arch::x86_64::*;
99
100 let mut result = [0u32; 256];
101
102 for i in (0..256).step_by(8) {
104 unsafe {
106 let v0 = _mm256_loadu_si256(hist0[i..].as_ptr() as *const __m256i);
107 let v1 = _mm256_loadu_si256(hist1[i..].as_ptr() as *const __m256i);
108 let v2 = _mm256_loadu_si256(hist2[i..].as_ptr() as *const __m256i);
109 let v3 = _mm256_loadu_si256(hist3[i..].as_ptr() as *const __m256i);
110
111 let sum01 = _mm256_add_epi32(v0, v1);
113 let sum23 = _mm256_add_epi32(v2, v3);
114 let sum = _mm256_add_epi32(sum01, sum23);
115
116 _mm256_storeu_si256(result[i..].as_mut_ptr() as *mut __m256i, sum);
117 }
118 }
119
120 result
121}
122
123#[cfg(test)]
124mod tests {
125 use super::*;
126
127 #[test]
128 fn test_histogram_empty() {
129 let hist = byte_histogram(&[]);
130 assert!(hist.iter().all(|&c| c == 0));
131 }
132
133 #[test]
134 fn test_histogram_single_byte() {
135 let data = vec![42u8; 100];
136 let hist = byte_histogram(&data);
137 assert_eq!(hist[42], 100);
138 assert_eq!(hist.iter().filter(|&&c| c > 0).count(), 1);
139 }
140
141 #[test]
142 fn test_histogram_all_bytes() {
143 let data: Vec<u8> = (0..=255).collect();
144 let hist = byte_histogram(&data);
145 assert!(hist.iter().all(|&c| c == 1));
146 }
147
148 #[test]
149 fn test_histogram_repeated_pattern() {
150 let pattern = b"ABCD";
151 let data: Vec<u8> = pattern.iter().cycle().take(1000).cloned().collect();
152 let hist = byte_histogram(&data);
153
154 assert_eq!(hist[b'A' as usize], 250);
155 assert_eq!(hist[b'B' as usize], 250);
156 assert_eq!(hist[b'C' as usize], 250);
157 assert_eq!(hist[b'D' as usize], 250);
158 }
159
160 #[test]
161 fn test_histogram_large() {
162 let data: Vec<u8> = (0..100_000).map(|i| (i % 256) as u8).collect();
164 let hist = byte_histogram(&data);
165
166 let expected = 100_000 / 256;
168 for &count in hist.iter() {
169 assert!(count >= expected as u32);
170 assert!(count <= (expected + 1) as u32);
171 }
172 }
173}