#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

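/// CRC-32 state for the PCLMULQDQ/SSE4.1 accelerated implementation.
///
/// A minimal usage sketch (the `DEFAULT_CRC32` seed comes from the crate
/// root; the input bytes here are just an example):
///
/// ```ignore
/// let mut state = State::new(crate::DEFAULT_CRC32).expect("pclmulqdq/sse4.1 not available");
/// state.update(b"hello world");
/// let crc = state.as_u32();
/// ```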
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct State {
    state: u32,
}

impl State {
    /// Creates a new `State`, or returns `None` when the `pclmulqdq` and
    /// `sse4.1` target features are not enabled at compile time.
    #[cfg(not(feature = "std"))]
    pub fn new(state: u32) -> Option<Self> {
        if cfg!(all(
            target_feature = "sse4.1",
            target_feature = "pclmulqdq",
        )) {
            Some(Self { state })
        } else {
            None
        }
    }

    /// Creates a new `State`, or returns `None` when the `pclmulqdq` and
    /// `sse4.1` features are not detected at runtime.
    #[cfg(feature = "std")]
    pub fn new(state: u32) -> Option<Self> {
        if is_x86_feature_detected!("pclmulqdq") && is_x86_feature_detected!("sse4.1") {
            Some(Self { state })
        } else {
            None
        }
    }

    pub fn update(&mut self, buf: &[u8]) {
        // Safety: `new` only hands out a `State` when the required CPU
        // features are present, so the SIMD path can be called here.
        self.state = unsafe { calculate(self.state, buf) }
    }

    pub fn as_u32(&self) -> u32 {
        self.state
    }

    pub fn reset(&mut self) {
        self.state = crate::DEFAULT_CRC32;
    }
}

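// Precomputed carry-less multiplication constants for this CRC polynomial.
// They are used in pairs below: RK03/RK04 drive the 128-byte folding loop,
// RK09..RK20 and RK01/RK02 fold the eight lanes back into one, and
// RK05..RK08 reduce the last 128 bits down to the 32-bit CRC.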
const RK01: u64 = 0x0029_5f23_0000_0000;
const RK02: u64 = 0xfafa_5179_0000_0000;
const RK03: u64 = 0x5cd8_6bb5_0000_0000;
const RK04: u64 = 0xaf6f_37a3_0000_0000;
const RK05: u64 = 0x0029_5f23_0000_0000;
const RK06: u64 = 0x0000_4455_0000_0000;
const RK07: u64 = 0x0000_0001_0000_00af;
const RK08: u64 = 0x0000_0001_0000_00af;
const RK09: u64 = 0x9bd5_7b5d_0000_0000;
const RK10: u64 = 0xb7a4_d764_0000_0000;
const RK11: u64 = 0x1ae0_0042_0000_0000;
const RK12: u64 = 0xe772_0be6_0000_0000;
const RK13: u64 = 0x9c7f_c8fe_0000_0000;
const RK14: u64 = 0x3885_faf8_0000_0000;
const RK15: u64 = 0xb477_ad71_0000_0000;
const RK16: u64 = 0x0ac2_ae3d_0000_0000;
const RK17: u64 = 0x5eae_9dbe_0000_0000;
const RK18: u64 = 0x784a_4838_0000_0000;
const RK19: u64 = 0x7d21_bf20_0000_0000;
const RK20: u64 = 0xfaeb_d3d3_0000_0000;

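/// Computes the CRC over `data` using carry-less multiplication (PCLMULQDQ),
/// folding 128 bytes per iteration. Inputs shorter than 256 bytes are handed
/// to the baseline slice-by-16 implementation, and any tail shorter than
/// 16 bytes is finished byte by byte.
///
/// # Safety
///
/// The caller must ensure the CPU supports the `pclmulqdq` and `sse4.1`
/// features. A sketch of how a caller might satisfy that contract (with
/// `data` standing in for any byte slice):
///
/// ```ignore
/// if is_x86_feature_detected!("pclmulqdq") && is_x86_feature_detected!("sse4.1") {
///     let crc = unsafe { calculate(crate::DEFAULT_CRC32, data) };
/// }
/// ```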
#[target_feature(enable = "pclmulqdq", enable = "sse4.1")]
pub unsafe fn calculate(crc: u32, mut data: &[u8]) -> u32 {
    // Fewer than two 128-byte blocks: fall back to the table-based baseline.
    if data.len() < 16 * 8 * 2 {
        return crate::baseline::slice_by_16(crc, data);
    }

    // Place the incoming CRC in the top 32 bits of a 128-bit lane.
    let crc = _mm_set_epi32(crc as i32, 0x0000, 0x0000, 0x0000);
    // Shuffle mask that reverses the byte order of each 16-byte block.
    let smask = _mm_set_epi8(
        0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
    );

    // Load the first 128 bytes into eight 128-bit lanes and XOR the incoming
    // CRC into the first of them.
    let mut x7 = get(&mut data, smask);
    let mut x6 = get(&mut data, smask);
    let mut x5 = get(&mut data, smask);
    let mut x4 = get(&mut data, smask);
    let mut x3 = get(&mut data, smask);
    let mut x2 = get(&mut data, smask);
    let mut x1 = get(&mut data, smask);
    let mut x0 = get(&mut data, smask);
    x7 = _mm_xor_si128(x7, crc);

    // Main loop: fold each lane forward over the next 128 bytes of input.
    let k3k4 = _mm_set_epi64x(RK04 as i64, RK03 as i64);
    while data.len() >= 128 {
        x7 = reduce128(x7, get(&mut data, smask), k3k4);
        x6 = reduce128(x6, get(&mut data, smask), k3k4);
        x5 = reduce128(x5, get(&mut data, smask), k3k4);
        x4 = reduce128(x4, get(&mut data, smask), k3k4);
        x3 = reduce128(x3, get(&mut data, smask), k3k4);
        x2 = reduce128(x2, get(&mut data, smask), k3k4);
        x1 = reduce128(x1, get(&mut data, smask), k3k4);
        x0 = reduce128(x0, get(&mut data, smask), k3k4);
    }

    // Fold the eight lanes down to a single 128-bit remainder.
    let k1k2 = _mm_set_epi64x(RK02 as i64, RK01 as i64);
    let mut x = reduce128(x7, x0, _mm_set_epi64x(RK10 as i64, RK09 as i64));
    x = reduce128(x6, x, _mm_set_epi64x(RK12 as i64, RK11 as i64));
    x = reduce128(x5, x, _mm_set_epi64x(RK14 as i64, RK13 as i64));
    x = reduce128(x4, x, _mm_set_epi64x(RK16 as i64, RK15 as i64));
    x = reduce128(x3, x, _mm_set_epi64x(RK18 as i64, RK17 as i64));
    x = reduce128(x2, x, _mm_set_epi64x(RK20 as i64, RK19 as i64));
    x = reduce128(x1, x, k1k2);

    // Fold in any remaining whole 16-byte blocks.
    while data.len() >= 16 {
        x = reduce128(x, get(&mut data, smask), k1k2);
    }

    // Reduce the 128-bit remainder ahead of the final reduction step.
    let k5k6 = _mm_set_epi64x(RK06 as i64, RK05 as i64);
    x = _mm_xor_si128(_mm_clmulepi64_si128(x, k5k6, 0x01), _mm_slli_si128(x, 8));
    x = _mm_xor_si128(
        _mm_clmulepi64_si128(_mm_srli_si128(x, 12), k5k6, 0x10),
        _mm_and_si128(x, _mm_set_epi32(0, !0, !0, !0)),
    );

    // Barrett-style reduction down to the final 32-bit CRC.
    let k7k8 = _mm_set_epi64x(RK08 as i64, RK07 as i64);
    let t1 = _mm_slli_si128(_mm_clmulepi64_si128(x, k7k8, 0x01), 4);
    let t2 = _mm_slli_si128(_mm_clmulepi64_si128(t1, k7k8, 0x11), 4);
    let crc = _mm_extract_epi32(_mm_xor_si128(x, t2), 1) as u32;

    // Any tail shorter than 16 bytes is finished with the byte-at-a-time baseline.
    if data.is_empty() {
        crc
    } else {
        crate::baseline::slice_by_1(crc, data)
    }
}

/// Folds `a` by the pair of constants in `keys` (one carry-less multiply per
/// 64-bit half) and XORs the result into `b`.
#[inline(always)]
unsafe fn reduce128(a: __m128i, b: __m128i, keys: __m128i) -> __m128i {
    let t1 = _mm_clmulepi64_si128(a, keys, 0x00);
    let t2 = _mm_clmulepi64_si128(a, keys, 0x11);
    _mm_xor_si128(_mm_xor_si128(b, t1), t2)
}

/// Loads the next 16 bytes of `data` as a byte-reversed 128-bit lane and
/// advances the slice past them.
#[inline(always)]
unsafe fn get(data: &mut &[u8], smask: __m128i) -> __m128i {
    let r = _mm_shuffle_epi8(_mm_loadu_si128(data.as_ptr() as *const __m128i), smask);
    *data = &data[16..];
    r
}

#[cfg(test)]
mod test {
    use quickcheck_macros::quickcheck;

    // Feed random chunk sequences to both implementations and check that the
    // SIMD path agrees with the baseline.
    #[quickcheck]
    fn check_against_baseline(init: u32, chunks: Vec<(Vec<u8>, usize)>) -> bool {
        let mut baseline = crate::baseline::State::new(init);
        let mut pclmulqdq = super::State::new(init).expect("not supported");
        for (chunk, mut offset) in chunks {
            // Limit the offset to the range 0..=15.
            offset &= 0xF;
            if chunk.len() <= offset {
                baseline.update(&chunk);
                pclmulqdq.update(&chunk);
            } else {
                baseline.update(&chunk[offset..]);
                pclmulqdq.update(&chunk[offset..]);
            }
        }
        pclmulqdq.as_u32() == baseline.as_u32()
    }
}