1#[cfg(target_arch = "x86_64")]
2use std::arch::x86_64::*;
3
/// Byte template for a complete `YYYY-MM-DDTHH:MM:SS.sssZ` timestamp.
///
/// The 24 meaningful bytes are followed by eight filler bytes so that the
/// literal spans exactly two 16-byte vectors and a full-width load starting
/// at offset 16 stays inside the string.
const PATTERN_COMPLETE: &str = "0000-00-00T00:00:00.000Z00:00:00";

/// Byte template for the `-MM-DDTHH:MM:SS.` part that follows the year.
const PATTERN_AFTER_YEAR: &str = "-00-00T00:00:00.";

// Compile-time guard: both templates are read with 16-byte vector loads, so
// their lengths have to be whole multiples of the vector width.
#[allow(clippy::assertions_on_constants)]
const _: () = {
    assert!(PATTERN_AFTER_YEAR.len() == 16);
    assert!(PATTERN_COMPLETE.len() == 32);
};
12
13#[inline]
14#[cfg(target_arch = "x86_64")]
15#[target_feature(enable = "sse2,ssse3,sse4.1")]
16#[doc(hidden)] pub unsafe fn format_simd_mul_to_slice(
18 slice: &mut [u8],
19 year: u32,
20 month: u32,
21 day: u32,
22 hour: u32,
23 minute: u32,
24 second: u32,
25 millisecond: u32,
26) {
27 let slice = &mut slice[0..24];
30 let year = year as i16;
31 let month = month as i16;
32 let day = day as i16;
33 let hour = hour as i16;
34 let minute = minute as i16;
35 let second = second as i16;
36 let millisecond = millisecond as i16;
37
38 let input = _mm_setr_epi16(millisecond / 10, second, minute, hour, day, month, year % 100, year / 100);
39
40 let tens = _mm_mulhi_epu16(input, _mm_set1_epi16(52429_u16 as i16));
42 let tens = _mm_srli_epi16(tens, 3);
43
44 let tens_times10 = _mm_mullo_epi16(tens, _mm_set1_epi16(10));
46 let ones = _mm_sub_epi16(input, tens_times10);
47
48 let fmt = _mm_or_si128(_mm_slli_epi16(tens, 8), ones);
50
51 let fmt_lo = _mm_shuffle_epi8(fmt, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, -1, 2, 3, -1));
53 let fmt_hi = _mm_shuffle_epi8(fmt, _mm_set_epi8(4, 5, -1, 6, 7, -1, 8, 9, -1, 10, 11, -1, 12, 13, 14, 15));
54
55 let fmt_lo = _mm_insert_epi8(fmt_lo, (millisecond % 10) as i32, 6);
58
59 let pattern_lo = _mm_loadu_si128(PATTERN_COMPLETE.as_ptr().add(16) as *const _);
64 let pattern_hi = _mm_loadu_si128(PATTERN_COMPLETE.as_ptr().add(0) as *const _);
65 let fmt_lo = _mm_or_si128(fmt_lo, pattern_lo);
66 let fmt_hi = _mm_or_si128(fmt_hi, pattern_hi);
67
68 _mm_storeu_si128(slice.as_mut_ptr() as *mut __m128i, fmt_hi);
69 _mm_storel_epi64(slice.as_mut_ptr().offset(16) as *mut __m128i, fmt_lo);
70
71 }
74
/// Converts eight small numbers to decimal digits with the double-dabble
/// (add-3-then-double) algorithm, all lanes in parallel.
///
/// In every 16-bit lane of the result the low byte holds the tens digit and
/// the high byte holds the ones digit. Inputs are expected to be below 100;
/// larger values are not handled.
///
/// # Safety
///
/// The executing CPU must support SSE2 and SSSE3.
#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2,ssse3")]
unsafe fn simd_double_dabble(numbers: &[u16; 8]) -> std::arch::x86_64::__m128i {
    // Adjustment table indexed by a BCD digit: digits above 4 get 3 added
    // before the next doubling. The second table is the same adjustment
    // positioned for the upper nibble.
    let add3_ones = _mm_setr_epi8(0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
    let add3_tens = _mm_slli_epi16(add3_ones, 4);

    // The BCD digits live in the upper byte of each lane.
    let ones_mask = _mm_set1_epi16(0x0F00_u16 as i16);
    let tens_mask = _mm_set1_epi16(0xF000_u16 as i16);
    let digits_mask = _mm_or_si128(ones_mask, tens_mask);

    // Pre-shift by 4 so the top three bits of a 7-bit value already sit in
    // the digit byte; the four rounds below shift the remaining bits in.
    let loaded = _mm_loadu_si128(numbers.as_ptr() as *const _);
    let mut acc = _mm_slli_epi16(loaded, 4);

    for _ in 0..4 {
        let fix_ones = _mm_shuffle_epi8(add3_ones, acc);
        let fix_tens = _mm_shuffle_epi8(add3_tens, _mm_srli_epi16(acc, 4));
        // Only the digit byte may be adjusted; lookups made with the low
        // byte as index are discarded by the mask.
        let fix = _mm_and_si128(_mm_or_si128(fix_ones, fix_tens), digits_mask);
        acc = _mm_slli_epi16(_mm_add_epi16(acc, fix), 1);
    }

    // Ones digit stays in the high byte, tens digit moves to the low byte.
    let ones = _mm_and_si128(acc, ones_mask);
    let tens = _mm_srli_epi16(_mm_and_si128(acc, tens_mask), 12);
    _mm_or_si128(ones, tens)
}
114
/// AVX2 variant of the double-dabble conversion: turns sixteen small numbers
/// into decimal digits in parallel.
///
/// In every 16-bit lane of the result the low byte holds the tens digit and
/// the high byte holds the ones digit. Inputs are expected to be below 100;
/// larger values are not handled.
///
/// # Safety
///
/// The executing CPU must support AVX2.
#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn simd_double_dabble_256(numbers: &[u16; 16]) -> __m256i {
    // Adjustment table indexed by a BCD digit (digits above 4 get 3 added),
    // duplicated into both 128-bit halves because the byte shuffle works on
    // each half separately.
    let table = _mm_setr_epi8(0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
    let add3_ones = _mm256_broadcastsi128_si256(table);
    let add3_tens = _mm256_slli_epi16(add3_ones, 4);

    // The BCD digits live in the upper byte of each lane.
    let ones_mask = _mm256_set1_epi16(0x0F00_u16 as i16);
    let tens_mask = _mm256_set1_epi16(0xF000_u16 as i16);
    let digits_mask = _mm256_or_si256(ones_mask, tens_mask);

    // Pre-shift by 4 so the top three bits of a 7-bit value already sit in
    // the digit byte; the four rounds below shift the remaining bits in.
    let loaded = _mm256_loadu_si256(numbers.as_ptr() as *const _);
    let mut acc = _mm256_slli_epi16(loaded, 4);

    for _ in 0..4 {
        let fix_ones = _mm256_shuffle_epi8(add3_ones, acc);
        let fix_tens = _mm256_shuffle_epi8(add3_tens, _mm256_srli_epi16(acc, 4));
        // Only the digit byte may be adjusted.
        let fix = _mm256_and_si256(_mm256_or_si256(fix_ones, fix_tens), digits_mask);
        acc = _mm256_slli_epi16(_mm256_add_epi16(acc, fix), 1);
    }

    // Ones digit stays in the high byte, tens digit moves to the low byte.
    let ones = _mm256_and_si256(acc, ones_mask);
    let tens = _mm256_srli_epi16(_mm256_and_si256(acc, tens_mask), 12);
    _mm256_or_si256(ones, tens)
}
154
155#[inline]
158#[cfg(target_arch = "x86_64")]
159#[target_feature(enable = "sse2,ssse3")]
160unsafe fn format_mmddhhmmss_double_dabble(buffer: *mut u8, month: u16, day: u16, hour: u16, minute: u16, second: u16) {
161 let mut res = simd_double_dabble(&[0, 0, 0, second, minute, hour, day, month]);
162
163 res = _mm_shuffle_epi8(res, _mm_set_epi8(-1, 9, 8, -1, 7, 6, -1, 5, 4, -1, 3, 2, -1, 1, 0, -1));
164 res = _mm_add_epi8(res, _mm_loadu_si128(PATTERN_AFTER_YEAR.as_ptr() as *const __m128i));
165
166 _mm_storeu_si128(buffer as *mut __m128i, res);
167}
168
169#[inline]
172#[cfg(target_arch = "x86_64")]
173#[target_feature(enable = "sse2,ssse3")]
174unsafe fn format_yyyymmddhhmm_double_dabble(
175 buffer: *mut u8,
176 year_hi: u16,
177 year_lo: u16,
178 month: u16,
179 day: u16,
180 hour: u16,
181 minute: u16,
182) {
183 let mut res = simd_double_dabble(&[year_hi, year_lo, month, day, hour, minute, 0, 0]);
184
185 res = _mm_shuffle_epi8(res, _mm_setr_epi8(0, 1, 2, 3, -1, 4, 5, -1, 6, 7, -1, 8, 9, -1, 10, 11));
186 res = _mm_add_epi8(res, _mm_loadu_si128(PATTERN_COMPLETE.as_ptr() as *const __m128i));
187
188 _mm_storeu_si128(buffer as *mut __m128i, res);
189}
190
191#[inline]
194#[cfg(target_arch = "x86_64")]
195#[target_feature(enable = "sse2,ssse3")]
196unsafe fn format_ss_sss_double_dabble(buffer: *mut u8, second: u16, milli_hi: u16, milli_lo: u16) {
197 let mut res = simd_double_dabble(&[milli_hi, milli_lo, second, 0, 0, 0, 0, 0]);
198
199 res = _mm_shuffle_epi8(res, _mm_setr_epi8(-1, 4, 5, -1, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1));
200 res = _mm_add_epi8(res, _mm_loadu_si128(PATTERN_COMPLETE.as_ptr().add(16) as *const __m128i));
201
202 _mm_storel_epi64(buffer as *mut __m128i, res);
204}
205
206#[inline]
207#[cfg(target_arch = "x86_64")]
208#[target_feature(enable = "sse2,ssse3")]
209#[doc(hidden)] pub unsafe fn format_simd_dd_to_slice(
211 slice: &mut [u8],
212 year: u32,
213 month: u32,
214 day: u32,
215 hour: u32,
216 minute: u32,
217 second: u32,
218 millisecond: u32,
219) {
220 let slice = &mut slice[0..24];
223
224 format_yyyymmddhhmm_double_dabble(
225 slice.as_mut_ptr().add(0),
226 (year / 100) as u16,
227 (year % 100) as u16,
228 month as u16,
229 day as u16,
230 hour as u16,
231 minute as u16,
232 );
233 format_ss_sss_double_dabble(
234 slice.as_mut_ptr().add(16),
235 second as u16,
236 (millisecond / 100) as u16,
237 (millisecond % 100) as u16,
238 );
239
240 }
242
/// Writes `YYYY-MM-DDTHH:MM:SS.sssZ` into the first 24 bytes of `slice` one
/// byte at a time, without any SIMD.
///
/// The fields are not validated: the year is rendered with four digits, the
/// millisecond with three and every other field with two.
///
/// # Panics
///
/// Panics if `slice` holds fewer than 24 bytes.
#[inline]
#[doc(hidden)]
pub fn format_scalar_to_slice(
    slice: &mut [u8],
    year: u32,
    month: u32,
    day: u32,
    hour: u32,
    minute: u32,
    second: u32,
    millisecond: u32,
) {
    /// ASCII character for a single decimal digit value.
    fn digit(value: u32) -> u8 {
        b'0' + (value as u8)
    }

    /// Writes `value` as two ASCII digits at `out[at]` and `out[at + 1]`.
    fn two_digits(out: &mut [u8], at: usize, value: u32) {
        out[at] = digit(value / 10);
        out[at + 1] = digit(value % 10);
    }

    let out = &mut slice[0..24];

    // Date: YYYY-MM-DD
    out[0] = digit(year / 1000);
    out[1] = digit(year / 100 % 10);
    out[2] = digit(year / 10 % 10);
    out[3] = digit(year % 10);
    out[4] = b'-';
    two_digits(out, 5, month);
    out[7] = b'-';
    two_digits(out, 8, day);

    out[10] = b'T';

    // Time: HH:MM:SS
    two_digits(out, 11, hour);
    out[13] = b':';
    two_digits(out, 14, minute);
    out[16] = b':';
    two_digits(out, 17, second);

    // Fraction and UTC designator: .sssZ
    out[19] = b'.';
    out[20] = digit(millisecond / 100 % 10);
    out[21] = digit(millisecond / 10 % 10);
    out[22] = digit(millisecond % 10);
    out[23] = b'Z';
}
298
299pub fn format_to_rfc3339_utc_bytes(
300 year: u32,
301 month: u32,
302 day: u32,
303 hour: u32,
304 minute: u32,
305 second: u32,
306 millisecond: u32,
307) -> [u8; 24] {
308 let mut buffer = [0_u8; 24];
309 #[cfg(all(not(miri), target_feature = "sse4.1"))]
310 unsafe {
311 format_simd_mul_to_slice(&mut buffer, year, month, day, hour, minute, second, millisecond);
312 }
313 #[cfg(not(all(not(miri), target_feature = "sse4.1")))]
314 {
315 format_scalar_to_slice(&mut buffer, year, month, day, hour, minute, second, millisecond);
316 }
317 buffer
318}
319
/// Signature shared by every formatter variant under test. It is declared
/// `unsafe` so that the SIMD variants fit alongside the safe scalar one.
#[cfg(test)]
type FormatToSlice = unsafe fn(&mut [u8], u32, u32, u32, u32, u32, u32, u32);

/// Runs formatter `f` on a fresh 24-byte buffer and asserts that the bytes it
/// produced are exactly `expected`.
#[cfg(test)]
fn assert_format(
    expected: &str,
    year: u32,
    month: u32,
    day: u32,
    hour: u32,
    minute: u32,
    second: u32,
    millisecond: u32,
    f: FormatToSlice,
) {
    let mut buffer = vec![0_u8; 24];

    // SAFETY: the caller is responsible for passing only formatters whose
    // requirements (such as CPU features) hold in the current test build.
    unsafe { f(&mut buffer[..], year, month, day, hour, minute, second, millisecond) };

    let actual = String::from_utf8(buffer).unwrap();
    assert_eq!(expected, actual.as_str());
}
345
#[cfg(test)]
mod scalar_tests {
    use crate::format::assert_format;
    use crate::format_scalar_to_slice;

    /// The scalar formatter renders an ordinary timestamp, the start of a
    /// year and an end-of-year leap second.
    #[test]
    fn test_format_scalar() {
        // (expected text, [year, month, day, hour, minute, second, millisecond])
        let cases: [(&str, [u32; 7]); 3] = [
            ("2021-09-10T23:45:31.987Z", [2021, 9, 10, 23, 45, 31, 987]),
            ("2021-01-01T00:00:00.000Z", [2021, 1, 1, 0, 0, 0, 0]),
            ("2021-12-31T23:59:60.999Z", [2021, 12, 31, 23, 59, 60, 999]),
        ];

        for &(expected, [year, month, day, hour, minute, second, millisecond]) in cases.iter() {
            assert_format(
                expected,
                year,
                month,
                day,
                hour,
                minute,
                second,
                millisecond,
                format_scalar_to_slice,
            );
        }
    }
}
378
#[cfg(test)]
#[cfg(all(
    not(miri),
    target_arch = "x86_64",
    target_feature = "sse2",
    target_feature = "ssse3",
    target_feature = "sse4.1"
))]
mod simd_tests {
    use crate::format::assert_format;
    use crate::{format_simd_dd_to_slice, format_simd_mul_to_slice};

    /// Shared inputs for both SIMD variants:
    /// (expected text, [year, month, day, hour, minute, second, millisecond]).
    const CASES: [(&str, [u32; 7]); 3] = [
        ("2021-09-10T23:45:31.987Z", [2021, 9, 10, 23, 45, 31, 987]),
        ("2021-01-01T00:00:00.000Z", [2021, 1, 1, 0, 0, 0, 0]),
        ("2021-12-31T23:59:60.999Z", [2021, 12, 31, 23, 59, 60, 999]),
    ];

    /// The double-dabble formatter renders every shared case.
    #[test]
    fn test_format_simd_dd() {
        for &(expected, [year, month, day, hour, minute, second, millisecond]) in CASES.iter() {
            assert_format(
                expected,
                year,
                month,
                day,
                hour,
                minute,
                second,
                millisecond,
                format_simd_dd_to_slice,
            );
        }
    }

    /// The multiplication-based formatter renders every shared case.
    #[test]
    fn test_format_simd_mul() {
        for &(expected, [year, month, day, hour, minute, second, millisecond]) in CASES.iter() {
            assert_format(
                expected,
                year,
                month,
                day,
                hour,
                minute,
                second,
                millisecond,
                format_simd_mul_to_slice,
            );
        }
    }
}