packedtime_rs/
format.rs

1#[cfg(target_arch = "x86_64")]
2use std::arch::x86_64::*;
3
/// ASCII template for a full timestamp. The formatting routines load it in 16 byte halves and
/// combine it with the computed digit values (0..=9), which turns them into ASCII digits and
/// fills in the separators. The part after the `Z` only pads the second half to 16 bytes.
const PATTERN_COMPLETE: &str = "0000-00-00T00:00:00.000Z00:00:00";
/// ASCII template for the part of a timestamp that follows the year: `-MM-ddThh:mm:ss.`
const PATTERN_AFTER_YEAR: &str = "-00-00T00:00:00.";

// Compile-time guards: the templates are read with 128 bit loads, so their lengths must be
// exact multiples of 16 bytes.
#[allow(clippy::assertions_on_constants)]
const _: () = assert!(PATTERN_COMPLETE.len() == 32);
#[allow(clippy::assertions_on_constants)]
const _: () = assert!(PATTERN_AFTER_YEAR.len() == 16);
12
13#[inline]
14#[cfg(target_arch = "x86_64")]
15#[target_feature(enable = "sse2,ssse3,sse4.1")]
16#[doc(hidden)] // used in benchmarks
17pub unsafe fn format_simd_mul_to_slice(
18    slice: &mut [u8],
19    year: u32,
20    month: u32,
21    day: u32,
22    hour: u32,
23    minute: u32,
24    second: u32,
25    millisecond: u32,
26) {
27    //unsafe { asm!("#LLVM-MCA-BEGIN format_simd_mul") };
28
29    let slice = &mut slice[0..24];
30    let year = year as i16;
31    let month = month as i16;
32    let day = day as i16;
33    let hour = hour as i16;
34    let minute = minute as i16;
35    let second = second as i16;
36    let millisecond = millisecond as i16;
37
38    let input = _mm_setr_epi16(millisecond / 10, second, minute, hour, day, month, year % 100, year / 100);
39
40    // divide by 10 by reciprocal multiplication
41    let tens = _mm_mulhi_epu16(input, _mm_set1_epi16(52429_u16 as i16));
42    let tens = _mm_srli_epi16(tens, 3);
43
44    // remainder of division by 10
45    let tens_times10 = _mm_mullo_epi16(tens, _mm_set1_epi16(10));
46    let ones = _mm_sub_epi16(input, tens_times10);
47
48    // merge into bytes
49    let fmt = _mm_or_si128(_mm_slli_epi16(tens, 8), ones);
50
51    // broadcast to allow room for separators and lanewise shuffle
52    let fmt_lo = _mm_shuffle_epi8(fmt, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, -1, 2, 3, -1));
53    let fmt_hi = _mm_shuffle_epi8(fmt, _mm_set_epi8(4, 5, -1, 6, 7, -1, 8, 9, -1, 10, 11, -1, 12, 13, 14, 15));
54
55    // insert hundreds of milliseconds now that we have room
56    // this is the only instruction in this method that requires sse4.1
57    let fmt_lo = _mm_insert_epi8(fmt_lo, (millisecond % 10) as i32, 6);
58
59    // add '0' and separator ascii values
60    // let pattern = _mm256_loadu_si256(PATTERN_COMPLETE.as_ptr() as *const __m256i);
61    // let pattern_lo = _mm256_extractf128_si256(pattern, 1);
62    // let pattern_hi = _mm256_extractf128_si256(pattern, 0);
63    let pattern_lo = _mm_loadu_si128(PATTERN_COMPLETE.as_ptr().add(16) as *const _);
64    let pattern_hi = _mm_loadu_si128(PATTERN_COMPLETE.as_ptr().add(0) as *const _);
65    let fmt_lo = _mm_or_si128(fmt_lo, pattern_lo);
66    let fmt_hi = _mm_or_si128(fmt_hi, pattern_hi);
67
68    _mm_storeu_si128(slice.as_mut_ptr() as *mut __m128i, fmt_hi);
69    _mm_storel_epi64(slice.as_mut_ptr().offset(16) as *mut __m128i, fmt_lo);
70
71    //slice[22] = ('0' as u8 + ((millisecond % 10) as u8));
72    //unsafe { asm!("#LLVM-MCA-END format_simd_mul") };
73}
74
/// Converts 8 numbers into two decimal digits each using the double dabble algorithm.
///
/// In the result every 16 bit lane holds the tens digit in its low byte and the ones digit in
/// its high byte, so the bytes in memory order read `tens, ones` for each input number.
/// Only 7 bits and two digits are processed per lane, inputs are expected to be below 100.
///
/// # Safety
///
/// The CPU executing this function must support the `sse2` and `ssse3` features.
#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2,ssse3")]
unsafe fn simd_double_dabble(numbers: &[u16; 8]) -> std::arch::x86_64::__m128i {
    // correction table indexed by a bcd digit: digits > 4 get 3 added before being doubled
    let fixup_ones = _mm_setr_epi8(0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
    // same table, but the correction is applied to the upper nibble
    let fixup_tens = _mm_slli_epi16(fixup_ones, 4);

    // the bcd digits live in the high byte of each lane: ones in bits 8..12, tens in bits 12..16
    let ones_mask = _mm_set1_epi16(0x0F00_u16 as i16);
    let tens_mask = _mm_set1_epi16(0xF000_u16 as i16);
    let digits_mask = _mm_or_si128(ones_mask, tens_mask);

    // move the top 3 of the 7 input bits directly into the ones digit,
    // they can not exceed 7 so the first three iterations would never need a correction
    let mut acc = _mm_slli_epi16(_mm_loadu_si128(numbers.as_ptr() as *const _), 4);

    // remaining 4 bits: correct both digits, then shift the next bit in
    for _ in 0..4 {
        let add_ones = _mm_shuffle_epi8(fixup_ones, acc);
        let add_tens = _mm_shuffle_epi8(fixup_tens, _mm_srli_epi16(acc, 4));

        // the shuffles also produce garbage for the low bytes, keep only the digit byte
        let correction = _mm_and_si128(_mm_or_si128(add_ones, add_tens), digits_mask);

        acc = _mm_slli_epi16(_mm_add_epi16(acc, correction), 1);
    }

    // ones digit is already in the high byte, move the tens digit down into the low byte
    let ones = _mm_and_si128(acc, ones_mask);
    let tens = _mm_srli_epi16(_mm_and_si128(acc, tens_mask), 12);

    _mm_or_si128(ones, tens)
}
114
/// Converts 16 numbers into two decimal digits each using the double dabble algorithm.
///
/// In the result every 16 bit lane holds the tens digit in its low byte and the ones digit in
/// its high byte, so the bytes in memory order read `tens, ones` for each input number.
/// Only 7 bits and two digits are processed per lane, inputs are expected to be below 100.
///
/// # Safety
///
/// The CPU executing this function must support the `avx2` feature.
#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn simd_double_dabble_256(numbers: &[u16; 16]) -> __m256i {
    // correction table indexed by a bcd digit: digits > 4 get 3 added before being doubled;
    // the byte shuffle works per 128 bit half, so the table is repeated in both halves
    let fixup_ones =
        _mm256_broadcastsi128_si256(_mm_setr_epi8(0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3));
    // same table, but the correction is applied to the upper nibble
    let fixup_tens = _mm256_slli_epi16(fixup_ones, 4);

    // the bcd digits live in the high byte of each lane: ones in bits 8..12, tens in bits 12..16
    let ones_mask = _mm256_set1_epi16(0x0F00_u16 as i16);
    let tens_mask = _mm256_set1_epi16(0xF000_u16 as i16);
    let digits_mask = _mm256_or_si256(ones_mask, tens_mask);

    // move the top 3 of the 7 input bits directly into the ones digit,
    // they can not exceed 7 so the first three iterations would never need a correction
    let mut acc = _mm256_slli_epi16(_mm256_loadu_si256(numbers.as_ptr() as *const _), 4);

    // remaining 4 bits: correct both digits, then shift the next bit in
    for _ in 0..4 {
        let add_ones = _mm256_shuffle_epi8(fixup_ones, acc);
        let add_tens = _mm256_shuffle_epi8(fixup_tens, _mm256_srli_epi16(acc, 4));

        // the shuffles also produce garbage for the low bytes, keep only the digit byte
        let correction = _mm256_and_si256(_mm256_or_si256(add_ones, add_tens), digits_mask);

        acc = _mm256_slli_epi16(_mm256_add_epi16(acc, correction), 1);
    }

    // ones digit is already in the high byte, move the tens digit down into the low byte
    let ones = _mm256_and_si256(acc, ones_mask);
    let tens = _mm256_srli_epi16(_mm256_and_si256(acc, tens_mask), 12);

    _mm256_or_si256(ones, tens)
}
154
155/// formats the timestamp into the output buffer including separator chars, starting with the dash before the month and ending with a dot after the seconds.
156/// Example: -MM-ddThh:mm:ss.
157#[inline]
158#[cfg(target_arch = "x86_64")]
159#[target_feature(enable = "sse2,ssse3")]
160unsafe fn format_mmddhhmmss_double_dabble(buffer: *mut u8, month: u16, day: u16, hour: u16, minute: u16, second: u16) {
161    let mut res = simd_double_dabble(&[0, 0, 0, second, minute, hour, day, month]);
162
163    res = _mm_shuffle_epi8(res, _mm_set_epi8(-1, 9, 8, -1, 7, 6, -1, 5, 4, -1, 3, 2, -1, 1, 0, -1));
164    res = _mm_add_epi8(res, _mm_loadu_si128(PATTERN_AFTER_YEAR.as_ptr() as *const __m128i));
165
166    _mm_storeu_si128(buffer as *mut __m128i, res);
167}
168
169/// formats the timestamp into the output buffer including separator chars, starting with the dash before the month and ending with a dot after the seconds.
170/// Example: YYYY-MM-ddThh:mm:ss.
171#[inline]
172#[cfg(target_arch = "x86_64")]
173#[target_feature(enable = "sse2,ssse3")]
174unsafe fn format_yyyymmddhhmm_double_dabble(
175    buffer: *mut u8,
176    year_hi: u16,
177    year_lo: u16,
178    month: u16,
179    day: u16,
180    hour: u16,
181    minute: u16,
182) {
183    let mut res = simd_double_dabble(&[year_hi, year_lo, month, day, hour, minute, 0, 0]);
184
185    res = _mm_shuffle_epi8(res, _mm_setr_epi8(0, 1, 2, 3, -1, 4, 5, -1, 6, 7, -1, 8, 9, -1, 10, 11));
186    res = _mm_add_epi8(res, _mm_loadu_si128(PATTERN_COMPLETE.as_ptr() as *const __m128i));
187
188    _mm_storeu_si128(buffer as *mut __m128i, res);
189}
190
191/// formats the timestamp into the output buffer including separator chars, starting with the dash before the month and ending with a dot after the seconds.
192/// Example: YYYY-MM-ddThh:mm:ss.
193#[inline]
194#[cfg(target_arch = "x86_64")]
195#[target_feature(enable = "sse2,ssse3")]
196unsafe fn format_ss_sss_double_dabble(buffer: *mut u8, second: u16, milli_hi: u16, milli_lo: u16) {
197    let mut res = simd_double_dabble(&[milli_hi, milli_lo, second, 0, 0, 0, 0, 0]);
198
199    res = _mm_shuffle_epi8(res, _mm_setr_epi8(-1, 4, 5, -1, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1));
200    res = _mm_add_epi8(res, _mm_loadu_si128(PATTERN_COMPLETE.as_ptr().add(16) as *const __m128i));
201
202    // (buffer as *mut i64).write(_mm_extract_epi64(res, 0));
203    _mm_storel_epi64(buffer as *mut __m128i, res);
204}
205
206#[inline]
207#[cfg(target_arch = "x86_64")]
208#[target_feature(enable = "sse2,ssse3")]
209#[doc(hidden)] // used in benchmarks
210pub unsafe fn format_simd_dd_to_slice(
211    slice: &mut [u8],
212    year: u32,
213    month: u32,
214    day: u32,
215    hour: u32,
216    minute: u32,
217    second: u32,
218    millisecond: u32,
219) {
220    //unsafe { asm!("#LLVM-MCA-BEGIN format_simd_dd") };
221
222    let slice = &mut slice[0..24];
223
224    format_yyyymmddhhmm_double_dabble(
225        slice.as_mut_ptr().add(0),
226        (year / 100) as u16,
227        (year % 100) as u16,
228        month as u16,
229        day as u16,
230        hour as u16,
231        minute as u16,
232    );
233    format_ss_sss_double_dabble(
234        slice.as_mut_ptr().add(16),
235        second as u16,
236        (millisecond / 100) as u16,
237        (millisecond % 100) as u16,
238    );
239
240    //unsafe { asm!("#LLVM-MCA-END format_simd_dd") };
241}
242
/// Writes the timestamp as `YYYY-MM-ddThh:mm:ss.SSSZ` into the first 24 bytes of `slice`,
/// one byte at a time and without any platform specific instructions.
///
/// The inputs are not validated.
///
/// # Panics
///
/// Panics if `slice` is shorter than 24 bytes.
#[inline]
#[doc(hidden)] // used in benchmarks
pub fn format_scalar_to_slice(
    slice: &mut [u8],
    year: u32,
    month: u32,
    day: u32,
    hour: u32,
    minute: u32,
    second: u32,
    millisecond: u32,
) {
    let out = &mut slice[0..24];

    // appends one byte to the output and advances the write position
    let mut position = 0;
    let mut emit = |byte: u8| {
        out[position] = byte;
        position += 1;
    };
    // ascii character for a single decimal digit
    let digit = |value: u32| b'0' + (value as u8);

    emit(digit(year / 1000));
    emit(digit(year / 100 % 10));
    emit(digit(year / 10 % 10));
    emit(digit(year % 10));
    emit(b'-');
    emit(digit(month / 10));
    emit(digit(month % 10));
    emit(b'-');
    emit(digit(day / 10));
    emit(digit(day % 10));
    emit(b'T');
    emit(digit(hour / 10));
    emit(digit(hour % 10));
    emit(b':');
    emit(digit(minute / 10));
    emit(digit(minute % 10));
    emit(b':');
    emit(digit(second / 10));
    emit(digit(second % 10));
    emit(b'.');
    emit(digit(millisecond / 100 % 10));
    emit(digit(millisecond / 10 % 10));
    emit(digit(millisecond % 10));
    emit(b'Z');
}
298
299pub fn format_to_rfc3339_utc_bytes(
300    year: u32,
301    month: u32,
302    day: u32,
303    hour: u32,
304    minute: u32,
305    second: u32,
306    millisecond: u32,
307) -> [u8; 24] {
308    let mut buffer = [0_u8; 24];
309    #[cfg(all(not(miri), target_feature = "sse4.1"))]
310    unsafe {
311        format_simd_mul_to_slice(&mut buffer, year, month, day, hour, minute, second, millisecond);
312    }
313    #[cfg(not(all(not(miri), target_feature = "sse4.1")))]
314    {
315        format_scalar_to_slice(&mut buffer, year, month, day, hour, minute, second, millisecond);
316    }
317    buffer
318}
319
/// Common signature of the `format_*_to_slice` functions. It is an `unsafe fn` pointer so that
/// the implementations requiring specific target features can be passed as well.
#[cfg(test)]
type FormatToSlice = unsafe fn(&mut [u8], u32, u32, u32, u32, u32, u32, u32);

/// Runs the formatting function `f` on a fresh 24 byte buffer and asserts that the produced
/// bytes are valid utf8 and equal to `expected`.
#[cfg(test)]
fn assert_format(
    expected: &str,
    year: u32,
    month: u32,
    day: u32,
    hour: u32,
    minute: u32,
    second: u32,
    millisecond: u32,
    f: FormatToSlice,
) {
    let mut output = vec![0_u8; 24];

    // callers are responsible for only passing implementations
    // whose required cpu features are available
    unsafe { f(&mut output, year, month, day, hour, minute, second, millisecond) };

    let actual = String::from_utf8(output).unwrap();

    assert_eq!(expected, actual.as_str());
}
345
#[cfg(test)]
mod scalar_tests {
    use crate::format::assert_format;
    use crate::format_scalar_to_slice;

    /// Expected output together with the input fields
    /// (year, month, day, hour, minute, second, millisecond).
    const CASES: [(&str, [u32; 7]); 3] = [
        ("2021-09-10T23:45:31.987Z", [2021, 9, 10, 23, 45, 31, 987]),
        ("2021-01-01T00:00:00.000Z", [2021, 1, 1, 0, 0, 0, 0]),
        ("2021-12-31T23:59:60.999Z", [2021, 12, 31, 23, 59, 60, 999]),
    ];

    #[test]
    fn test_format_scalar() {
        for &(expected, [year, month, day, hour, minute, second, millisecond]) in CASES.iter() {
            assert_format(
                expected,
                year,
                month,
                day,
                hour,
                minute,
                second,
                millisecond,
                format_scalar_to_slice,
            );
        }
    }
}
378
#[cfg(test)]
#[cfg(all(
    not(miri),
    target_arch = "x86_64",
    target_feature = "sse2",
    target_feature = "ssse3",
    target_feature = "sse4.1"
))]
mod simd_tests {
    use crate::format::assert_format;
    use crate::{format_simd_dd_to_slice, format_simd_mul_to_slice};

    /// Expected output together with the input fields
    /// (year, month, day, hour, minute, second, millisecond).
    const CASES: [(&str, [u32; 7]); 3] = [
        ("2021-09-10T23:45:31.987Z", [2021, 9, 10, 23, 45, 31, 987]),
        ("2021-01-01T00:00:00.000Z", [2021, 1, 1, 0, 0, 0, 0]),
        ("2021-12-31T23:59:60.999Z", [2021, 12, 31, 23, 59, 60, 999]),
    ];

    #[test]
    fn test_format_simd_dd() {
        for &(expected, [year, month, day, hour, minute, second, millisecond]) in CASES.iter() {
            assert_format(
                expected,
                year,
                month,
                day,
                hour,
                minute,
                second,
                millisecond,
                format_simd_dd_to_slice,
            );
        }
    }

    #[test]
    fn test_format_simd_mul() {
        for &(expected, [year, month, day, hour, minute, second, millisecond]) in CASES.iter() {
            assert_format(
                expected,
                year,
                month,
                day,
                hour,
                minute,
                second,
                millisecond,
                format_simd_mul_to_slice,
            );
        }
    }
}